ARKitでハンドトラッキングをしてみた
はじめに
以前mediapipeのiOSラッパーライブラリを使ってハンドトラッキングを試してみましたが、今回はSceneKitのオブジェクトをAR表示させてハンドトラッキングさせてみました。
できたもの
ARKitのレンダリング
ARKitでどうやって現在カメラに表示されている画像を取得するのか悩みましたが、sceneView.session.currentFrame?.capturedImageに保持されているので、そこから引っ張ってくれば毎フレームの画像を取得することができます。
あとは色空間を変換してHandTrackerのメソッドに渡してあげればOKです。
/// SceneKit per-frame callback: grabs the current AR camera image and feeds it
/// to the MediaPipe hand tracker.
func renderer(_ renderer: SCNSceneRenderer, updateAtTime time: TimeInterval) {
    // While the AR session is still starting up there is no frame yet; skip those ticks.
    guard let capturedImage = sceneView.session.currentFrame?.capturedImage else {
        return
    }
    // `try?` + `guard` instead of `try!` so a failed YCbCr→BGRA conversion
    // drops the frame instead of crashing the app. (`try?` flattens the
    // nested optional since Swift 5, so one `guard let` suffices.)
    guard let bgraPixelBuffer = try? capturedImage.toBGRA() else {
        return
    }
    tracker.processVideoFrame(bgraPixelBuffer)
}
色空間の変換
mediapipeに手の座標を計算させるためには色空間を変換する必要があります。
AVFoundationでBufferを取得した場合はRGB形式で渡ってくるので問題ありませんが、ARKitを使って表示する場合、ARSCNViewからBufferを取得することになるので、YCbCr形式になっているのでRGB形式に変換してあげる必要があります。
変換については「YUVのCVPixelBufferをBGRAに変換する」という記事を100%参考にしました。
import Accelerate
extension CVPixelBuffer {
    /// Converts a bi-planar full-range YCbCr (NV12) pixel buffer — the format
    /// ARKit captures in — into a newly allocated 32BGRA pixel buffer.
    ///
    /// - Returns: A BGRA copy; the receiver itself when it is not NV12; or
    ///   `nil` when a plane could not be read or the output buffer could not
    ///   be allocated.
    /// - Throws: Rethrows any error from the vImage conversion in `draw`.
    public func toBGRA() throws -> CVPixelBuffer? {
        let pixelBuffer = self
        // Anything other than NV12 is passed through untouched.
        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
        guard pixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange else { return pixelBuffer }
        // Split planes (0 = luma, 1 = interleaved chroma) and allocate output.
        // `guard` instead of the original `!` so an unreadable buffer degrades
        // to `nil` rather than crashing.
        guard
            let yImage = pixelBuffer.with({ VImage(pixelBuffer: $0, plane: 0) }),
            let cbcrImage = pixelBuffer.with({ VImage(pixelBuffer: $0, plane: 1) }),
            let outPixelBuffer = CVPixelBuffer.make(width: yImage.width, height: yImage.height, format: kCVPixelFormatType_32BGRA),
            var argbImage = outPixelBuffer.with({ VImage(pixelBuffer: $0) })
        else { return nil }
        // Convert YUV -> ARGB, then permute ARGB -> BGRA in place.
        // NOTE(review): the base addresses captured by VImage are used after
        // `with` has unlocked the buffers — consider holding the lock across
        // the draw. TODO confirm this is safe for these buffers.
        try argbImage.draw(yBuffer: yImage.buffer, cbcrBuffer: cbcrImage.buffer)
        argbImage.permute(channelMap: [3, 2, 1, 0])
        return outPixelBuffer
    }
}
/// Pairs a CVPixelBuffer (or one plane of it) with a `vImage_Buffer` view of
/// the same memory so it can be handed to Accelerate's vImage routines.
/// NOTE(review): the stored base address is only valid while the originating
/// pixel buffer's base address is locked — callers must guarantee that.
struct VImage {
// Pixel dimensions and row stride of the wrapped plane/buffer.
let width: Int
let height: Int
let bytesPerRow: Int
// vImage view onto the (unowned) pixel-buffer memory.
var buffer: vImage_Buffer
// Wraps one plane of a planar pixel buffer (e.g. plane 0 = Y, plane 1 = CbCr).
// Fails when the plane's base address is unavailable.
init?(pixelBuffer: CVPixelBuffer, plane: Int) {
guard let rawBuffer = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, plane) else { return nil }
self.width = CVPixelBufferGetWidthOfPlane(pixelBuffer, plane)
self.height = CVPixelBufferGetHeightOfPlane(pixelBuffer, plane)
self.bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, plane)
self.buffer = vImage_Buffer(
data: UnsafeMutableRawPointer(mutating: rawBuffer),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: bytesPerRow
)
}
// Wraps a non-planar (chunky) pixel buffer; fails when its base address is unavailable.
init?(pixelBuffer: CVPixelBuffer) {
guard let rawBuffer = CVPixelBufferGetBaseAddress(pixelBuffer) else { return nil }
self.width = CVPixelBufferGetWidth(pixelBuffer)
self.height = CVPixelBufferGetHeight(pixelBuffer)
self.bytesPerRow = CVPixelBufferGetBytesPerRow(pixelBuffer)
self.buffer = vImage_Buffer(
data: UnsafeMutableRawPointer(mutating: rawBuffer),
height: vImagePixelCount(height),
width: vImagePixelCount(width),
rowBytes: bytesPerRow
)
}
// Renders the given Y and CbCr planes into this image as ARGB (see vImage_Buffer.draw).
mutating func draw(yBuffer: vImage_Buffer, cbcrBuffer: vImage_Buffer) throws {
try buffer.draw(yBuffer: yBuffer, cbcrBuffer: cbcrBuffer)
}
// Reorders the four channels in place according to `channelMap`.
mutating func permute(channelMap: [UInt8]) {
buffer.permute(channelMap: channelMap)
}
}
extension CVPixelBuffer {
    /// Executes `closure` with this buffer's base address locked, unlocking
    /// again on every exit path, and returns the closure's result.
    func with<T>(_ closure: ((_ pixelBuffer: CVPixelBuffer) -> T)) -> T {
        CVPixelBufferLockBaseAddress(self, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
        return closure(self)
    }

    /// Allocates a fresh pixel buffer with the given size and pixel format,
    /// or returns `nil` when CoreVideo refuses the request.
    static func make(width: Int, height: Int, format: OSType) -> CVPixelBuffer? {
        var created: CVPixelBuffer?
        CVPixelBufferCreate(kCFAllocatorDefault, width, height, format, nil, &created)
        return created
    }
}
extension vImage_Buffer {
    /// Error thrown when a vImage conversion fails, carrying the raw code.
    struct ConversionError: Error {
        let code: vImage_Error
    }

    /// Converts the given full-range Y and interleaved CbCr planes into this
    /// buffer as ARGB8888 using the ITU-R BT.709 matrix.
    /// - Throws: `ConversionError` when vImage reports a failure (the original
    ///   code called `fatalError()` here, which contradicts `throws`).
    mutating func draw(yBuffer: vImage_Buffer, cbcrBuffer: vImage_Buffer) throws {
        var yBuffer = yBuffer
        var cbcrBuffer = cbcrBuffer
        var conversionMatrix: vImage_YpCbCrToARGB = {
            // Full-range (0–255) ranges matching kCVPixelFormatType_420YpCbCr8BiPlanarFullRange.
            var pixelRange = vImage_YpCbCrPixelRange(Yp_bias: 0, CbCr_bias: 128, YpRangeMax: 255, CbCrRangeMax: 255, YpMax: 255, YpMin: 1, CbCrMax: 255, CbCrMin: 0)
            var matrix = vImage_YpCbCrToARGB()
            vImageConvert_YpCbCrToARGB_GenerateConversion(kvImage_YpCbCrToARGBMatrix_ITU_R_709_2, &pixelRange, &matrix, kvImage420Yp8_CbCr8, kvImageARGB8888, UInt32(kvImageNoFlags))
            return matrix
        }()
        // alpha = 255 (fully opaque).
        let error = vImageConvert_420Yp8_CbCr8ToARGB8888(&yBuffer, &cbcrBuffer, &self, &conversionMatrix, nil, 255, UInt32(kvImageNoFlags))
        guard error == kvImageNoError else {
            throw ConversionError(code: error)
        }
    }

    /// Reorders the four 8-bit channels in place (in-place src == dest is
    /// supported by vImagePermuteChannels_ARGB8888).
    mutating func permute(channelMap: [UInt8]) {
        vImagePermuteChannels_ARGB8888(&self, &self, channelMap, UInt32(kvImageNoFlags))
    }
}
座標変換
mediapipeから渡された座標を正規化する必要があると思うのですが、ここをどう正規化すれば良いのか思いつかなかったので固定値をぶち込んで動かしました。
検証に使っていた端末はiPhone11です。
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate, TrackerDelegate {
    /// Called by MediaPipe with the detected hand landmarks (normalized
    /// camera-image coordinates — assumed, TODO confirm against HandTracker).
    func handTracker(_ handTracker: HandTracker!, didOutputLandmarks landmarks: [Landmark]!) {
        // The array is an implicitly unwrapped Obj-C parameter: guard both
        // nil and empty instead of indexing [0] blindly.
        guard let landmarks = landmarks, let pos = landmarks.first else { return }
        print(pos.x, pos.y, pos.z)
        // Touch the SceneKit scene graph only on the main thread.
        DispatchQueue.main.async {
            guard let boxNode = self.sceneView.scene.rootNode.childNode(withName: "box", recursively: true) else { return }
            // Hard-coded offsets from experimentation on an iPhone 11 (see
            // article) — proper normalization into AR space is still open.
            boxNode.position = SCNVector3(-pos.y + 0.3, -pos.x + 0.7, -0.5 + pos.z)
        }
    }

    func handTracker(_ handTracker: HandTracker!, didOutputPixelBuffer pixelBuffer: CVPixelBuffer!) {
        // Unused: only the landmark output is consumed.
    }
}
コード一覧
import SceneKit
import ARKit
import Accelerate
/// Renders an AR scene and drives a MediaPipe hand tracker from the camera feed.
class ViewController: UIViewController {

    /// MediaPipe hand-tracking graph wrapper. Force-unwrapped deliberately:
    /// the app cannot function without it, so failing to create it should
    /// surface immediately at startup.
    let tracker: HandTracker = HandTracker()!

    @IBOutlet var sceneView: ARSCNView! {
        didSet {
            sceneView.delegate = self
            sceneView.showsStatistics = true
            let scene = SCNScene()
            sceneView.scene = scene
        }
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        tracker.startGraph()
        tracker.delegate = self
        // Purple 5 cm cube whose position will follow the tracked hand.
        let box = SCNBox(width: 0.05, height: 0.05, length: 0.05, chamferRadius: 0)
        let material = SCNMaterial()
        material.diffuse.contents = UIColor.purple
        let boxNode = SCNNode(geometry: box)
        boxNode.name = "box"
        boxNode.geometry?.materials = [material]
        // Hard-coded initial position found by experimentation on an iPhone 11
        // (see article text) — the proper normalization is still an open issue.
        boxNode.position = SCNVector3(0.8184342/sceneView.frame.width, 0.7038554/sceneView.frame.height, -0.5)
        sceneView.scene.rootNode.addChildNode(boxNode)
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        let configuration = ARWorldTrackingConfiguration()
        sceneView.session.run(configuration)
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        sceneView.session.pause()
    }

    /// SceneKit per-frame callback: grabs the current AR camera image and
    /// feeds it to the MediaPipe hand tracker.
    func renderer(_ renderer: SCNSceneRenderer, updateAtTime time: TimeInterval) {
        // While the AR session is starting up there is no frame yet; skip.
        guard let capturedImage = sceneView.session.currentFrame?.capturedImage else {
            return
        }
        // `try?` + `guard` instead of `try!` so a failed YCbCr→BGRA conversion
        // drops the frame instead of crashing the app.
        guard let bgraPixelBuffer = try? capturedImage.toBGRA() else {
            return
        }
        tracker.processVideoFrame(bgraPixelBuffer)
    }
}
// MARK: - ARSCNViewDelegate
// Empty on purpose: `renderer(_:updateAtTime:)` is implemented in the class
// body itself; this extension only declares the conformance.
extension ViewController: ARSCNViewDelegate {}
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate, TrackerDelegate {
    /// Called by MediaPipe with the detected hand landmarks (normalized
    /// camera-image coordinates — assumed, TODO confirm against HandTracker).
    func handTracker(_ handTracker: HandTracker!, didOutputLandmarks landmarks: [Landmark]!) {
        // The array is an implicitly unwrapped Obj-C parameter: guard both
        // nil and empty instead of indexing [0] blindly.
        guard let landmarks = landmarks, let pos = landmarks.first else { return }
        print(pos.x, pos.y, pos.z)
        // Touch the SceneKit scene graph only on the main thread.
        DispatchQueue.main.async {
            guard let boxNode = self.sceneView.scene.rootNode.childNode(withName: "box", recursively: true) else { return }
            // Hard-coded offsets from experimentation on an iPhone 11 (see
            // article) — proper normalization into AR space is still open.
            boxNode.position = SCNVector3(-pos.y + 0.3, -pos.x + 0.7, -0.5 + pos.z)
        }
    }

    func handTracker(_ handTracker: HandTracker!, didOutputPixelBuffer pixelBuffer: CVPixelBuffer!) {
        // Unused: only the landmark output is consumed.
    }
}
extension CVPixelBuffer {
    /// Converts a bi-planar full-range YCbCr (NV12) pixel buffer — the format
    /// ARKit captures in — into a newly allocated 32BGRA pixel buffer.
    ///
    /// - Returns: A BGRA copy; the receiver itself when it is not NV12; or
    ///   `nil` when a plane could not be read or the output buffer could not
    ///   be allocated.
    /// - Throws: Rethrows any error from the vImage conversion in `draw`.
    public func toBGRA() throws -> CVPixelBuffer? {
        let pixelBuffer = self
        // Anything other than NV12 is passed through untouched.
        let pixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
        guard pixelFormat == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange else { return pixelBuffer }
        // Split planes (0 = luma, 1 = interleaved chroma) and allocate output.
        // `guard` instead of the original `!` so an unreadable buffer degrades
        // to `nil` rather than crashing.
        guard
            let yImage = pixelBuffer.with({ VImage(pixelBuffer: $0, plane: 0) }),
            let cbcrImage = pixelBuffer.with({ VImage(pixelBuffer: $0, plane: 1) }),
            let outPixelBuffer = CVPixelBuffer.make(width: yImage.width, height: yImage.height, format: kCVPixelFormatType_32BGRA),
            var argbImage = outPixelBuffer.with({ VImage(pixelBuffer: $0) })
        else { return nil }
        // Convert YUV -> ARGB, then permute ARGB -> BGRA in place.
        // NOTE(review): the base addresses captured by VImage are used after
        // `with` has unlocked the buffers — consider holding the lock across
        // the draw. TODO confirm this is safe for these buffers.
        try argbImage.draw(yBuffer: yImage.buffer, cbcrBuffer: cbcrImage.buffer)
        argbImage.permute(channelMap: [3, 2, 1, 0])
        return outPixelBuffer
    }
}
/// Couples a CVPixelBuffer (or a single plane of it) with a `vImage_Buffer`
/// describing the same memory, so it can be passed to Accelerate's vImage
/// routines. The wrapped pointer is unowned: it is only valid while the
/// originating pixel buffer's base address stays locked.
struct VImage {
    let width: Int
    let height: Int
    let bytesPerRow: Int
    var buffer: vImage_Buffer

    /// Wraps one plane of a planar pixel buffer (plane 0 = Y, plane 1 = CbCr
    /// for NV12); fails if the plane's base address cannot be obtained.
    init?(pixelBuffer: CVPixelBuffer, plane: Int) {
        guard let base = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, plane) else { return nil }
        let w = CVPixelBufferGetWidthOfPlane(pixelBuffer, plane)
        let h = CVPixelBufferGetHeightOfPlane(pixelBuffer, plane)
        let stride = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, plane)
        width = w
        height = h
        bytesPerRow = stride
        buffer = vImage_Buffer(
            data: UnsafeMutableRawPointer(mutating: base),
            height: vImagePixelCount(h),
            width: vImagePixelCount(w),
            rowBytes: stride
        )
    }

    /// Wraps a non-planar (chunky) pixel buffer; fails if its base address
    /// cannot be obtained.
    init?(pixelBuffer: CVPixelBuffer) {
        guard let base = CVPixelBufferGetBaseAddress(pixelBuffer) else { return nil }
        let w = CVPixelBufferGetWidth(pixelBuffer)
        let h = CVPixelBufferGetHeight(pixelBuffer)
        let stride = CVPixelBufferGetBytesPerRow(pixelBuffer)
        width = w
        height = h
        bytesPerRow = stride
        buffer = vImage_Buffer(
            data: UnsafeMutableRawPointer(mutating: base),
            height: vImagePixelCount(h),
            width: vImagePixelCount(w),
            rowBytes: stride
        )
    }

    /// Renders the given Y and CbCr planes into this image as ARGB.
    mutating func draw(yBuffer: vImage_Buffer, cbcrBuffer: vImage_Buffer) throws {
        try buffer.draw(yBuffer: yBuffer, cbcrBuffer: cbcrBuffer)
    }

    /// Reorders the four channels in place according to `channelMap`.
    mutating func permute(channelMap: [UInt8]) {
        buffer.permute(channelMap: channelMap)
    }
}
extension CVPixelBuffer {
    /// Executes `closure` with this buffer's base address locked, unlocking
    /// again on every exit path, and returns the closure's result.
    func with<T>(_ closure: ((_ pixelBuffer: CVPixelBuffer) -> T)) -> T {
        CVPixelBufferLockBaseAddress(self, .readOnly)
        defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) }
        return closure(self)
    }

    /// Allocates a fresh pixel buffer with the given size and pixel format,
    /// or returns `nil` when CoreVideo refuses the request.
    static func make(width: Int, height: Int, format: OSType) -> CVPixelBuffer? {
        var created: CVPixelBuffer?
        CVPixelBufferCreate(kCFAllocatorDefault, width, height, format, nil, &created)
        return created
    }
}
extension vImage_Buffer {
    /// Error thrown when a vImage conversion fails, carrying the raw code.
    struct ConversionError: Error {
        let code: vImage_Error
    }

    /// Converts the given full-range Y and interleaved CbCr planes into this
    /// buffer as ARGB8888 using the ITU-R BT.709 matrix.
    /// - Throws: `ConversionError` when vImage reports a failure (the original
    ///   code called `fatalError()` here, which contradicts `throws`).
    mutating func draw(yBuffer: vImage_Buffer, cbcrBuffer: vImage_Buffer) throws {
        var yBuffer = yBuffer
        var cbcrBuffer = cbcrBuffer
        var conversionMatrix: vImage_YpCbCrToARGB = {
            // Full-range (0–255) ranges matching kCVPixelFormatType_420YpCbCr8BiPlanarFullRange.
            var pixelRange = vImage_YpCbCrPixelRange(Yp_bias: 0, CbCr_bias: 128, YpRangeMax: 255, CbCrRangeMax: 255, YpMax: 255, YpMin: 1, CbCrMax: 255, CbCrMin: 0)
            var matrix = vImage_YpCbCrToARGB()
            vImageConvert_YpCbCrToARGB_GenerateConversion(kvImage_YpCbCrToARGBMatrix_ITU_R_709_2, &pixelRange, &matrix, kvImage420Yp8_CbCr8, kvImageARGB8888, UInt32(kvImageNoFlags))
            return matrix
        }()
        // alpha = 255 (fully opaque).
        let error = vImageConvert_420Yp8_CbCr8ToARGB8888(&yBuffer, &cbcrBuffer, &self, &conversionMatrix, nil, 255, UInt32(kvImageNoFlags))
        guard error == kvImageNoError else {
            throw ConversionError(code: error)
        }
    }

    /// Reorders the four 8-bit channels in place (in-place src == dest is
    /// supported by vImagePermuteChannels_ARGB8888).
    mutating func permute(channelMap: [UInt8]) {
        vImagePermuteChannels_ARGB8888(&self, &self, channelMap, UInt32(kvImageNoFlags))
    }
}