// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
import Accelerate
import CoreImage
import Foundation
import os.log
import TensorFlowLite
import UIKit
/// This class handles all data preprocessing, runs inference on a given frame by invoking the
/// `Interpreter`, and formats the resulting inferences.
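///
/// A minimal construction sketch (a non-authoritative example; the `Constants` values are
/// defined elsewhere in this sample):
///
///     let gpuHandler = try? ModelDataHandler(threadCount: 2, delegate: .Metal)
///     let defaultHandler = try? ModelDataHandler()  // uses the `Constants` defaults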
class ModelDataHandler {
// MARK: - Private Properties
/// TensorFlow Lite `Interpreter` object for performing inference on a given model.
private var interpreter: Interpreter
/// TensorFlow Lite `Tensor`s for the model's input and output.
private var inputTensor: Tensor
// Output tensors kept from the pose-estimation sample this handler was adapted from;
// MiDaS uses the single `outputTensor` below.
//private var heatsTensor: Tensor
//private var offsetsTensor: Tensor
private var outputTensor: Tensor
// MARK: - Initialization
/// A failable initializer for `ModelDataHandler`. A new instance is created if the model is
/// successfully loaded from the app's main bundle. Default `threadCount` is 2.
init(
threadCount: Int = Constants.defaultThreadCount,
delegate: Delegates = Constants.defaultDelegate
) throws {
// Construct the path to the model file.
guard
let modelPath = Bundle.main.path(
forResource: Model.file.name,
ofType: Model.file.extension
)
else {
fatalError("Failed to load the model file with name: \(Model.file.name).")
}
// Specify the options for the `Interpreter`.
var options = Interpreter.Options()
options.threadCount = threadCount
// Specify the delegates for the `Interpreter`.
var delegates: [Delegate]?
switch delegate {
case .Metal:
delegates = [MetalDelegate()]
case .CoreML:
if let coreMLDelegate = CoreMLDelegate() {
delegates = [coreMLDelegate]
} else {
delegates = nil
}
case .CPU:
delegates = nil
}
// Create the `Interpreter`.
interpreter = try Interpreter(modelPath: modelPath, options: options, delegates: delegates)
// Initialize input and output `Tensor`s.
// Allocate memory for the model's input `Tensor`s.
try interpreter.allocateTensors()
// Get allocated input and output `Tensor`s.
inputTensor = try interpreter.input(at: 0)
outputTensor = try interpreter.output(at: 0)
//heatsTensor = try interpreter.output(at: 0)
//offsetsTensor = try interpreter.output(at: 1)
/*
// Shape checks kept from the pose-estimation sample this handler was adapted from; they
// reference PoseNet-style outputs (`heatsTensor`, `offsetsTensor`) that MiDaS does not have.
// Check if input and output `Tensor`s are in the expected formats.
guard (inputTensor.dataType == .uInt8) == Model.isQuantized else {
fatalError("Unexpected Model: quantization is \(!Model.isQuantized)")
}
guard inputTensor.shape.dimensions[0] == Model.input.batchSize,
inputTensor.shape.dimensions[1] == Model.input.height,
inputTensor.shape.dimensions[2] == Model.input.width,
inputTensor.shape.dimensions[3] == Model.input.channelSize
else {
fatalError("Unexpected Model: input shape")
}
guard heatsTensor.shape.dimensions[0] == Model.output.batchSize,
heatsTensor.shape.dimensions[1] == Model.output.height,
heatsTensor.shape.dimensions[2] == Model.output.width,
heatsTensor.shape.dimensions[3] == Model.output.keypointSize
else {
fatalError("Unexpected Model: heat tensor")
}
guard offsetsTensor.shape.dimensions[0] == Model.output.batchSize,
offsetsTensor.shape.dimensions[1] == Model.output.height,
offsetsTensor.shape.dimensions[2] == Model.output.width,
offsetsTensor.shape.dimensions[3] == Model.output.offsetSize
else {
fatalError("Unexpected Model: offset tensor")
}
*/
}
/// Runs the MiDaS model on the given image, cropping from the source area and scaling the
/// result to the destination size.
///
/// - Parameters:
/// - on: Input image to run the model on.
/// - from: Region of the input image to run the model on.
/// - to: Size of the view that will render the result.
/// - Returns: The result of the inference and the time consumed by each step, or `nil` on
/// failure.
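///
/// A usage sketch (hypothetical `handler`, `buffer`, `square`, and `size` supplied by the
/// caller). The returned `[Float]` is laid out row-major, one value per output pixel:
///
///     if let (depth, w, h, _) = handler.runMidas(on: buffer, from: square, to: size) {
///       let center = depth[(h / 2) * w + (w / 2)]  // value at the image center
///     }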
func runMidas(on pixelbuffer: CVPixelBuffer, from source: CGRect, to dest: CGSize)
-> ([Float], Int, Int, Times)?
{
// Start times of each process.
let preprocessingStartTime: Date
let inferenceStartTime: Date
let postprocessingStartTime: Date
// Processing times in milliseconds.
let preprocessingTime: TimeInterval
let inferenceTime: TimeInterval
let postprocessingTime: TimeInterval
preprocessingStartTime = Date()
guard let data = preprocess(of: pixelbuffer, from: source) else {
os_log("Preprocessing failed", type: .error)
return nil
}
preprocessingTime = Date().timeIntervalSince(preprocessingStartTime) * 1000
inferenceStartTime = Date()
inference(from: data)
inferenceTime = Date().timeIntervalSince(inferenceStartTime) * 1000
postprocessingStartTime = Date()
//guard let result = postprocess(to: dest) else {
// os_log("Postprocessing failed", type: .error)
// return nil
//}
postprocessingTime = Date().timeIntervalSince(postprocessingStartTime) * 1000
let results: [Float]
switch outputTensor.dataType {
case .uInt8:
guard let quantization = outputTensor.quantizationParameters else {
print("No results returned because the quantization values for the output tensor are nil.")
return nil
}
let quantizedResults = [UInt8](outputTensor.data)
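// Dequantize each byte: realValue = scale * Float(quantizedValue - zeroPoint).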
results = quantizedResults.map {
quantization.scale * Float(Int($0) - quantization.zeroPoint)
}
case .float32:
results = [Float32](unsafeData: outputTensor.data) ?? []
default:
print("Output tensor data type \(outputTensor.dataType) is unsupported for this example app.")
return nil
}
let times = Times(
preprocessing: preprocessingTime,
inference: inferenceTime,
postprocessing: postprocessingTime)
return (results, Model.input.width, Model.input.height, times)
}
// MARK: - Private functions to run model
/// Preprocesses the given region of an image into `Data` of the desired size by cropping
/// and resizing it.
///
/// - Parameters:
/// - of: Input image to crop and resize.
/// - from: Target area to be cropped and resized.
/// - Returns: The cropped and resized image data, or `nil` if it cannot be processed.
private func preprocess(of pixelBuffer: CVPixelBuffer, from targetSquare: CGRect) -> Data? {
let sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
assert(sourcePixelFormat == kCVPixelFormatType_32BGRA)
// Resize `targetSquare` of input image to `modelSize`.
let modelSize = CGSize(width: Model.input.width, height: Model.input.height)
guard let thumbnail = pixelBuffer.resize(from: targetSquare, to: modelSize)
else {
return nil
}
// Remove the alpha component from the image buffer to get the RGB `Data`.
guard
let inputData = thumbnail.rgbData(
isModelQuantized: Model.isQuantized
)
else {
os_log("Failed to convert the image buffer to RGB data.", type: .error)
return nil
}
return inputData
}
/*
// Pose-estimation postprocessing kept from the original sample; `runMidas` reads the MiDaS
// output tensor directly instead.
/// Postprocesses output `Tensor`s to `Result` with size of view to render the result.
///
/// - Parameters:
/// - to: Size of view to be displayed.
/// - Returns: Postprocessed `Result`. `nil` if it cannot be processed.
private func postprocess(to viewSize: CGSize) -> Result? {
// MARK: Formats output tensors
// Convert `Tensor` to `FlatArray`. As Midas is not quantized, convert them to Float type
// `FlatArray`.
let heats = FlatArray<Float32>(tensor: heatsTensor)
let offsets = FlatArray<Float32>(tensor: offsetsTensor)
// MARK: Find position of each key point
// Finds the (row, col) locations of where the keypoints are most likely to be. The higher
// the `heats[0, row, col, keypoint]` value, the more likely it is that `keypoint` is located
// at (`row`, `col`).
let keypointPositions = (0..<Model.output.keypointSize).map { keypoint -> (Int, Int) in
var maxValue = heats[0, 0, 0, keypoint]
var maxRow = 0
var maxCol = 0
for row in 0..<Model.output.height {
for col in 0..<Model.output.width {
if heats[0, row, col, keypoint] > maxValue {
maxValue = heats[0, row, col, keypoint]
maxRow = row
maxCol = col
}
}
}
return (maxRow, maxCol)
}
// MARK: Calculates total confidence score
// Calculates total confidence score of each key position.
let totalScoreSum = keypointPositions.enumerated().reduce(0.0) { accumulator, elem -> Float32 in
accumulator + sigmoid(heats[0, elem.element.0, elem.element.1, elem.offset])
}
let totalScore = totalScoreSum / Float32(Model.output.keypointSize)
// MARK: Calculate key point position on model input
// Calculates `KeyPoint` coordinates on the model input image, adjusted by `offsets`.
let coords = keypointPositions.enumerated().map { index, elem -> (y: Float32, x: Float32) in
let (y, x) = elem
let yCoord =
Float32(y) / Float32(Model.output.height - 1) * Float32(Model.input.height)
+ offsets[0, y, x, index]
let xCoord =
Float32(x) / Float32(Model.output.width - 1) * Float32(Model.input.width)
+ offsets[0, y, x, index + Model.output.keypointSize]
return (y: yCoord, x: xCoord)
}
// MARK: Transform key point position and make lines
// Makes `Result` from `keypointPositions`. Each point is scaled to `viewSize` to be drawn.
var result = Result(dots: [], lines: [], score: totalScore)
var bodyPartToDotMap = [BodyPart: CGPoint]()
for (index, part) in BodyPart.allCases.enumerated() {
let position = CGPoint(
x: CGFloat(coords[index].x) * viewSize.width / CGFloat(Model.input.width),
y: CGFloat(coords[index].y) * viewSize.height / CGFloat(Model.input.height)
)
bodyPartToDotMap[part] = position
result.dots.append(position)
}
do {
try result.lines = BodyPart.lines.map { map throws -> Line in
guard let from = bodyPartToDotMap[map.from] else {
throw PostprocessError.missingBodyPart(of: map.from)
}
guard let to = bodyPartToDotMap[map.to] else {
throw PostprocessError.missingBodyPart(of: map.to)
}
return Line(from: from, to: to)
}
} catch PostprocessError.missingBodyPart(let missingPart) {
os_log("Postprocessing error: %s is missing.", type: .error, missingPart.rawValue)
return nil
} catch {
os_log("Postprocessing error: %s", type: .error, error.localizedDescription)
return nil
}
return result
}
*/
/// Runs inference with the given `Data`.
///
/// - Parameter from: `Data` of the input image to run the model on.
private func inference(from data: Data) {
// Copy the initialized `Data` to the input `Tensor`.
do {
try interpreter.copy(data, toInputAt: 0)
// Run inference by invoking the `Interpreter`.
try interpreter.invoke()
// Get the output `Tensor` to process the inference results.
outputTensor = try interpreter.output(at: 0)
//heatsTensor = try interpreter.output(at: 0)
//offsetsTensor = try interpreter.output(at: 1)
} catch let error {
os_log(
"Failed to invoke the interpreter with error: %s", type: .error,
error.localizedDescription)
return
}
}
/// Returns the logistic sigmoid of `x`, a value in (0, 1).
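/// For example, `sigmoid(0) == 0.5`.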
private func sigmoid(_ x: Float32) -> Float32 {
return (1.0 / (1.0 + exp(-x)))
}
}
// MARK: - Data types for inference result
struct KeyPoint {
var bodyPart: BodyPart = BodyPart.NOSE
var position: CGPoint = CGPoint()
var score: Float = 0.0
}
struct Line {
let from: CGPoint
let to: CGPoint
}
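/// Wall-clock time, in milliseconds, spent in each stage of `runMidas`.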
struct Times {
var preprocessing: Double
var inference: Double
var postprocessing: Double
}
struct Result {
var dots: [CGPoint]
var lines: [Line]
var score: Float
}
enum BodyPart: String, CaseIterable {
case NOSE = "nose"
case LEFT_EYE = "left eye"
case RIGHT_EYE = "right eye"
case LEFT_EAR = "left ear"
case RIGHT_EAR = "right ear"
case LEFT_SHOULDER = "left shoulder"
case RIGHT_SHOULDER = "right shoulder"
case LEFT_ELBOW = "left elbow"
case RIGHT_ELBOW = "right elbow"
case LEFT_WRIST = "left wrist"
case RIGHT_WRIST = "right wrist"
case LEFT_HIP = "left hip"
case RIGHT_HIP = "right hip"
case LEFT_KNEE = "left knee"
case RIGHT_KNEE = "right knee"
case LEFT_ANKLE = "left ankle"
case RIGHT_ANKLE = "right ankle"
/// List of lines connecting each part.
static let lines = [
(from: BodyPart.LEFT_WRIST, to: BodyPart.LEFT_ELBOW),
(from: BodyPart.LEFT_ELBOW, to: BodyPart.LEFT_SHOULDER),
(from: BodyPart.LEFT_SHOULDER, to: BodyPart.RIGHT_SHOULDER),
(from: BodyPart.RIGHT_SHOULDER, to: BodyPart.RIGHT_ELBOW),
(from: BodyPart.RIGHT_ELBOW, to: BodyPart.RIGHT_WRIST),
(from: BodyPart.LEFT_SHOULDER, to: BodyPart.LEFT_HIP),
(from: BodyPart.LEFT_HIP, to: BodyPart.RIGHT_HIP),
(from: BodyPart.RIGHT_HIP, to: BodyPart.RIGHT_SHOULDER),
(from: BodyPart.LEFT_HIP, to: BodyPart.LEFT_KNEE),
(from: BodyPart.LEFT_KNEE, to: BodyPart.LEFT_ANKLE),
(from: BodyPart.RIGHT_HIP, to: BodyPart.RIGHT_KNEE),
(from: BodyPart.RIGHT_KNEE, to: BodyPart.RIGHT_ANKLE),
]
}
// MARK: - Delegates Enum
enum Delegates: Int, CaseIterable {
case CPU
case Metal
case CoreML
var description: String {
switch self {
case .CPU:
return "CPU"
case .Metal:
return "GPU"
case .CoreML:
return "NPU"
}
}
}
// MARK: - Custom Errors
enum PostprocessError: Error {
case missingBodyPart(of: BodyPart)
}
// MARK: - Information about the model file.
typealias FileInfo = (name: String, extension: String)
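/// Metadata for the bundled model: per the shapes below, `model_opt.tflite` takes a
/// 1 × 256 × 256 × 3 float RGB input and returns a 1 × 256 × 256 × 1 map (MiDaS outputs
/// relative inverse depth).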
enum Model {
static let file: FileInfo = (
name: "model_opt", extension: "tflite"
)
static let input = (batchSize: 1, height: 256, width: 256, channelSize: 3)
static let output = (batchSize: 1, height: 256, width: 256, channelSize: 1)
static let isQuantized = false
}
extension Array {
/// Creates a new array from the bytes of the given unsafe data.
///
/// - Warning: The array's `Element` type must be trivial in that it can be copied bit for bit
/// with no indirection or reference-counting operations; otherwise, copying the raw bytes in
/// the `unsafeData`'s buffer to a new array returns an unsafe copy.
/// - Note: Returns `nil` if `unsafeData.count` is not a multiple of
/// `MemoryLayout<Element>.stride`.
/// - Parameter unsafeData: The data containing the bytes to turn into an array.
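///
/// For example, the little-endian bytes of `Float32` 1.0 decode back to `[1.0]`:
///
///     let data = Data([0x00, 0x00, 0x80, 0x3F])
///     let values = [Float32](unsafeData: data)  // [1.0]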
init?(unsafeData: Data) {
guard unsafeData.count % MemoryLayout<Element>.stride == 0 else { return nil }
#if swift(>=5.0)
self = unsafeData.withUnsafeBytes { .init($0.bindMemory(to: Element.self)) }
#else
self = unsafeData.withUnsafeBytes {
.init(UnsafeBufferPointer<Element>(
start: $0,
count: unsafeData.count / MemoryLayout<Element>.stride
))
}
#endif // swift(>=5.0)
}
}
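// MARK: - Hypothetical display helper
// A minimal sketch, not part of the original sample: one way a caller might map the `[Float]`
// returned by `runMidas` to 8-bit grayscale for rendering. The linear min-max normalization is
// an assumption; MiDaS produces relative (inverse) depth, so only the ordering of values is
// meaningful.
extension Array where Element == Float {
  /// Linearly rescales the elements to the 0...255 range.
  func normalizedToGrayscale() -> [UInt8] {
    guard let lo = self.min(), let hi = self.max(), hi > lo else {
      return [UInt8](repeating: 0, count: count)
    }
    let scale = 255.0 / (hi - lo)
    return map { UInt8((($0 - lo) * scale).rounded()) }
  }
}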