sentis-hand-landmark / RunHandLandmark.cs
using UnityEngine;
using Unity.Sentis;
using UnityEngine.Video;
using UnityEngine.UI;
using System.IO;
using Lays = Unity.Sentis.Layers;
using System.Collections.Generic;
/*
 * Hand Landmarks Inference
 * ========================
 * Basic inference script for blaze hand landmarks
 * Put this script on the Main Camera
 * Drag the sentis file onto the asset field
 * Create a RawImage in the scene
 * Put a link to that image in previewUI
 * Put a video in the Assets/StreamingAssets folder and put its name in videoName
 * Or put a test image in inputImage
 * Set inputType to the appropriate input
 */
public class RunHandLandmark : MonoBehaviour
//Drag the *.sentis or *.onnx model asset here (loaded in SetupModel):
public ModelAsset asset;
// Model file name, intended for loading the model from the StreamingAssets folder
// rather than from the asset field.
string modelName = "hand_landmark.sentis";
//Drag a link to a raw image here. It displays targetTexture and is the parent
//of the landmark marker objects; the P key toggles its visibility.
public RawImage previewUI = null;
// Put your bounding box sprite image here. Used for every landmark that has
// no entry of its own in markerTextures.
public Sprite boxSprite;
// Optional per-landmark sprites: entry j is drawn for landmark j, and landmarks
// whose index is beyond the end of this array fall back to boxSprite.
public Sprite[] markerTextures;
// File name of the video, relative to the StreamingAssets folder.
public string videoName = "chatting.mp4";
// Still image copied into targetTexture when inputType is Image.
public Texture2D inputImage;
// Selects which of the three sources feeds targetTexture.
public InputType inputType = InputType.Video;
//Resolution of the preview render texture; also the resolution requested from the webcam
Vector2Int resolution = new Vector2Int(640, 640);
WebCamTexture webcam;
VideoPlayer video;
// Backend handed to WorkerFactory.CreateWorker in SetupEngine.
const BackendType backend = BackendType.GPUCompute;
// Receives the current input frame; shown in previewUI.
RenderTexture targetTexture;
public enum InputType { Image, Video, Webcam };
IWorker worker;
//Side length of the square input tensor built in RunInference; landmark
//coordinates are scaled from this size to the preview rectangle.
const int size = 224;
Model model;
//webcam device name, passed to the WebCamTexture constructor
//(NOTE(review): an empty name presumably selects the default device - confirm):
const string deviceName = "";
// Set when Escape is pressed and by CleanUp; checked in LateUpdate.
bool closing = false;
/// <summary>
/// Axis-aligned rectangle described by its centre and extent. DrawBox applies
/// centerX and the negated centerY as the marker's local position and
/// width/height as its size.
/// </summary>
public struct BoundingBox
{
    // Centre of the box; DrawBox negates centerY when positioning the marker.
    public float centerX, centerY;

    // Extent of the box, used as the marker's sizeDelta.
    public float width, height;
}
// Marker GameObjects, looked up by the ID passed to DrawBox.
List<GameObject> boxPool = new();
/// <summary>
/// Unity start-up hook: creates the render texture shown in the preview, then
/// initialises the input source, the model and the inference worker.
/// </summary>
void Start()
{
    //(Note: if using a webcam on mobile get permissions here first)
    targetTexture = new RenderTexture(resolution.x, resolution.y, 0);
    previewUI.texture = targetTexture;

    // SetupInput and SetupModel are private and nothing else invokes them, so
    // they have to run here; without these calls Update dereferences a null
    // webcam/video and no worker exists. SetupInput comes after the render
    // texture is created because its image path blits into targetTexture.
    SetupInput();
    SetupModel();
    SetupEngine();
}
/// <summary>
/// Loads the network into <c>model</c>. The asset assigned in the inspector is
/// preferred; when none is assigned, the file named by <c>modelName</c> in the
/// StreamingAssets folder is loaded instead.
/// </summary>
void SetupModel()
{
    if (asset != null)
    {
        model = ModelLoader.Load(asset);
        return;
    }

    // No asset assigned: fall back to the file on disk instead of passing
    // null to the loader.
    model = ModelLoader.Load(Path.Join(Application.streamingAssetsPath, modelName));
}
/// <summary>
/// Builds the inference worker for the loaded model on the configured backend
/// and stores it in <c>worker</c>.
/// </summary>
public void SetupEngine()
    => worker = WorkerFactory.CreateWorker(backend, model);
/// <summary>
/// Prepares the source selected by <c>inputType</c>: starts the webcam, starts
/// a looping video from StreamingAssets, or copies the still image into the
/// render texture.
/// </summary>
void SetupInput()
{
    switch (inputType)
    {
        case InputType.Webcam:
            webcam = new WebCamTexture(deviceName, resolution.x, resolution.y);
            webcam.requestedFPS = 30;
            // Without Play() the texture never updates and Update returns
            // early on every frame.
            webcam.Play();
            break;

        case InputType.Video:
            video = gameObject.AddComponent<VideoPlayer>();
            // APIOnly: frames are only exposed through video.texture, which
            // Update copies into targetTexture.
            video.renderMode = VideoRenderMode.APIOnly;
            video.source = VideoSource.Url;
            video.url = $"{Application.streamingAssetsPath}/{videoName}";
            video.isLooping = true;
            // Start playback explicitly rather than relying on playOnAwake.
            video.Play();
            break;

        default:
            // Still image: show it straight away.
            Graphics.Blit(inputImage, targetTexture);
            break;
    }
}
/// <summary>
/// Per-frame hook: handles the Escape (stop) and P (toggle preview) keys, then
/// copies the current frame of the selected source into <c>targetTexture</c>.
/// </summary>
void Update()
{
    // Keys are polled first: GetKeyDown is only true during the frame in which
    // the key went down, so it must not be skipped by the early exits below
    // (e.g. on frames where the webcam delivered no new image).
    if (Input.GetKeyDown(KeyCode.Escape))
    {
        closing = true;
    }

    if (Input.GetKeyDown(KeyCode.P))
    {
        previewUI.enabled = !previewUI.enabled;
    }

    switch (inputType)
    {
        case InputType.Webcam:
            if (!webcam.didUpdateThisFrame) return;
            BlitPreservingAspect(webcam, webcam.width, webcam.height, webcam.videoVerticallyMirrored);
            break;

        case InputType.Video:
            // Until the player has produced a frame there is no texture and
            // the reported size is zero, which would turn the scale into NaN.
            if (video.texture == null || video.width == 0 || video.height == 0) return;
            BlitPreservingAspect(video.texture, video.width, video.height, false);
            break;

        case InputType.Image:
            Graphics.Blit(inputImage, targetTexture);
            break;
    }
}

// Copies source into targetTexture, scaling horizontally by the ratio of the
// target aspect to the source aspect and centring the result; optionally
// flips the image vertically.
void BlitPreservingAspect(Texture source, float sourceWidth, float sourceHeight, bool flipVertically)
{
    var sourceAspect = sourceWidth / sourceHeight;
    var targetAspect = (float)resolution.x / resolution.y;
    var gap = targetAspect / sourceAspect;
    var scale = new Vector2(gap, flipVertically ? -1 : 1);
    var offset = new Vector2((1 - gap) / 2, flipVertically ? 1 : 0);
    Graphics.Blit(source, targetTexture, scale, offset);
}
/// <summary>
/// Runs after every Update: performs inference on the frame currently held in
/// <c>targetTexture</c>, unless shutdown has been requested.
/// </summary>
void LateUpdate()
{
    if (closing) return;

    // The original condition had no statement attached, so RunInference was
    // never invoked from anywhere in the class.
    RunInference(targetTexture);
}
/// <summary>
/// Places one marker per landmark on the preview. Reads 21 landmarks from
/// <paramref name="landmarks"/>, three consecutive values each, of which the
/// first two are used as x and y.
/// </summary>
/// <param name="landmarks">Model output, indexed as [0, landmark * 3 + component].</param>
/// <param name="scale">Factor converting model-input pixels to preview units.</param>
void DrawLandmarks(TensorFloat landmarks, Vector2 scale)
{
    // Shift that moves the origin from the corner of the model input to its
    // centre (integer half of size, as in the coordinate formula).
    float shiftX = (size / 2) * scale.x;
    float shiftY = (size / 2) * scale.y;

    int landmarkIndex = 0;
    while (landmarkIndex < 21)
    {
        int first = landmarkIndex * 3;

        var marker = new BoundingBox();
        marker.centerX = landmarks[0, first] * scale.x - shiftX;
        marker.centerY = landmarks[0, first + 1] * scale.y - shiftY;
        marker.width = 8f * scale.x;
        marker.height = 8f * scale.y;

        // A dedicated sprite is used when one exists for this landmark,
        // otherwise the generic box sprite.
        Sprite sprite;
        if (landmarkIndex < markerTextures.Length)
        {
            sprite = markerTextures[landmarkIndex];
        }
        else
        {
            sprite = boxSprite;
        }

        DrawBox(marker, sprite, landmarkIndex);
        landmarkIndex++;
    }
}
/// <summary>
/// Converts <paramref name="source"/> into a size x size, 3-channel tensor,
/// runs the model on it and draws the landmarks from the "Identity" output.
/// </summary>
/// <param name="source">Texture holding the frame to analyse.</param>
void RunInference(Texture source)
{
    var transform = new TextureTransform();
    transform.SetDimensions(size, size, 3);
    // NOTE(review): confirm this axis order matches the layout the model expects.
    transform.SetTensorLayout(0, 1, 2, 3);
    using var image = TextureConverter.ToTensor(source, transform);

    // Feed the frame to the network. Without this call the input tensor is
    // never used and the outputs peeked below do not belong to this frame.
    worker.Execute(image);

    using var landmarks = worker.PeekOutput("Identity") as TensorFloat;
    // NOTE(review): depending on the Sentis version, a GPU-backed output may
    // need to be downloaded to the CPU before it can be indexed - confirm.

    // Landmarks are in model-input pixels; scale them to the preview rectangle.
    Vector2 markerScale = previewUI.rectTransform.rect.size / size;
    DrawLandmarks(landmarks, markerScale);

    // Debug switch: logs the first value of the two remaining outputs.
    bool showExtraInformation = false;
    if (showExtraInformation)
    {
        using var A = worker.PeekOutput("Identity_1") as TensorFloat;
        using var B = worker.PeekOutput("Identity_2") as TensorFloat;
        Debug.Log("A,B=" + A[0, 0] + "," + B[0, 0]);
    }
}
/// <summary>
/// Shows a sprite marker for <paramref name="box"/> on the preview image,
/// creating the marker object on first use and reusing it afterwards.
/// </summary>
/// <param name="box">Centre and size of the marker, in the preview's local units.</param>
/// <param name="sprite">Sprite to display.</param>
/// <param name="ID">
/// Index of the marker in the pool. New markers are appended, so IDs are
/// expected to be handed out consecutively starting at 0.
/// </param>
public void DrawBox(BoundingBox box, Sprite sprite, int ID)
{
    GameObject panel;
    if (ID >= boxPool.Count)
    {
        panel = new GameObject("landmark");
        // The marker needs an Image to show the sprite; the original never
        // added one, so the GetComponent<Image>() below returned null.
        panel.AddComponent<Image>();
        panel.transform.SetParent(previewUI.transform, false);
        // Remember the marker so the next call with this ID reuses it
        // instead of indexing past the end of the pool.
        boxPool.Add(panel);
    }
    else
    {
        panel = boxPool[ID];
        // Make sure a pooled marker is visible again if it was deactivated.
        panel.SetActive(true);
    }

    var img = panel.GetComponent<Image>();
    img.color = Color.white;
    img.sprite = sprite;
    img.type = Image.Type.Sliced;

    // The y coordinate is negated when applied to the local position.
    panel.transform.localPosition = new Vector3(box.centerX, -box.centerY);
    RectTransform rt = panel.GetComponent<RectTransform>();
    rt.sizeDelta = new Vector2(box.width, box.height);
}
/// <summary>
/// Hides every marker currently held in the pool. The objects are kept so
/// they can be reused rather than recreated.
/// </summary>
public void ClearAnnotations()
{
    // The original loop had no statement attached and therefore did nothing.
    // NOTE(review): deactivating the pooled markers is the assumed intent -
    // confirm against the upstream sample.
    foreach (var marker in boxPool)
    {
        marker.SetActive(false);
    }
}
/// <summary>
/// Stops further inference and releases the input sources, the render texture
/// and the inference worker. Safe to call more than once.
/// </summary>
void CleanUp()
{
    closing = true;

    if (webcam) Destroy(webcam);
    if (video) Destroy(video);

    // Replaces the garbled "= null;" fragment. Detach the render target before
    // releasing it so the texture is not released while still active.
    // NOTE(review): RenderTexture.active is the presumed original target of
    // that assignment - confirm.
    RenderTexture.active = null;
    if (targetTexture) targetTexture.Release();

    // The worker owns backend resources; dropping the reference alone would
    // leak them.
    worker?.Dispose();
    worker = null;
}
void OnDestroy()