using System.Collections.Generic;
using Unity.Sentis;
using UnityEngine;
using UnityEngine.UI;
using UnityEngine.Video;

/*
 *  YOLO inference script
 *  =====================
 *
 *  Place this script on the Main Camera.
 *
 *  Place the yolov7-tiny.sentis file and a *.mp4 video file in the Assets/StreamingAssets folder.
 *
 */

/// <summary>
/// Plays a video from StreamingAssets, runs every frame through a Sentis model and
/// draws one labelled UI box per row of the model output on top of the displayed frame.
/// </summary>
public class RunYOLO : MonoBehaviour
{
    const string modelName = "yolov7-tiny.sentis";
    const string videoName = "giraffes.mp4";

    // Link the classes.txt here:
    public TextAsset labelsAsset;

    // Create a Raw Image of size 640x640 and link it here:
    public RawImage displayImage;

    // Link to a bounding box texture here:
    public Sprite boxTexture;

    // Link to the font for the labels:
    public Font font;

    // Label shown when the model reports a class index that has no entry in the labels file.
    private const string unknownLabel = "unknown";

    private Transform displayLocation;
    private Model model;
    private IWorker engine;
    private string[] labels;
    private RenderTexture targetRT;

    const BackendType backend = BackendType.GPUCompute;

    private const int imageWidth = 640;
    private const int imageHeight = 640;

    private VideoPlayer video;

    /// <summary>
    /// Bounding box data in display coordinates, measured from the centre of the image.
    /// </summary>
    public struct BoundingBox
    {
        public float centerX;
        public float centerY;
        public float width;
        public float height;
        public string label;
        public float confidence;
    }

    /// <summary>
    /// Loads the labels and the model, allocates the render target, creates the worker
    /// and starts video playback.
    /// </summary>
    void Start()
    {
        Application.targetFrameRate = 60;
        Screen.orientation = ScreenOrientation.LandscapeLeft;

        // Parse neural net labels. Each entry is trimmed so that CRLF line endings do not
        // leave a trailing '\r' on the label text; entries are never removed, so the
        // line number -> class index mapping is preserved.
        labels = labelsAsset.text.Split('\n');
        for (int i = 0; i < labels.Length; i++)
        {
            labels[i] = labels[i].Trim();
        }

        // Load model
        model = ModelLoader.Load(Application.streamingAssetsPath + "/" + modelName);

        targetRT = new RenderTexture(imageWidth, imageHeight, 0);

        // The display image's transform is the parent of every annotation box.
        displayLocation = displayImage.transform;

        // Create engine to run model
        engine = WorkerFactory.CreateWorker(backend, model);

        SetupInput();
    }

    /// <summary>
    /// Adds a looping <see cref="VideoPlayer"/> to this GameObject that decodes the
    /// StreamingAssets video to a texture only (no direct rendering).
    /// </summary>
    void SetupInput()
    {
        video = gameObject.AddComponent<VideoPlayer>();
        video.renderMode = VideoRenderMode.APIOnly;
        video.source = VideoSource.Url;
        video.url = Application.streamingAssetsPath + "/" + videoName;
        video.isLooping = true;
        video.Play();
    }

    private void Update()
    {
        ExecuteML();

        if (Input.GetKeyDown(KeyCode.Escape))
        {
            Application.Quit();
        }
    }

    /// <summary>
    /// Clears last frame's annotations, copies the current video frame into the render
    /// target, runs the model on it and draws one box per output row.
    /// Does nothing beyond clearing when no video frame is available yet.
    /// </summary>
    public void ExecuteML()
    {
        ClearAnnotations();

        if (!video || !video.texture)
        {
            return;
        }

        // Scale the source horizontally by 1/aspect while copying into the square target.
        float aspect = video.width * 1f / video.height;
        Graphics.Blit(video.texture, targetRT, new Vector2(1f / aspect, 1), new Vector2(0, 0));
        displayImage.texture = targetRT;

        // The using declaration disposes the tensor when this method exits; no explicit
        // Dispose call is needed (the original disposed it twice).
        using var input = TextureConverter.ToTensor(targetRT, imageWidth, imageHeight, 3);
        engine.Execute(input);

        // Read output tensors. PeekOutput returns a tensor owned by the worker, so it is
        // not disposed here.
        var output = engine.PeekOutput() as TensorFloat;
        if (output == null)
        {
            Debug.LogWarning("RunYOLO: model output is not a TensorFloat; skipping frame.");
            return;
        }
        output.MakeReadable();

        // Offsets that move the origin from the image corner to the image centre.
        const float halfWidth = imageWidth / 2f;
        const float halfHeight = imageHeight / 2f;

        // Draw the bounding boxes. Columns 1..4 are used as the two x/y edge coordinates
        // of the box, column 5 as the class index and column 6 as the confidence.
        // NOTE(review): column 0 is unused here - presumably a batch index; confirm
        // against the model's output layout.
        int detectionCount = output.shape[0];
        for (int n = 0; n < detectionCount; n++)
        {
            int classIndex = (int)output[n, 5];
            bool hasLabel = classIndex >= 0 && classIndex < labels.Length;

            var box = new BoundingBox
            {
                centerX = (output[n, 1] + output[n, 3]) / 2 - halfWidth,
                centerY = (output[n, 2] + output[n, 4]) / 2 - halfHeight,
                width = output[n, 3] - output[n, 1],
                height = output[n, 4] - output[n, 2],
                // An out-of-range class index would otherwise throw and abort the frame.
                label = hasLabel ? labels[classIndex] : unknownLabel,
                confidence = output[n, 6]
            };
            DrawBox(box);
        }
    }

    /// <summary>
    /// Creates a sliced-sprite UI panel for <paramref name="box"/> under the display
    /// image, with a child text object showing the box label.
    /// </summary>
    /// <param name="box">Box to draw; its centre is relative to the display image centre,
    /// with positive Y pointing down (it is negated for the UI position).</param>
    public void DrawBox(BoundingBox box)
    {
        Color color = Color.yellow;

        GameObject panel = new GameObject("ObjectBox");
        // NOTE(review): the type argument of this call was lost in the source; UI graphics
        // need a CanvasRenderer, so that is what is added here - confirm.
        panel.AddComponent<CanvasRenderer>();
        Image img = panel.AddComponent<Image>();
        img.color = color;
        img.sprite = boxTexture;
        img.type = Image.Type.Sliced;

        panel.transform.SetParent(displayLocation, false);
        panel.transform.localPosition = new Vector3(box.centerX, -box.centerY);

        RectTransform rt = panel.GetComponent<RectTransform>();
        rt.sizeDelta = new Vector2(box.width, box.height);

        // Add class label
        var text = new GameObject("ObjectLabel");
        text.AddComponent<CanvasRenderer>();
        Text txt = text.AddComponent<Text>();
        text.transform.SetParent(panel.transform, false);
        txt.font = font;
        txt.text = box.label;
        txt.color = color;
        txt.fontSize = 40;
        txt.horizontalOverflow = HorizontalWrapMode.Overflow;

        // Same assignment order as the original: offsets first, anchors last.
        RectTransform rt2 = text.GetComponent<RectTransform>();
        rt2.offsetMin = new Vector2(20, rt2.offsetMin.y);
        rt2.offsetMax = new Vector2(0, rt2.offsetMax.y);
        rt2.offsetMax = new Vector2(rt2.offsetMax.x, 30);
        rt2.offsetMin = new Vector2(rt2.offsetMin.x, 0);
        rt2.anchorMin = new Vector2(0, 0);
        rt2.anchorMax = new Vector2(1, 1);
    }

    /// <summary>
    /// Destroys every child of the display image, i.e. all boxes drawn for the previous frame.
    /// </summary>
    public void ClearAnnotations()
    {
        foreach (Transform child in displayLocation)
        {
            Destroy(child.gameObject);
        }
    }

    /// <summary>
    /// Releases the worker and the render target created in <see cref="Start"/>.
    /// </summary>
    private void OnDestroy()
    {
        engine?.Dispose();

        // The render texture is created by this component, so it must be released here.
        if (targetRT != null)
        {
            targetRT.Release();
            Destroy(targetRT);
            targetRT = null;
        }
    }
}