// DebertaV3.cs — zero-shot text classification with DeBERTa-v3 (Unity Sentis sample).
using System;
using System.Collections.Generic;
using System.Linq;
using Unity.Sentis;
using UnityEngine;
/// <summary>
/// Zero-shot text classification with a DeBERTa-v3 NLI model (Unity Sentis).
/// For each candidate class, a hypothesis is built from <see cref="hypothesisTemplate"/>
/// and scored for entailment against <see cref="text"/>; scores are logged on Start.
/// </summary>
public sealed class DebertaV3 : MonoBehaviour
{
    // Model and vocabulary assets, assigned in the Inspector.
    public ModelAsset model;
    public TextAsset vocabulary;
    // When true (or with a single class), each class is scored independently via a
    // softmax over entailment vs. contradiction; when false, scores are normalized
    // across all candidate classes so they sum to 1.
    public bool multipleTrueClasses;
    public string text = "Angela Merkel is a politician in Germany and leader of the CDU";
    // "{}" is replaced by each class name to form the hypothesis sentence.
    public string hypothesisTemplate = "This example is about {}";
    public string[] classes = { "politics", "economy", "entertainment", "environment" };
    Ops ops;
    IWorker engine;
    ITensorAllocator allocator;
    string[] vocabularyTokens;
    // Special token ids used by the DeBERTa tokenizer.
    const int padToken = 0;
    const int startToken = 1;
    const int separatorToken = 2;
    // Offset between a token's line index in the vocabulary file and its model token id.
    const int vocabToTokenOffset = 260;
    const BackendType backend = BackendType.GPUCompute;
    void Start()
    {
        // Validate configuration before allocating any engine/GPU resources.
        if (classes.Length == 0)
        {
            Debug.LogError("There need to be more than 0 classes");
            return;
        }
        // TrimEnd('\r') makes vocabulary lookup robust to CRLF line endings; for
        // LF-only files it is a no-op, so token indices are unchanged.
        vocabularyTokens = vocabulary.text.Split('\n').Select(t => t.TrimEnd('\r')).ToArray();
        allocator = new TensorCachingAllocator();
        ops = WorkerFactory.CreateOps(backend, allocator);
        Model loadedModel = ModelLoader.Load(model);
        engine = WorkerFactory.CreateWorker(backend, loadedModel);
        string[] hypotheses = classes.Select(x => hypothesisTemplate.Replace("{}", x)).ToArray();
        Batch batch = GetTokenizedBatch(text, hypotheses);
        float[] scores = GetBatchScores(batch);
        for (int i = 0; i < scores.Length; i++)
        {
            Debug.Log($"[{classes[i]}] Entailment Score: {scores[i]}");
        }
    }
    /// <summary>
    /// Runs the model on a tokenized batch and returns one entailment score per example.
    /// </summary>
    float[] GetBatchScores(Batch batch)
    {
        using var inputIds = new TensorInt(new TensorShape(batch.BatchCount, batch.BatchLength), batch.BatchedTokens);
        using var attentionMask = new TensorInt(new TensorShape(batch.BatchCount, batch.BatchLength), batch.BatchedMasks);
        Dictionary<string, Tensor> inputs = new()
        {
            {"input_ids", inputIds},
            {"attention_mask", attentionMask}
        };
        engine.Execute(inputs);
        // PeekOutput returns a tensor owned by the worker — do not dispose it here.
        TensorFloat logits = (TensorFloat)engine.PeekOutput("logits");
        float[] scores = ScoresFromLogits(logits);
        return scores;
    }
    /// <summary>
    /// Tokenizes the prompt and all hypotheses into a single fixed-width batch.
    /// </summary>
    Batch GetTokenizedBatch(string prompt, string[] hypotheses)
    {
        Batch batch = new Batch();
        List<int> promptTokens = Tokenize(prompt);
        promptTokens.Insert(0, startToken);
        List<int>[] tokenizedHypotheses = hypotheses.Select(Tokenize).ToArray();
        // Pad every example to the length of the longest hypothesis.
        int maxTokenLength = tokenizedHypotheses.Max(x => x.Count);
        // Each example in the batch follows this format:
        // Start Prompt Separator Hypothesis Separator Padding
        int[] batchedTokens = tokenizedHypotheses.SelectMany(hypothesis => promptTokens
                .Append(separatorToken)
                .Concat(hypothesis)
                .Append(separatorToken)
                .Concat(Enumerable.Repeat(padToken, maxTokenLength - hypothesis.Count)))
            .ToArray();
        // The attention masks have the same length as the tokens.
        // Each attention mask contains repeating 1s for each token, except for padding tokens.
        // (promptTokens already includes the start token; each +1 covers a separator.)
        int[] batchedMasks = tokenizedHypotheses.SelectMany(hypothesis => Enumerable.Repeat(1, promptTokens.Count + 1)
                .Concat(Enumerable.Repeat(1, hypothesis.Count + 1))
                .Concat(Enumerable.Repeat(0, maxTokenLength - hypothesis.Count)))
            .ToArray();
        batch.BatchCount = hypotheses.Length;
        batch.BatchLength = batchedTokens.Length / hypotheses.Length;
        batch.BatchedTokens = batchedTokens;
        batch.BatchedMasks = batchedMasks;
        return batch;
    }
    /// <summary>
    /// Converts raw model logits of shape [batch size, 2] into one score per example.
    /// </summary>
    float[] ScoresFromLogits(TensorFloat logits)
    {
        // The logits represent the model's predictions for entailment and non-entailment for each example in the batch.
        // They are of shape [batch size, 2], with two values per example.
        // To obtain a single value (score) per example, a softmax function is applied
        TensorFloat tensorScores;
        // shape.Length(0, 1) is the batch dimension; a single class always scores independently.
        if (multipleTrueClasses || logits.shape.Length(0, 1) == 1)
        {
            // Softmax over the entailment vs. contradiction dimension for each label independently
            tensorScores = ops.Softmax(logits, -1);
        }
        else
        {
            // Softmax over all candidate labels
            tensorScores = ops.Softmax(logits, 0);
        }
        // Download from the backend so the values can be read on the CPU.
        tensorScores.MakeReadable();
        float[] tensorArray = tensorScores.ToReadOnlyArray();
        tensorScores.Dispose();
        // Select the first column which is the column where the scores are stored
        float[] scores = new float[tensorArray.Length / 2];
        for (int i = 0; i < scores.Length; i++)
        {
            scores[i] = tensorArray[i * 2];
        }
        return scores;
    }
    /// <summary>
    /// Greedy longest-match subword tokenization (SentencePiece-style).
    /// Splits the input on whitespace, then for each word repeatedly matches the
    /// longest vocabulary entry; word-initial pieces carry the "▁" prefix.
    /// NOTE(review): pieces with no vocabulary match are silently skipped — confirm
    /// this matches the reference tokenizer's unknown-token handling.
    /// </summary>
    List<int> Tokenize(string input)
    {
        string[] words = input.Split(null);
        List<int> ids = new();
        foreach (string word in words)
        {
            int start = 0;
            // Try the longest candidate first, shrinking until a vocabulary hit.
            for(int i = word.Length; i >= 0;i--)
            {
                string subWord = start == 0 ? "▁" + word.Substring(start, i) : word.Substring(start, i-start);
                int index = Array.IndexOf(vocabularyTokens, subWord);
                if (index >= 0)
                {
                    ids.Add(index + vocabToTokenOffset);
                    if (i == word.Length) break;
                    // Restart the longest-match search on the remaining suffix:
                    // after the loop's i-- the next candidate is the full remainder.
                    start = i;
                    i = word.Length + 1;
                }
            }
        }
        return ids;
    }
    void OnDestroy()
    {
        // Release GPU/backend resources created in Start.
        engine?.Dispose();
        allocator?.Dispose();
        ops?.Dispose();
    }
    /// <summary>Flattened token/mask arrays for a [BatchCount, BatchLength] batch.</summary>
    struct Batch
    {
        public int BatchCount;
        public int BatchLength;
        public int[] BatchedTokens;
        public int[] BatchedMasks;
    }
}