Pippe committed on
Commit
7368ee6
1 Parent(s): bb514d6

Upload 4 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ deberta-v3-base-zeroshot-v1.1-all-33.sentis filter=lfs diff=lfs merge=lfs -text
DebertaV3.cs ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
using System;
using System.Collections.Generic;
using System.Linq;
using Unity.Sentis;
using UnityEngine;

/// <summary>
/// Zero-shot text classification with a DeBERTa-v3 NLI model running on Unity Sentis.
/// Each candidate class is expanded into a hypothesis via <see cref="hypothesisTemplate"/>
/// (e.g. "This example is about politics"), batched against <see cref="text"/>, run through
/// the model, and scored for entailment. One score per class is logged from Start().
/// </summary>
public sealed class DebertaV3 : MonoBehaviour
{
    public ModelAsset model;       // DeBERTa-v3 NLI model (.sentis / .onnx) asset.
    public TextAsset vocabulary;   // Newline-separated SentencePiece vocabulary (vocab.txt).
    public bool multipleTrueClasses; // True: classes scored independently; false: classes compete.
    public string text = "Angela Merkel is a politician in Germany and leader of the CDU";
    public string hypothesisTemplate = "This example is about {}";
    public string[] classes = { "politics", "economy", "entertainment", "environment" };

    Ops ops;
    IWorker engine;
    ITensorAllocator allocator;
    string[] vocabularyTokens;

    // Special token ids used by the DeBERTa tokenizer.
    const int padToken = 0;
    const int startToken = 1;
    const int separatorToken = 2;
    // Offset between a line index in the vocabulary file and the model's token id.
    const int vocabToTokenOffset = 260;
    const BackendType backend = BackendType.GPUCompute;

    void Start()
    {
        if (classes == null || classes.Length == 0)
        {
            // The original code divided by hypotheses.Length and would throw on an
            // empty class list; fail gracefully instead.
            Debug.LogWarning("DebertaV3: no classes configured, nothing to score.");
            return;
        }

        // NOTE(review): entries keep a trailing '\r' if vocab.txt has CRLF line
        // endings, which would silently break token lookup — confirm the asset uses LF.
        vocabularyTokens = vocabulary.text.Split("\n");

        allocator = new TensorCachingAllocator();
        ops = WorkerFactory.CreateOps(backend, allocator);

        Model loadedModel = ModelLoader.Load(model);
        engine = WorkerFactory.CreateWorker(backend, loadedModel);

        // One hypothesis per candidate class, e.g. "This example is about politics".
        string[] hypotheses = classes.Select(c => hypothesisTemplate.Replace("{}", c)).ToArray();
        Batch batch = GetTokenizedBatch(text, hypotheses);
        float[] scores = GetBatchScores(batch);

        for (int i = 0; i < scores.Length; i++)
        {
            Debug.Log($"[{classes[i]}] Entailment Score: {scores[i]}");
        }
    }

    /// <summary>
    /// Runs the model on a tokenized batch and returns one entailment score per example.
    /// </summary>
    float[] GetBatchScores(Batch batch)
    {
        using var inputIds = new TensorInt(new TensorShape(batch.BatchCount, batch.BatchLength), batch.BatchedTokens);
        using var attentionMask = new TensorInt(new TensorShape(batch.BatchCount, batch.BatchLength), batch.BatchedMasks);

        Dictionary<string, Tensor> inputs = new()
        {
            { "input_ids", inputIds },
            { "attention_mask", attentionMask }
        };

        engine.Execute(inputs);

        // PeekOutput returns a tensor owned by the worker; it must not be disposed here.
        TensorFloat logits = (TensorFloat)engine.PeekOutput("logits");
        return ScoresFromLogits(logits);
    }

    /// <summary>
    /// Builds a single padded batch of token ids and attention masks, one row per hypothesis.
    /// Row layout: Start, Prompt, Separator, Hypothesis, Separator, Padding.
    /// </summary>
    Batch GetTokenizedBatch(string prompt, string[] hypotheses)
    {
        Batch batch = new Batch();

        List<int> promptTokens = Tokenize(prompt);
        promptTokens.Insert(0, startToken);

        List<int>[] tokenizedHypotheses = hypotheses.Select(Tokenize).ToArray();
        int maxTokenLength = tokenizedHypotheses.Max(x => x.Count);

        // Pad every row to the same length so the batch forms a rectangular tensor.
        int[] batchedTokens = tokenizedHypotheses.SelectMany(hypothesis => promptTokens
                .Append(separatorToken)
                .Concat(hypothesis)
                .Append(separatorToken)
                .Concat(Enumerable.Repeat(padToken, maxTokenLength - hypothesis.Count)))
            .ToArray();

        // Attention masks mirror the tokens: 1 for every real token (including the
        // separators), 0 for padding.
        int[] batchedMasks = tokenizedHypotheses.SelectMany(hypothesis => Enumerable.Repeat(1, promptTokens.Count + 1)
                .Concat(Enumerable.Repeat(1, hypothesis.Count + 1))
                .Concat(Enumerable.Repeat(0, maxTokenLength - hypothesis.Count)))
            .ToArray();

        batch.BatchCount = hypotheses.Length;
        batch.BatchLength = batchedTokens.Length / hypotheses.Length;
        batch.BatchedTokens = batchedTokens;
        batch.BatchedMasks = batchedMasks;

        return batch;
    }

    /// <summary>
    /// Converts raw logits of shape [batch, 2] (entailment, contradiction per example)
    /// into a single entailment score per example.
    /// </summary>
    float[] ScoresFromLogits(TensorFloat logits)
    {
        TensorFloat tensorScores;
        if (multipleTrueClasses || logits.shape[0] == 1)
        {
            // Multi-label mode (or only a single candidate class): softmax over the
            // entailment-vs-contradiction axis of each example independently.
            //
            // FIX: the original condition was `logits.shape.length == 1`, but Sentis
            // TensorShape.length is the TOTAL element count — 2 * batch for [batch, 2]
            // logits — so it could never equal 1 and the branch was unreachable. The
            // intended check, mirroring the Hugging Face zero-shot pipeline
            // (`multi_label or len(candidate_labels) == 1`), is a batch of one example,
            // i.e. the size of axis 0.
            tensorScores = ops.Softmax(logits, -1);
        }
        else
        {
            // Single-label mode: the candidate classes compete, so softmax runs across
            // the batch axis (column-wise over all examples).
            tensorScores = ops.Softmax(logits, 0);
        }

        tensorScores.MakeReadable();
        float[] tensorArray = tensorScores.ToReadOnlyArray();

        tensorScores.Dispose();

        // Column 0 of each [entailment, contradiction] row holds the entailment score.
        float[] scores = new float[tensorArray.Length / 2];
        for (int i = 0; i < scores.Length; i++)
        {
            scores[i] = tensorArray[i * 2];
        }

        return scores;
    }

    /// <summary>
    /// Greedy longest-match SentencePiece-style tokenization: for each whitespace-separated
    /// word, repeatedly take the longest vocabulary entry that prefixes the remainder
    /// (the first piece is looked up with the "▁" word-start marker prepended).
    /// </summary>
    List<int> Tokenize(string input)
    {
        string[] words = input.Split(null);

        List<int> ids = new();

        foreach (string word in words)
        {
            int start = 0;
            // Try the longest candidate first and shrink; on a match, restart the scan
            // from the end of the matched piece (i is reset so the next iteration begins
            // at the full remaining length).
            for (int i = word.Length; i >= 0; i--)
            {
                string subWord = start == 0 ? "▁" + word.Substring(start, i) : word.Substring(start, i - start);
                int index = Array.IndexOf(vocabularyTokens, subWord);
                if (index >= 0)
                {
                    ids.Add(index + vocabToTokenOffset);
                    if (i == word.Length) break;
                    start = i;
                    i = word.Length + 1;
                }
            }
            // NOTE(review): a word (or word remainder) with no vocabulary match is
            // silently dropped — there is no unknown-token fallback here.
        }

        return ids;
    }

    void OnDestroy()
    {
        engine?.Dispose();
        allocator?.Dispose();
        ops?.Dispose();
    }

    // Rectangular tokenized batch: BatchCount rows of BatchLength ids/masks each,
    // flattened row-major into the arrays.
    struct Batch
    {
        public int BatchCount;
        public int BatchLength;
        public int[] BatchedTokens;
        public int[] BatchedMasks;
    }
}
deberta-v3-base-zeroshot-v1.1-all-33.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cda45c4074994990222c0192ab5083fb99fb9d3e4dacdffb4c97a754b4d97c5
3
+ size 738563189
deberta-v3-base-zeroshot-v1.1-all-33.sentis ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4999ac24dac25a77affb5b0086e93228d6d17716f653bdd780875fc243b53ab
3
+ size 775143176
vocab.txt ADDED
The diff for this file is too large to render. See raw diff