ddrg
/

Fill-Mask
Transformers
PyTorch
deberta-v2
Inference Endpoints
jdrechsel committed on
Commit
29affe2
1 Parent(s): c7ffa45

Upload 7 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[MASK]": 128000,
3
+ "\\Bbb": 128053,
4
+ "\\Big": 128280,
5
+ "\\Bigl": 128262,
6
+ "\\Bigr": 128264,
7
+ "\\Box": 128285,
8
+ "\\Delta": 128068,
9
+ "\\Gamma": 128089,
10
+ "\\Im": 128291,
11
+ "\\Lambda": 128162,
12
+ "\\Leftarrow": 128283,
13
+ "\\Leftrightarrow": 128151,
14
+ "\\Longleftrightarrow": 128275,
15
+ "\\Longrightarrow": 128216,
16
+ "\\Omega": 128064,
17
+ "\\Phi": 128134,
18
+ "\\Pi": 128234,
19
+ "\\Pr": 128207,
20
+ "\\Psi": 128212,
21
+ "\\R": 128252,
22
+ "\\Re": 128243,
23
+ "\\Rightarrow": 128066,
24
+ "\\Sigma": 128136,
25
+ "\\Theta": 128208,
26
+ "\\Vert": 128141,
27
+ "\\\\": 128009,
28
+ "\\a": 128259,
29
+ "\\aleph": 128253,
30
+ "\\alpha": 128028,
31
+ "\\angle": 128165,
32
+ "\\approx": 128079,
33
+ "\\arcsin": 128284,
34
+ "\\arctan": 128191,
35
+ "\\arg": 128254,
36
+ "\\ast": 128139,
37
+ "\\b": 128286,
38
+ "\\backslash": 128156,
39
+ "\\bar": 128070,
40
+ "\\barwedge": 128257,
41
+ "\\begin": 128021,
42
+ "\\beta": 128046,
43
+ "\\big": 128107,
44
+ "\\bigcap": 128158,
45
+ "\\bigcup": 128121,
46
+ "\\bigg": 128127,
47
+ "\\biggl": 128299,
48
+ "\\biggr": 128293,
49
+ "\\bigl": 128215,
50
+ "\\bigoplus": 128242,
51
+ "\\bigr": 128203,
52
+ "\\bigtriangleup": 128277,
53
+ "\\bigwedge": 128300,
54
+ "\\binom": 128052,
55
+ "\\blacksquare": 128295,
56
+ "\\bmod": 128172,
57
+ "\\boldsymbol": 128160,
58
+ "\\bot": 128251,
59
+ "\\boxdot": 128231,
60
+ "\\boxed": 128260,
61
+ "\\bullet": 128187,
62
+ "\\cal": 128209,
63
+ "\\cancel": 128129,
64
+ "\\cap": 128038,
65
+ "\\cdot": 128001,
66
+ "\\cdots": 128023,
67
+ "\\cfrac": 128245,
68
+ "\\chi": 128106,
69
+ "\\choose": 128133,
70
+ "\\circ": 128056,
71
+ "\\colon": 128135,
72
+ "\\coloneqq": 128138,
73
+ "\\color": 128217,
74
+ "\\complement": 128120,
75
+ "\\cong": 128110,
76
+ "\\cos": 128024,
77
+ "\\cosh": 128188,
78
+ "\\cot": 128092,
79
+ "\\cr": 128204,
80
+ "\\csc": 128077,
81
+ "\\cup": 128050,
82
+ "\\d": 128241,
83
+ "\\dagger": 128199,
84
+ "\\ddot": 128287,
85
+ "\\ddots": 128213,
86
+ "\\deg": 128210,
87
+ "\\delta": 128048,
88
+ "\\det": 128157,
89
+ "\\dfrac": 128007,
90
+ "\\diamond": 128267,
91
+ "\\dim": 128150,
92
+ "\\displaystyle": 128080,
93
+ "\\div": 128124,
94
+ "\\dot": 128147,
95
+ "\\dots": 128078,
96
+ "\\dotsc": 128268,
97
+ "\\dotsm": 128279,
98
+ "\\downarrow": 128240,
99
+ "\\dt": 128296,
100
+ "\\dx": 128211,
101
+ "\\ell": 128118,
102
+ "\\emptyset": 128094,
103
+ "\\end": 128022,
104
+ "\\epsilon": 128033,
105
+ "\\equiv": 128054,
106
+ "\\eta": 128101,
107
+ "\\exists": 128096,
108
+ "\\exp": 128137,
109
+ "\\f": 128250,
110
+ "\\forall": 128049,
111
+ "\\frac": 128002,
112
+ "\\frak": 128273,
113
+ "\\gamma": 128062,
114
+ "\\gcd": 128194,
115
+ "\\ge": 128086,
116
+ "\\gep": 128228,
117
+ "\\geq": 128025,
118
+ "\\geqslant": 128224,
119
+ "\\ger": 128229,
120
+ "\\ges": 128225,
121
+ "\\get": 128230,
122
+ "\\gray": 128117,
123
+ "\\gt": 128155,
124
+ "\\hat": 128090,
125
+ "\\hline": 128183,
126
+ "\\hookrightarrow": 128256,
127
+ "\\hphantom": 128182,
128
+ "\\hspace": 128174,
129
+ "\\iff": 128144,
130
+ "\\iint": 128219,
131
+ "\\implies": 128087,
132
+ "\\in": 128006,
133
+ "\\inf": 128154,
134
+ "\\infty": 128011,
135
+ "\\int": 128014,
136
+ "\\iota": 128266,
137
+ "\\kappa": 128161,
138
+ "\\ker": 128167,
139
+ "\\lVert": 128186,
140
+ "\\lambda": 128035,
141
+ "\\land": 128140,
142
+ "\\langle": 128057,
143
+ "\\lbrace": 128197,
144
+ "\\lceil": 128226,
145
+ "\\lcm": 128180,
146
+ "\\ldots": 128072,
147
+ "\\le": 128051,
148
+ "\\left": 128003,
149
+ "\\leftarrow": 128274,
150
+ "\\leftrightarrow": 128255,
151
+ "\\lep": 128232,
152
+ "\\leq": 128016,
153
+ "\\leqslant": 128177,
154
+ "\\ler": 128223,
155
+ "\\les": 128222,
156
+ "\\let": 128227,
157
+ "\\lfloor": 128126,
158
+ "\\lim": 128030,
159
+ "\\liminf": 128221,
160
+ "\\limits": 128032,
161
+ "\\limsup": 128185,
162
+ "\\ln": 128042,
163
+ "\\lnot": 128173,
164
+ "\\log": 128037,
165
+ "\\longrightarrow": 128159,
166
+ "\\lor": 128163,
167
+ "\\lt": 128104,
168
+ "\\lvert": 128146,
169
+ "\\mapsto": 128105,
170
+ "\\mathbb": 128005,
171
+ "\\mathbf": 128036,
172
+ "\\mathcal": 128027,
173
+ "\\mathfrak": 128071,
174
+ "\\mathop": 128244,
175
+ "\\mathrm": 128029,
176
+ "\\mathscr": 128112,
177
+ "\\mathsf": 128169,
178
+ "\\max": 128122,
179
+ "\\mid": 128058,
180
+ "\\middle": 128184,
181
+ "\\min": 128143,
182
+ "\\mod": 128145,
183
+ "\\models": 128281,
184
+ "\\mu": 128044,
185
+ "\\nabla": 128111,
186
+ "\\ne": 128113,
187
+ "\\neg": 128148,
188
+ "\\neq": 128013,
189
+ "\\newcommand": 128214,
190
+ "\\ni": 128276,
191
+ "\\nlet": 128278,
192
+ "\\nmid": 128249,
193
+ "\\not": 128130,
194
+ "\\notin": 128152,
195
+ "\\nu": 128091,
196
+ "\\odot": 128246,
197
+ "\\oint": 128261,
198
+ "\\omega": 128059,
199
+ "\\operatorname": 128039,
200
+ "\\oplus": 128114,
201
+ "\\oslash": 128258,
202
+ "\\otimes": 128088,
203
+ "\\over": 128103,
204
+ "\\overline": 128063,
205
+ "\\overset": 128196,
206
+ "\\p": 128288,
207
+ "\\parallel": 128292,
208
+ "\\pars": 128270,
209
+ "\\partial": 128026,
210
+ "\\perp": 128170,
211
+ "\\phantom": 128192,
212
+ "\\phi": 128041,
213
+ "\\pi": 128010,
214
+ "\\pm": 128047,
215
+ "\\pmatrix": 128247,
216
+ "\\pmod": 128102,
217
+ "\\pmxd": 128200,
218
+ "\\prec": 128294,
219
+ "\\prime": 128100,
220
+ "\\prod": 128098,
221
+ "\\psi": 128081,
222
+ "\\qquad": 128116,
223
+ "\\quad": 128060,
224
+ "\\rVert": 128189,
225
+ "\\rangle": 128055,
226
+ "\\rbrace": 128201,
227
+ "\\rceil": 128220,
228
+ "\\rfloor": 128125,
229
+ "\\rho": 128074,
230
+ "\\right": 128004,
231
+ "\\rightarrow": 128034,
232
+ "\\root": 128263,
233
+ "\\rvert": 128149,
234
+ "\\scriptsize": 128239,
235
+ "\\sec": 128076,
236
+ "\\setminus": 128084,
237
+ "\\sf": 128297,
238
+ "\\sigma": 128045,
239
+ "\\sim": 128132,
240
+ "\\simeq": 128179,
241
+ "\\sin": 128019,
242
+ "\\sinh": 128193,
243
+ "\\space": 128097,
244
+ "\\sqrt": 128012,
245
+ "\\square": 128235,
246
+ "\\stackrel": 128198,
247
+ "\\star": 128166,
248
+ "\\subset": 128082,
249
+ "\\subseteq": 128093,
250
+ "\\subsetneq": 128269,
251
+ "\\substack": 128181,
252
+ "\\sum": 128015,
253
+ "\\sup": 128099,
254
+ "\\supset": 128265,
255
+ "\\supseteq": 128298,
256
+ "\\tag": 128083,
257
+ "\\tan": 128061,
258
+ "\\tanh": 128289,
259
+ "\\tau": 128069,
260
+ "\\text": 128018,
261
+ "\\textbf": 128119,
262
+ "\\textit": 128282,
263
+ "\\textrm": 128164,
264
+ "\\tfrac": 128040,
265
+ "\\therefore": 128248,
266
+ "\\theta": 128031,
267
+ "\\thinspace": 128043,
268
+ "\\tilde": 128115,
269
+ "\\times": 128008,
270
+ "\\to": 128017,
271
+ "\\top": 128218,
272
+ "\\triangle": 128178,
273
+ "\\triangleleft": 128290,
274
+ "\\underbrace": 128175,
275
+ "\\underline": 128176,
276
+ "\\uparrow": 128236,
277
+ "\\varepsilon": 128067,
278
+ "\\varnothing": 128171,
279
+ "\\varphi": 128065,
280
+ "\\vartheta": 128128,
281
+ "\\vdash": 128237,
282
+ "\\vdots": 128142,
283
+ "\\vec": 128131,
284
+ "\\vee": 128168,
285
+ "\\vert": 128109,
286
+ "\\vphantom": 128190,
287
+ "\\wedge": 128108,
288
+ "\\widehat": 128202,
289
+ "\\widetilde": 128206,
290
+ "\\x": 128205,
291
+ "\\xi": 128085,
292
+ "\\xrightarrow": 128195,
293
+ "\\zeta": 128073,
294
+ "\\|": 128020,
295
+ "align*": 128123,
296
+ "bmatrix": 128095,
297
+ "eqnarray": 128233,
298
+ "eqnarray*": 128271,
299
+ "equation": 128153,
300
+ "equation*": 128238,
301
+ "pmatrix": 128075,
302
+ "vmatrix": 128272
303
+ }
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../../final/models/pretraining/DML-MFM-NFIR-FFIR-MTM",
3
+ "architectures": [
4
+ "DebertaV2ForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "layer_norm_eps": 1e-07,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "pooler_dropout": 0,
21
+ "pooler_hidden_act": "gelu",
22
+ "pooler_hidden_size": 768,
23
+ "pos_att_type": [
24
+ "p2c",
25
+ "c2p"
26
+ ],
27
+ "position_biased_input": false,
28
+ "position_buckets": 256,
29
+ "relative_attention": true,
30
+ "share_att_key": true,
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.30.2",
33
+ "type_vocab_size": 0,
34
+ "vocab_size": 128301
35
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e303241bfaca8dcb6b3b07b6f97fd88b9e05c91984b847818f964991c05d4d5
3
+ size 738388721
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "split_by_punct": false,
13
+ "tokenizer_class": "DebertaV2Tokenizer",
14
+ "unk_token": "[UNK]",
15
+ "vocab_type": "spm"
16
+ }