jdrechsel committed on
Commit
cbdf211
1 Parent(s): e54aa4d

Upload 8 files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\\Bbb": 29048,
3
+ "\\Big": 29275,
4
+ "\\Bigl": 29257,
5
+ "\\Bigr": 29259,
6
+ "\\Box": 29280,
7
+ "\\Delta": 29063,
8
+ "\\Gamma": 29084,
9
+ "\\Im": 29286,
10
+ "\\Lambda": 29157,
11
+ "\\Leftarrow": 29278,
12
+ "\\Leftrightarrow": 29147,
13
+ "\\Longleftrightarrow": 29270,
14
+ "\\Longrightarrow": 29211,
15
+ "\\Omega": 29059,
16
+ "\\Phi": 29130,
17
+ "\\Pi": 29229,
18
+ "\\Pr": 29202,
19
+ "\\Psi": 29207,
20
+ "\\R": 29247,
21
+ "\\Re": 29238,
22
+ "\\Rightarrow": 29061,
23
+ "\\Sigma": 29132,
24
+ "\\Theta": 29203,
25
+ "\\Vert": 29137,
26
+ "\\\\": 29004,
27
+ "\\a": 29254,
28
+ "\\aleph": 29248,
29
+ "\\alpha": 29023,
30
+ "\\angle": 29160,
31
+ "\\approx": 29074,
32
+ "\\arcsin": 29279,
33
+ "\\arctan": 29186,
34
+ "\\arg": 29249,
35
+ "\\ast": 29135,
36
+ "\\b": 29281,
37
+ "\\backslash": 29151,
38
+ "\\bar": 29065,
39
+ "\\barwedge": 29252,
40
+ "\\begin": 29016,
41
+ "\\beta": 29041,
42
+ "\\big": 29103,
43
+ "\\bigcap": 29153,
44
+ "\\bigcup": 29117,
45
+ "\\bigg": 29123,
46
+ "\\biggl": 29294,
47
+ "\\biggr": 29288,
48
+ "\\bigl": 29210,
49
+ "\\bigoplus": 29237,
50
+ "\\bigr": 29198,
51
+ "\\bigtriangleup": 29272,
52
+ "\\bigwedge": 29295,
53
+ "\\binom": 29047,
54
+ "\\blacksquare": 29290,
55
+ "\\bmod": 29167,
56
+ "\\boldsymbol": 29155,
57
+ "\\bot": 29246,
58
+ "\\boxdot": 29226,
59
+ "\\boxed": 29255,
60
+ "\\bullet": 29182,
61
+ "\\cal": 29204,
62
+ "\\cancel": 29125,
63
+ "\\cap": 29033,
64
+ "\\cdot": 28996,
65
+ "\\cdots": 29018,
66
+ "\\cfrac": 29240,
67
+ "\\chi": 29102,
68
+ "\\choose": 29129,
69
+ "\\circ": 29051,
70
+ "\\colon": 29131,
71
+ "\\coloneqq": 29134,
72
+ "\\color": 29212,
73
+ "\\complement": 29116,
74
+ "\\cong": 29106,
75
+ "\\cos": 29019,
76
+ "\\cosh": 29183,
77
+ "\\cot": 29087,
78
+ "\\cr": 29199,
79
+ "\\csc": 29072,
80
+ "\\cup": 29045,
81
+ "\\d": 29236,
82
+ "\\dagger": 29194,
83
+ "\\ddot": 29282,
84
+ "\\ddots": 29208,
85
+ "\\deg": 29205,
86
+ "\\delta": 29043,
87
+ "\\det": 29152,
88
+ "\\dfrac": 29002,
89
+ "\\diamond": 29262,
90
+ "\\dim": 29146,
91
+ "\\displaystyle": 29075,
92
+ "\\div": 29120,
93
+ "\\dot": 29143,
94
+ "\\dots": 29073,
95
+ "\\dotsc": 29263,
96
+ "\\dotsm": 29274,
97
+ "\\downarrow": 29235,
98
+ "\\dt": 29291,
99
+ "\\dx": 29206,
100
+ "\\ell": 29114,
101
+ "\\emptyset": 29090,
102
+ "\\end": 29017,
103
+ "\\epsilon": 29028,
104
+ "\\equiv": 29049,
105
+ "\\eta": 29097,
106
+ "\\exists": 29092,
107
+ "\\exp": 29133,
108
+ "\\f": 29245,
109
+ "\\forall": 29044,
110
+ "\\frac": 28997,
111
+ "\\frak": 29268,
112
+ "\\gamma": 29057,
113
+ "\\gcd": 29189,
114
+ "\\ge": 29081,
115
+ "\\gep": 29223,
116
+ "\\geq": 29020,
117
+ "\\geqslant": 29219,
118
+ "\\ger": 29224,
119
+ "\\ges": 29220,
120
+ "\\get": 29225,
121
+ "\\gray": 29113,
122
+ "\\gt": 29150,
123
+ "\\hat": 29085,
124
+ "\\hline": 29178,
125
+ "\\hookrightarrow": 29251,
126
+ "\\hphantom": 29177,
127
+ "\\hspace": 29169,
128
+ "\\iff": 29140,
129
+ "\\iint": 29214,
130
+ "\\implies": 29082,
131
+ "\\in": 29001,
132
+ "\\inf": 29149,
133
+ "\\infty": 29006,
134
+ "\\int": 29009,
135
+ "\\iota": 29261,
136
+ "\\kappa": 29156,
137
+ "\\ker": 29162,
138
+ "\\lVert": 29181,
139
+ "\\lambda": 29030,
140
+ "\\land": 29136,
141
+ "\\langle": 29052,
142
+ "\\lbrace": 29192,
143
+ "\\lceil": 29221,
144
+ "\\lcm": 29175,
145
+ "\\ldots": 29067,
146
+ "\\le": 29046,
147
+ "\\left": 28998,
148
+ "\\leftarrow": 29269,
149
+ "\\leftrightarrow": 29250,
150
+ "\\lep": 29227,
151
+ "\\leq": 29011,
152
+ "\\leqslant": 29172,
153
+ "\\ler": 29218,
154
+ "\\les": 29217,
155
+ "\\let": 29222,
156
+ "\\lfloor": 29122,
157
+ "\\lim": 29025,
158
+ "\\liminf": 29216,
159
+ "\\limits": 29027,
160
+ "\\limsup": 29180,
161
+ "\\ln": 29037,
162
+ "\\lnot": 29168,
163
+ "\\log": 29032,
164
+ "\\longrightarrow": 29154,
165
+ "\\lor": 29158,
166
+ "\\lt": 29100,
167
+ "\\lvert": 29142,
168
+ "\\mapsto": 29101,
169
+ "\\mathbb": 29000,
170
+ "\\mathbf": 29031,
171
+ "\\mathcal": 29022,
172
+ "\\mathfrak": 29066,
173
+ "\\mathop": 29239,
174
+ "\\mathrm": 29024,
175
+ "\\mathscr": 29108,
176
+ "\\mathsf": 29164,
177
+ "\\max": 29118,
178
+ "\\mid": 29053,
179
+ "\\middle": 29179,
180
+ "\\min": 29139,
181
+ "\\mod": 29141,
182
+ "\\models": 29276,
183
+ "\\mu": 29039,
184
+ "\\nabla": 29107,
185
+ "\\ne": 29109,
186
+ "\\neg": 29144,
187
+ "\\neq": 29008,
188
+ "\\newcommand": 29209,
189
+ "\\ni": 29271,
190
+ "\\nlet": 29273,
191
+ "\\nmid": 29244,
192
+ "\\not": 29126,
193
+ "\\notin": 29148,
194
+ "\\nu": 29086,
195
+ "\\odot": 29241,
196
+ "\\oint": 29256,
197
+ "\\omega": 29054,
198
+ "\\operatorname": 29034,
199
+ "\\oplus": 29110,
200
+ "\\oslash": 29253,
201
+ "\\otimes": 29083,
202
+ "\\over": 29099,
203
+ "\\overline": 29058,
204
+ "\\overset": 29191,
205
+ "\\p": 29283,
206
+ "\\parallel": 29287,
207
+ "\\pars": 29265,
208
+ "\\partial": 29021,
209
+ "\\perp": 29165,
210
+ "\\phantom": 29187,
211
+ "\\phi": 29036,
212
+ "\\pi": 29005,
213
+ "\\pm": 29042,
214
+ "\\pmatrix": 29242,
215
+ "\\pmod": 29098,
216
+ "\\pmxd": 29195,
217
+ "\\prec": 29289,
218
+ "\\prime": 29096,
219
+ "\\prod": 29094,
220
+ "\\psi": 29076,
221
+ "\\qquad": 29112,
222
+ "\\quad": 29055,
223
+ "\\rVert": 29184,
224
+ "\\rangle": 29050,
225
+ "\\rbrace": 29196,
226
+ "\\rceil": 29215,
227
+ "\\rfloor": 29121,
228
+ "\\rho": 29069,
229
+ "\\right": 28999,
230
+ "\\rightarrow": 29029,
231
+ "\\root": 29258,
232
+ "\\rvert": 29145,
233
+ "\\scriptsize": 29234,
234
+ "\\sec": 29071,
235
+ "\\setminus": 29079,
236
+ "\\sf": 29292,
237
+ "\\sigma": 29040,
238
+ "\\sim": 29128,
239
+ "\\simeq": 29174,
240
+ "\\sin": 29014,
241
+ "\\sinh": 29188,
242
+ "\\space": 29093,
243
+ "\\sqrt": 29007,
244
+ "\\square": 29230,
245
+ "\\stackrel": 29193,
246
+ "\\star": 29161,
247
+ "\\subset": 29077,
248
+ "\\subseteq": 29088,
249
+ "\\subsetneq": 29264,
250
+ "\\substack": 29176,
251
+ "\\sum": 29010,
252
+ "\\sup": 29095,
253
+ "\\supset": 29260,
254
+ "\\supseteq": 29293,
255
+ "\\tag": 29078,
256
+ "\\tan": 29056,
257
+ "\\tanh": 29284,
258
+ "\\tau": 29064,
259
+ "\\text": 29013,
260
+ "\\textbf": 29115,
261
+ "\\textit": 29277,
262
+ "\\textrm": 29159,
263
+ "\\tfrac": 29035,
264
+ "\\therefore": 29243,
265
+ "\\theta": 29026,
266
+ "\\thinspace": 29038,
267
+ "\\tilde": 29111,
268
+ "\\times": 29003,
269
+ "\\to": 29012,
270
+ "\\top": 29213,
271
+ "\\triangle": 29173,
272
+ "\\triangleleft": 29285,
273
+ "\\underbrace": 29170,
274
+ "\\underline": 29171,
275
+ "\\uparrow": 29231,
276
+ "\\varepsilon": 29062,
277
+ "\\varnothing": 29166,
278
+ "\\varphi": 29060,
279
+ "\\vartheta": 29124,
280
+ "\\vdash": 29232,
281
+ "\\vdots": 29138,
282
+ "\\vec": 29127,
283
+ "\\vee": 29163,
284
+ "\\vert": 29105,
285
+ "\\vphantom": 29185,
286
+ "\\wedge": 29104,
287
+ "\\widehat": 29197,
288
+ "\\widetilde": 29201,
289
+ "\\x": 29200,
290
+ "\\xi": 29080,
291
+ "\\xrightarrow": 29190,
292
+ "\\zeta": 29068,
293
+ "\\|": 29015,
294
+ "align": 29089,
295
+ "align*": 29119,
296
+ "bmatrix": 29091,
297
+ "eqnarray": 29228,
298
+ "eqnarray*": 29266,
299
+ "equation*": 29233,
300
+ "pmatrix": 29070,
301
+ "vmatrix": 29267
302
+ }
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/models/tokenized",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.25.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 29296
26
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ad0e71c27c6fb4af46a1c398603e50805a68b79af61bb3543e9bcc86b4b6166
3
+ size 434231085
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
stats.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"objectives": "MFM,NFIR,FFIR,MTM", "mixed": true, "steps": "250000"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": false,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "name_or_path": "/models/tokenized",
8
+ "pad_token": "[PAD]",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": "/models/tokenized/special_tokens_map.json",
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff