rjarpa committed
Commit 215efd0 (1 parent: 898078a)

End of training

.gitignore ADDED
@@ -0,0 +1 @@
+ checkpoint-*/
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+ "_name_or_path": "gpt2",
+ "activation_function": "gelu_new",
+ "architectures": [
+ "GPT2LMHeadModel"
+ ],
+ "attn_pdrop": 0.1,
+ "bos_token_id": 1,
+ "embd_pdrop": 0.1,
+ "eos_token_id": 5,
+ "initializer_range": 0.02,
+ "layer_norm_epsilon": 1e-05,
+ "model_type": "gpt2",
+ "n_ctx": 20,
+ "n_embd": 768,
+ "n_head": 12,
+ "n_inner": null,
+ "n_layer": 12,
+ "n_positions": 1024,
+ "reorder_and_upcast_attn": false,
+ "resid_pdrop": 0.1,
+ "scale_attn_by_inverse_layer_idx": false,
+ "scale_attn_weights": true,
+ "summary_activation": null,
+ "summary_first_dropout": 0.1,
+ "summary_proj_to_labels": true,
+ "summary_type": "cls_index",
+ "summary_use_proj": true,
+ "task_specific_params": {
+ "text-generation": {
+ "do_sample": true,
+ "max_length": 50
+ }
+ },
+ "torch_dtype": "float32",
+ "transformers_version": "4.30.2",
+ "use_cache": true,
+ "vocab_size": 223
+ }
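
For orientation, a minimal sketch of loading this checkpoint with transformers 4.30.x. The local path ./model_dir is a hypothetical placeholder for wherever this repository is cloned or downloaded; it is not part of the commit.

from transformers import AutoConfig, GPT2LMHeadModel

# Hypothetical local copy of this repository (config.json + pytorch_model.bin).
config = AutoConfig.from_pretrained("./model_dir")
print(config.vocab_size)  # 223 (see vocab_size above)
print(config.n_ctx)       # 20 (see n_ctx above)

model = GPT2LMHeadModel.from_pretrained("./model_dir")  # weights from pytorch_model.bin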
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 5,
+ "transformers_version": "4.30.2"
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc56c3323da40ca2d4d750770e991e68ee2db716e0d14b2a195fa73d2c575b7e
+ size 344102749
special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "bos_token": "[CLS]",
+ "cls_token": "[CLS]",
+ "eos_token": "\n",
+ "mask_token": "[MASK]",
+ "pad_token": "\n",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer.json ADDED
@@ -0,0 +1,519 @@
+ {
+ "version": "1.0",
+ "truncation": {
+ "direction": "Right",
+ "max_length": 20,
+ "strategy": "LongestFirst",
+ "stride": 0
+ },
+ "padding": null,
+ "added_tokens": [
+ {
+ "id": 0,
+ "content": "[UNK]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 1,
+ "content": "[CLS]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 2,
+ "content": "[SEP]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 3,
+ "content": "[PAD]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 4,
+ "content": "[MASK]",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ },
+ {
+ "id": 5,
+ "content": "\n",
+ "single_word": false,
+ "lstrip": false,
+ "rstrip": false,
+ "normalized": false,
+ "special": true
+ }
+ ],
+ "normalizer": null,
+ "pre_tokenizer": null,
+ "post_processor": null,
+ "decoder": null,
+ "model": {
+ "type": "BPE",
+ "dropout": null,
+ "unk_token": "[UNK]",
+ "continuing_subword_prefix": null,
+ "end_of_word_suffix": null,
+ "fuse_unk": false,
+ "byte_fallback": false,
+ "vocab": {
+ "[UNK]": 0,
+ "[CLS]": 1,
+ "[SEP]": 2,
+ "[PAD]": 3,
+ "[MASK]": 4,
+ "\n": 5,
+ "A": 6,
+ "B": 7,
+ "C": 8,
+ "D": 9,
+ "BB": 10,
+ "AA": 11,
+ "DD": 12,
+ "CC": 13,
+ "BBBB": 14,
+ "AAAA": 15,
+ "DDDD": 16,
+ "CCCC": 17,
+ "BBBBBBBB": 18,
+ "AAAAAAAA": 19,
+ "DDDDDDDD": 20,
+ "CCCCCCCC": 21,
+ "BBB": 22,
+ "AAA": 23,
+ "CCC": 24,
+ "DDD": 25,
+ "BBBBB": 26,
+ "AAAAA": 27,
+ "DDDDD": 28,
+ "CCCCC": 29,
+ "BBBBBB": 30,
+ "AAAAAA": 31,
+ "DDDDDD": 32,
+ "CCCCCC": 33,
+ "BBBBBBB": 34,
+ "BBBBBBBBBBBBBBBB": 35,
+ "AAAAAAA": 36,
+ "AAAAAAAAAAAAAAAA": 37,
+ "DDDDDDD": 38,
+ "CCCCCCC": 39,
+ "DDDDDDDDDDDDDDDD": 40,
+ "CCCCCCCCCCCCCCCC": 41,
+ "BBBBBBBBB": 42,
+ "AAAAAAAAA": 43,
+ "DDDDDDDDD": 44,
+ "BBBBBBBBBB": 45,
+ "AAAAAAAAAA": 46,
+ "CCCCCCCCC": 47,
+ "BBBBBBBBBBB": 48,
+ "AAAAAAAAAAA": 49,
+ "DDDDDDDDDD": 50,
+ "CCCCCCCCCC": 51,
+ "BBBBBBBBBBBB": 52,
+ "DDDDDDDDDDD": 53,
+ "AAAAAAAAAAAA": 54,
+ "CCCCCCCCCCC": 55,
+ "BBBAAA": 56,
+ "BBBBBBBBBBBBB": 57,
+ "AAAAAAAAAAAAA": 58,
+ "BBBCCC": 59,
+ "DDDDDDDDDDDD": 60,
+ "BBBDDD": 61,
+ "CCCCCCCCCCCC": 62,
+ "AAACCC": 63,
+ "AAADDD": 64,
+ "AAAAAAAAAAAAAA": 65,
+ "AAAABBBB": 66,
+ "BBBBBBBBBBBBBB": 67,
+ "DDDDDDDDDDDDD": 68,
+ "CCCCCCCCCCCCC": 69,
+ "BBBBBBBBBBBBBBB": 70,
+ "AAABBB": 71,
+ "DDDCCC": 72,
+ "BBBBAAAA": 73,
+ "AAAAAAAAAAAAAAA": 74,
+ "BBBBCCCC": 75,
+ "DDDDDDDDDDDDDD": 76,
+ "BBBBDDDD": 77,
+ "BBBBCCC": 78,
+ "AAAACCCC": 79,
+ "AAAADDDD": 80,
+ "AAAACCC": 81,
+ "BBBCCCC": 82,
+ "CCCCCCCCCCCCCC": 83,
+ "BBBDDDD": 84,
+ "AAADDDD": 85,
+ "AAAADDD": 86,
+ "BBBBDDD": 87,
+ "AAACCCC": 88,
+ "DDDDDDDDDDDDDDD": 89,
+ "CCCCCCCCCCCCCCC": 90,
+ "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB": 91,
+ "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA": 92,
+ "BBBBAAAAA": 93,
+ "BBBAA": 94,
+ "AAAABBBBB": 95,
+ "CCCDDD": 96,
+ "BBBBAAA": 97,
+ "BBBCC": 98,
+ "BBBBBBBBBBBBBBBBB": 99,
+ "BBBBBAAAAA": 100,
+ "AAAAAAAAAAAAAAAAA": 101,
+ "BBBDD": 102,
+ "AAABB": 103,
+ "BBBBBCCCC": 104,
+ "DDDDCCCC": 105,
+ "BBBBDDDDD": 106,
+ "BBBAAAAA": 107,
+ "BBBBCCCCC": 108,
+ "BBBAAAA": 109,
+ "AAACC": 110,
+ "DDDDCCC": 111,
+ "AAADD": 112,
+ "BBBBBBBBBBBBBBBBBB": 113,
+ "AAAAACCCC": 114,
+ "AAAACCCCC": 115,
+ "AAAAAAAAAAAAAAAAAA": 116,
+ "BBCCC": 117,
+ "AAAADDDDD": 118,
+ "BBBBBDDDD": 119,
+ "BBBCCCCC": 120,
+ "BBBBBCCC": 121,
+ "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD": 122,
+ "BBBBBDDDDD": 123,
+ "DDDCCCC": 124,
+ "AAAABBBBBB": 125,
+ "BBBBAAAAAA": 126,
+ "CCCCCCCCCCCCCCCCC": 127,
+ "BBBDDDDD": 128,
+ "BBBBBCCCCC": 129,
+ "BBDDD": 130,
+ "DDDDDDDDDDDDDDDDD": 131,
+ "AADDD": 132,
+ "BBBBBDDD": 133,
+ "AAAAADDDD": 134,
+ "AAABBBBBB": 135,
+ "AACCC": 136,
+ "BBBAAAAAA": 137,
+ "AAAAACCCCC": 138,
+ "DDDDDCCCC": 139,
+ "BBBBBAAAAAA": 140,
+ "AAAAACCC": 141,
+ "AAACCCCC": 142,
+ "CCCCCCCCCCCCCCCCCC": 143,
+ "DDDDDDDDDDDDDDDDDD": 144,
+ "AAADDDDD": 145,
+ "AAAAADDDDD": 146,
+ "AAAADDDDDD": 147,
+ "BBBBDDDDDD": 148,
+ "BBBBBBCCCC": 149,
+ "DDCCC": 150,
+ "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC": 151,
+ "BBBBCCCCCC": 152,
+ "AAAAAACCCC": 153,
+ "BBBBBBDDDD": 154,
+ "AAAABBBBBBB": 155,
+ "BBBCCCCCC": 156,
+ "BBBBBBDDDDD": 157,
+ "BBBBBCCCCCC": 158,
+ "BBBDDDDDD": 159,
+ "AAAACCCCCC": 160,
+ "BBCC": 161,
+ "CCCCDDD": 162,
+ "AAAAADDD": 163,
+ "BBBBBDDDDDD": 164,
+ "BBBBBBCCC": 165,
+ "DDDDCCCCC": 166,
+ "BBBBBBCCCCC": 167,
+ "BBBBAAAAAAA": 168,
+ "AAACCCCCC": 169,
+ "AAABBBBBBB": 170,
+ "CCDDD": 171,
+ "BBBBBAAAA": 172,
+ "BBBBBBDDD": 173,
+ "AAAAAADDDDD": 174,
+ "CCCCDDDD": 175,
+ "AAAAAACCCCC": 176,
+ "AAAAADDDDDD": 177,
+ "DDDDDDDDDDDDDDDDDDD": 178,
+ "BBAA": 179,
+ "AAABBBB": 180,
+ "AAABBBBB": 181,
+ "BBBAAAAAAA": 182,
+ "AAAAAADDDD": 183,
+ "CCCCCCCCCCCCCCCCCCC": 184,
+ "AAAAACCCCCC": 185,
+ "AAAAABBBBBB": 186,
+ "AAADDDDDD": 187,
+ "BBBBBBBCCCC": 188,
+ "BBDD": 189,
+ "BBBBBBBCCC": 190,
+ "CCCCCDDDD": 191,
+ "AAAAAACCC": 192,
+ "BBBBBAAAAAAA": 193,
+ "BBBDDDDDDD": 194,
+ "DDDDDCCC": 195,
+ "DDDDDCCCCC": 196,
+ "AAAAAADDDDDD": 197,
+ "AACC": 198,
+ "BBBBBBBDDDD": 199,
+ "AAAAAADDD": 200,
+ "BBBBCCCCCCC": 201,
+ "AAAABBBBBBBB": 202,
+ "BBBCCCCCCC": 203,
+ "BBBBBDDDDDDD": 204,
+ "BBBBDDDDDDD": 205,
+ "BBBBBBCCCCCC": 206,
+ "AAAACCCCCCC": 207,
+ "AAABBBBBBBB": 208,
+ "BBCCCC": 209,
+ "AAAADDDDDDD": 210,
+ "BBBAAAAAAAA": 211,
+ "BBBBBBDDDDDD": 212,
+ "BBBBAAAAAAAA": 213,
+ "AAAAAACCCCCC": 214,
+ "BBBBBBAAAAA": 215,
+ "BBBBCC": 216,
+ "AAAAAAACCCC": 217,
+ "DDDDDDDDDDDDDDDDDDDD": 218,
+ "AAACCCCCCC": 219,
+ "BBBBBBBDDD": 220,
+ "BBBBBBBCCCCC": 221,
+ "AADD": 222
+ },
+ "merges": [
+ "B B",
+ "A A",
+ "D D",
+ "C C",
+ "BB BB",
+ "AA AA",
+ "DD DD",
+ "CC CC",
+ "BBBB BBBB",
+ "AAAA AAAA",
+ "DDDD DDDD",
+ "CCCC CCCC",
+ "BB B",
+ "AA A",
+ "CC C",
+ "DD D",
+ "BBBB B",
+ "AAAA A",
+ "DDDD D",
+ "CCCC C",
+ "BBBB BB",
+ "AAAA AA",
+ "DDDD DD",
+ "CCCC CC",
+ "BBBB BBB",
+ "BBBBBBBB BBBBBBBB",
+ "AAAA AAA",
+ "AAAAAAAA AAAAAAAA",
+ "DDDD DDD",
+ "CCCC CCC",
+ "DDDDDDDD DDDDDDDD",
+ "CCCCCCCC CCCCCCCC",
+ "BBBBBBBB B",
+ "AAAAAAAA A",
+ "DDDDDDDD D",
+ "BBBBBBBB BB",
+ "AAAAAAAA AA",
+ "CCCCCCCC C",
+ "BBBBBBBB BBB",
+ "AAAAAAAA AAA",
+ "DDDDDDDD DD",
+ "CCCCCCCC CC",
+ "BBBBBBBB BBBB",
+ "DDDDDDDD DDD",
+ "AAAAAAAA AAAA",
+ "CCCCCCCC CCC",
+ "BBB AAA",
+ "BBBBBBBB BBBBB",
+ "AAAAAAAA AAAAA",
+ "BBB CCC",
+ "DDDDDDDD DDDD",
+ "BBB DDD",
+ "CCCCCCCC CCCC",
+ "AAA CCC",
+ "AAA DDD",
+ "AAAAAAAA AAAAAA",
+ "AAAA BBBB",
+ "BBBBBBBB BBBBBB",
+ "DDDDDDDD DDDDD",
+ "CCCCCCCC CCCCC",
+ "BBBBBBBB BBBBBBB",
+ "AAA BBB",
+ "DDD CCC",
+ "BBBB AAAA",
+ "AAAAAAAA AAAAAAA",
+ "BBBB CCCC",
+ "DDDDDDDD DDDDDD",
+ "BBBB DDDD",
+ "BBBB CCC",
+ "AAAA CCCC",
+ "AAAA DDDD",
+ "AAAA CCC",
+ "BBB CCCC",
+ "CCCCCCCC CCCCCC",
+ "BBB DDDD",
+ "AAA DDDD",
+ "AAAA DDD",
+ "BBBB DDD",
+ "AAA CCCC",
+ "DDDDDDDD DDDDDDD",
+ "CCCCCCCC CCCCCCC",
+ "BBBBBBBBBBBBBBBB BBBBBBBBBBBBBBBB",
+ "AAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAA",
+ "BBBB AAAAA",
+ "BBB AA",
+ "AAAA BBBBB",
+ "CCC DDD",
+ "BBBB AAA",
+ "BBB CC",
+ "BBBBBBBBBBBBBBBB B",
+ "BBBBB AAAAA",
+ "AAAAAAAAAAAAAAAA A",
+ "BBB DD",
+ "AAA BB",
+ "BBBBB CCCC",
+ "DDDD CCCC",
+ "BBBB DDDDD",
+ "BBB AAAAA",
+ "BBBB CCCCC",
+ "BBB AAAA",
+ "AAA CC",
+ "DDDD CCC",
+ "AAA DD",
+ "BBBBBBBBBBBBBBBB BB",
+ "AAAAA CCCC",
+ "AAAA CCCCC",
+ "AAAAAAAAAAAAAAAA AA",
+ "BB CCC",
+ "AAAA DDDDD",
+ "BBBBB DDDD",
+ "BBB CCCCC",
+ "BBBBB CCC",
+ "DDDDDDDDDDDDDDDD DDDDDDDDDDDDDDDD",
+ "BBBBB DDDDD",
+ "DDD CCCC",
+ "AAAA BBBBBB",
+ "BBBB AAAAAA",
+ "CCCCCCCCCCCCCCCC C",
+ "BBB DDDDD",
+ "BBBBB CCCCC",
+ "BB DDD",
+ "DDDDDDDDDDDDDDDD D",
+ "AA DDD",
+ "BBBBB DDD",
+ "AAAAA DDDD",
+ "AAA BBBBBB",
+ "AA CCC",
+ "BBB AAAAAA",
+ "AAAAA CCCCC",
+ "DDDDD CCCC",
+ "BBBBB AAAAAA",
+ "AAAAA CCC",
+ "AAA CCCCC",
+ "CCCCCCCCCCCCCCCC CC",
+ "DDDDDDDDDDDDDDDD DD",
+ "AAA DDDDD",
+ "AAAAA DDDDD",
+ "AAAA DDDDDD",
+ "BBBB DDDDDD",
+ "BBBBBB CCCC",
+ "DD CCC",
+ "CCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCC",
+ "BBBB CCCCCC",
+ "AAAAAA CCCC",
+ "BBBBBB DDDD",
+ "AAAA BBBBBBB",
+ "BBB CCCCCC",
+ "BBBBBB DDDDD",
+ "BBBBB CCCCCC",
+ "BBB DDDDDD",
+ "AAAA CCCCCC",
+ "BB CC",
+ "CCCC DDD",
+ "AAAAA DDD",
+ "BBBBB DDDDDD",
+ "BBBBBB CCC",
+ "DDDD CCCCC",
+ "BBBBBB CCCCC",
+ "BBBB AAAAAAA",
+ "AAA CCCCCC",
+ "AAA BBBBBBB",
+ "CC DDD",
+ "BBBBB AAAA",
+ "BBBBBB DDD",
+ "AAAAAA DDDDD",
+ "CCCC DDDD",
+ "AAAAAA CCCCC",
+ "AAAAA DDDDDD",
+ "DDDDDDDDDDDDDDDD DDD",
+ "BB AA",
+ "AAA BBBB",
+ "AAA BBBBB",
+ "BBB AAAAAAA",
+ "AAAAAA DDDD",
+ "CCCCCCCCCCCCCCCC CCC",
+ "AAAAA CCCCCC",
+ "AAAAA BBBBBB",
+ "AAA DDDDDD",
+ "BBBBBBB CCCC",
+ "BB DD",
+ "BBBBBBB CCC",
+ "CCCCC DDDD",
+ "AAAAAA CCC",
+ "BBBBB AAAAAAA",
+ "BBB DDDDDDD",
+ "DDDDD CCC",
+ "DDDDD CCCCC",
+ "AAAAAA DDDDDD",
+ "AA CC",
+ "BBBBBBB DDDD",
+ "AAAAAA DDD",
+ "BBBB CCCCCCC",
+ "AAAA BBBBBBBB",
+ "BBB CCCCCCC",
+ "BBBBB DDDDDDD",
+ "BBBB DDDDDDD",
+ "BBBBBB CCCCCC",
+ "AAAA CCCCCCC",
+ "AAA BBBBBBBB",
+ "BB CCCC",
+ "AAAA DDDDDDD",
+ "BBB AAAAAAAA",
+ "BBBBBB DDDDDD",
+ "BBBB AAAAAAAA",
+ "AAAAAA CCCCCC",
+ "BBBBBB AAAAA",
+ "BBBB CC",
+ "AAAAAAA CCCC",
+ "DDDDDDDDDDDDDDDD DDDD",
+ "AAA CCCCCCC",
+ "BBBBBBB DDD",
+ "BBBBBBB CCCCC",
+ "AA DD"
+ ]
+ }
+ }
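
The vocabulary and merges above define a character-level BPE over the letters A–D (plus the bracketed specials and the newline token) that progressively fuses runs of repeated letters and adjacent run pairs. As a rough sketch, assuming a local copy of the tokenizer.json added in this commit and the tokenizers library, encoding a short run pair should collapse to a single merged token; the expected ids are read off the vocab above, not verified here.

from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # file added in this commit
enc = tok.encode("AAAABBBB")
# With no normalizer or pre_tokenizer, the BPE merges should collapse this
# to one token: expected enc.tokens == ["AAAABBBB"] and enc.ids == [66].
print(enc.tokens, enc.ids)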
tokenizer_config.json ADDED
@@ -0,0 +1,5 @@
+ {
+ "clean_up_tokenization_spaces": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "tokenizer_class": "PreTrainedTokenizerFast"
+ }
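
Tying the files together, a hedged end-to-end generation sketch. It reuses the hypothetical ./model_dir path from above; note that generation_config.json sets bos_token_id 1 ([CLS]) and eos_token_id 5 (the newline token), matching special_tokens_map.json and the added_tokens in tokenizer.json.

from transformers import AutoTokenizer, GPT2LMHeadModel

tokenizer = AutoTokenizer.from_pretrained("./model_dir")  # PreTrainedTokenizerFast per tokenizer_config.json
model = GPT2LMHeadModel.from_pretrained("./model_dir")

prompt = "[CLS]AAAA"  # hypothetical prompt over the A-D symbol alphabet
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, do_sample=True, max_length=20)  # 20 matches n_ctx / truncation length
print(tokenizer.decode(out[0]))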
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:553005660a19b817a9fbed1ae41fc7c480fcc4cb32bf9251221698de1e539581
+ size 3899