Auditt commited on
Commit
ccd2740
Β·
verified Β·
1 Parent(s): 65c4664

Training in progress, epoch 1

Browse files
added_tokens.json CHANGED
@@ -1,41 +1,40 @@
1
  {
2
- "\n": 100285,
3
- "Ξ΅": 100311,
4
- "Ο‡": 100284,
5
- "β†’": 100297,
6
- "↦": 100278,
7
- "∧": 100316,
8
- "β‰ ": 100314,
9
- "≀": 100291,
10
- "β‰₯": 100289,
11
- "β‹€": 100312,
12
- "⋁": 100302,
13
- "〈": 100315,
14
- "βŒͺ": 100303,
15
- "βŸ‚": 100305,
16
- "𐔰": 100304,
17
- "𐔱": 100292,
18
- "𐔲": 100301,
19
- "𐔳": 100282,
20
- "𐔷": 100306,
21
- "𐔸": 100309,
22
- "𐔻": 100308,
23
- "𐔾": 100293,
24
- "𐕀": 100313,
25
- "𐕂": 100286,
26
- "𐕃": 100283,
27
- "π•Š": 100287,
28
- "π•Ž": 100299,
29
- "𐕏": 100280,
30
- "𐕐": 100279,
31
- "𐕕": 100290,
32
- "𐕖": 100307,
33
- "𐕙": 100294,
34
- "𐕛": 100295,
35
- "π•œ": 100296,
36
- "π•ž": 100281,
37
  "π•Ÿ": 100288,
38
- "𐕠": 100300,
39
- "𐕣": 100298,
40
- "𝜎": 100310
41
  }
 
1
  {
2
+ "Ξ΅": 100315,
3
+ "Ο‡": 100297,
4
+ "β†’": 100312,
5
+ "↦": 100310,
6
+ "∧": 100286,
7
+ "β‰ ": 100304,
8
+ "≀": 100306,
9
+ "β‰₯": 100287,
10
+ "β‹€": 100292,
11
+ "⋁": 100282,
12
+ "βŸ‚": 100290,
13
+ "γ€ˆ": 100299,
14
+ "〉": 100280,
15
+ "𐔰": 100314,
16
+ "𐔱": 100308,
17
+ "𐔲": 100278,
18
+ "𐔳": 100303,
19
+ "𐔷": 100305,
20
+ "𐔸": 100285,
21
+ "𐔻": 100301,
22
+ "𐔾": 100279,
23
+ "𐕀": 100302,
24
+ "𐕂": 100309,
25
+ "𐕃": 100294,
26
+ "π•Š": 100298,
27
+ "π•Ž": 100311,
28
+ "𐕏": 100291,
29
+ "𐕐": 100300,
30
+ "𐕕": 100296,
31
+ "𐕖": 100289,
32
+ "𐕙": 100295,
33
+ "𐕛": 100313,
34
+ "π•œ": 100284,
35
+ "π•ž": 100307,
 
36
  "π•Ÿ": 100288,
37
+ "𐕠": 100283,
38
+ "𐕣": 100293,
39
+ "𝜎": 100281
40
  }
config.json CHANGED
@@ -4,6 +4,7 @@
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
 
7
  "dtype": "bfloat16",
8
  "eos_token_id": 100257,
9
  "hidden_act": "silu",
@@ -64,5 +65,5 @@
64
  "tie_word_embeddings": false,
65
  "transformers_version": "4.57.0",
66
  "use_cache": false,
67
- "vocab_size": 100317
68
  }
 
4
  ],
5
  "attention_bias": false,
6
  "attention_dropout": 0.0,
7
+ "bos_token_id": 100257,
8
  "dtype": "bfloat16",
9
  "eos_token_id": 100257,
10
  "hidden_act": "silu",
 
65
  "tie_word_embeddings": false,
66
  "transformers_version": "4.57.0",
67
  "use_cache": false,
68
+ "vocab_size": 100316
69
  }
debug.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ [2026-01-05 11:40:24,307] [WARNING] [py.warnings._showwarnmsg:110] [PID:8210] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
2
+ warnings.warn( # warn only once
3
+
4
+
5
+ [2026-01-05 11:40:56,610] [WARNING] [py.warnings._showwarnmsg:110] [PID:8210] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
generation_config.json CHANGED
@@ -1,11 +1,13 @@
1
  {
2
  "_from_model_config": true,
 
3
  "do_sample": true,
4
  "eos_token_id": [
5
  100265,
6
  100257
7
  ],
8
  "max_new_tokens": 32768,
 
9
  "temperature": 0.6,
10
  "top_p": 0.95,
11
  "transformers_version": "4.57.0"
 
1
  {
2
  "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
  "do_sample": true,
5
  "eos_token_id": [
6
  100265,
7
  100257
8
  ],
9
  "max_new_tokens": 32768,
10
+ "pad_token_id": 100277,
11
  "temperature": 0.6,
12
  "top_p": 0.95,
13
  "transformers_version": "4.57.0"
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a58504ada504fff3b0880f66c624deae1e7e43693b91dbba38e50951fdd0e2c5
3
- size 4970304464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59b145176a34b4b8a81585973f476bd62f9b37bddd6e170fa3474e5d6374fc1e
3
+ size 4970296272
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78b41126bc3294445ff606af8271689c61079f273388231a106b05aa48982559
3
  size 4981161496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5525b8a74d9dbf1e7ce3430d176cbd56a6287c36685f07d5c70da58b001f46ac
3
  size 4981161496
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:646fd532e8cfb60a18ee799f2d42c86824882736c5c1676236500a82de244368
3
- size 4645236728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b670ca3d925e24851ce7d797ccd995cfdfb7489dc0f1ac735c40eb6b68e333
3
+ size 4645228536
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
- "total_parameters": 7298330624,
4
- "total_size": 14596661248
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00003-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_parameters": 7298322432,
4
+ "total_size": 14596644864
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00003-of-00003.safetensors",
tokenizer.json CHANGED
@@ -203,92 +203,92 @@
203
  },
204
  {
205
  "id": 100278,
206
- "content": "↦",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
- "normalized": true,
211
  "special": false
212
  },
213
  {
214
  "id": 100279,
215
- "content": "𐕐",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
- "normalized": true,
220
  "special": false
221
  },
222
  {
223
  "id": 100280,
224
- "content": "𐕏",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
- "normalized": true,
229
  "special": false
230
  },
231
  {
232
  "id": 100281,
233
- "content": "π•ž",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
- "normalized": true,
238
  "special": false
239
  },
240
  {
241
  "id": 100282,
242
- "content": "𐔳",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
- "normalized": true,
247
  "special": false
248
  },
249
  {
250
  "id": 100283,
251
- "content": "𐕃",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
- "normalized": true,
256
  "special": false
257
  },
258
  {
259
  "id": 100284,
260
- "content": "Ο‡",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
- "normalized": true,
265
  "special": false
266
  },
267
  {
268
  "id": 100285,
269
- "content": "\n",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
- "normalized": true,
274
  "special": false
275
  },
276
  {
277
  "id": 100286,
278
- "content": "𐕂",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
- "normalized": true,
283
  "special": false
284
  },
285
  {
286
  "id": 100287,
287
- "content": "π•Š",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
- "normalized": true,
292
  "special": false
293
  },
294
  {
@@ -297,259 +297,250 @@
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
- "normalized": true,
301
  "special": false
302
  },
303
  {
304
  "id": 100289,
305
- "content": "β‰₯",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
- "normalized": true,
310
  "special": false
311
  },
312
  {
313
  "id": 100290,
314
- "content": "𐕕",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
- "normalized": true,
319
  "special": false
320
  },
321
  {
322
  "id": 100291,
323
- "content": "≀",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
- "normalized": true,
328
  "special": false
329
  },
330
  {
331
  "id": 100292,
332
- "content": "𐔱",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
- "normalized": true,
337
  "special": false
338
  },
339
  {
340
  "id": 100293,
341
- "content": "𐔾",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
- "normalized": true,
346
  "special": false
347
  },
348
  {
349
  "id": 100294,
350
- "content": "𐕙",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
- "normalized": true,
355
  "special": false
356
  },
357
  {
358
  "id": 100295,
359
- "content": "𐕛",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
- "normalized": true,
364
  "special": false
365
  },
366
  {
367
  "id": 100296,
368
- "content": "π•œ",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
- "normalized": true,
373
  "special": false
374
  },
375
  {
376
  "id": 100297,
377
- "content": "β†’",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
- "normalized": true,
382
  "special": false
383
  },
384
  {
385
  "id": 100298,
386
- "content": "𐕣",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
- "normalized": true,
391
  "special": false
392
  },
393
  {
394
  "id": 100299,
395
- "content": "π•Ž",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
- "normalized": true,
400
  "special": false
401
  },
402
  {
403
  "id": 100300,
404
- "content": "𐕠",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
- "normalized": true,
409
  "special": false
410
  },
411
  {
412
  "id": 100301,
413
- "content": "𐔲",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
- "normalized": true,
418
  "special": false
419
  },
420
  {
421
  "id": 100302,
422
- "content": "⋁",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
- "normalized": true,
427
  "special": false
428
  },
429
  {
430
  "id": 100303,
431
- "content": "βŒͺ",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
- "normalized": true,
436
  "special": false
437
  },
438
  {
439
  "id": 100304,
440
- "content": "𐔰",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
- "normalized": true,
445
  "special": false
446
  },
447
  {
448
  "id": 100305,
449
- "content": "βŸ‚",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
- "normalized": true,
454
  "special": false
455
  },
456
  {
457
  "id": 100306,
458
- "content": "𐔷",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
- "normalized": true,
463
  "special": false
464
  },
465
  {
466
  "id": 100307,
467
- "content": "𐕖",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
- "normalized": true,
472
  "special": false
473
  },
474
  {
475
  "id": 100308,
476
- "content": "𐔻",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
- "normalized": true,
481
  "special": false
482
  },
483
  {
484
  "id": 100309,
485
- "content": "𐔸",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
- "normalized": true,
490
  "special": false
491
  },
492
  {
493
  "id": 100310,
494
- "content": "𝜎",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
- "normalized": true,
499
  "special": false
500
  },
501
  {
502
  "id": 100311,
503
- "content": "Ξ΅",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
- "normalized": true,
508
  "special": false
509
  },
510
  {
511
  "id": 100312,
512
- "content": "β‹€",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
- "normalized": true,
517
  "special": false
518
  },
519
  {
520
  "id": 100313,
521
- "content": "𐕀",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
- "normalized": true,
526
  "special": false
527
  },
528
  {
529
  "id": 100314,
530
- "content": "β‰ ",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
- "normalized": true,
535
  "special": false
536
  },
537
  {
538
  "id": 100315,
539
- "content": "〈",
540
- "single_word": false,
541
- "lstrip": false,
542
- "rstrip": false,
543
- "normalized": true,
544
- "special": false
545
- },
546
- {
547
- "id": 100316,
548
- "content": "∧",
549
  "single_word": false,
550
  "lstrip": false,
551
  "rstrip": false,
552
- "normalized": true,
553
  "special": false
554
  }
555
  ],
 
203
  },
204
  {
205
  "id": 100278,
206
+ "content": "𐔲",
207
  "single_word": false,
208
  "lstrip": false,
209
  "rstrip": false,
210
+ "normalized": false,
211
  "special": false
212
  },
213
  {
214
  "id": 100279,
215
+ "content": "𐔾",
216
  "single_word": false,
217
  "lstrip": false,
218
  "rstrip": false,
219
+ "normalized": false,
220
  "special": false
221
  },
222
  {
223
  "id": 100280,
224
+ "content": "〉",
225
  "single_word": false,
226
  "lstrip": false,
227
  "rstrip": false,
228
+ "normalized": false,
229
  "special": false
230
  },
231
  {
232
  "id": 100281,
233
+ "content": "𝜎",
234
  "single_word": false,
235
  "lstrip": false,
236
  "rstrip": false,
237
+ "normalized": false,
238
  "special": false
239
  },
240
  {
241
  "id": 100282,
242
+ "content": "⋁",
243
  "single_word": false,
244
  "lstrip": false,
245
  "rstrip": false,
246
+ "normalized": false,
247
  "special": false
248
  },
249
  {
250
  "id": 100283,
251
+ "content": "𐕠",
252
  "single_word": false,
253
  "lstrip": false,
254
  "rstrip": false,
255
+ "normalized": false,
256
  "special": false
257
  },
258
  {
259
  "id": 100284,
260
+ "content": "π•œ",
261
  "single_word": false,
262
  "lstrip": false,
263
  "rstrip": false,
264
+ "normalized": false,
265
  "special": false
266
  },
267
  {
268
  "id": 100285,
269
+ "content": "𐔸",
270
  "single_word": false,
271
  "lstrip": false,
272
  "rstrip": false,
273
+ "normalized": false,
274
  "special": false
275
  },
276
  {
277
  "id": 100286,
278
+ "content": "∧",
279
  "single_word": false,
280
  "lstrip": false,
281
  "rstrip": false,
282
+ "normalized": false,
283
  "special": false
284
  },
285
  {
286
  "id": 100287,
287
+ "content": "β‰₯",
288
  "single_word": false,
289
  "lstrip": false,
290
  "rstrip": false,
291
+ "normalized": false,
292
  "special": false
293
  },
294
  {
 
297
  "single_word": false,
298
  "lstrip": false,
299
  "rstrip": false,
300
+ "normalized": false,
301
  "special": false
302
  },
303
  {
304
  "id": 100289,
305
+ "content": "𐕖",
306
  "single_word": false,
307
  "lstrip": false,
308
  "rstrip": false,
309
+ "normalized": false,
310
  "special": false
311
  },
312
  {
313
  "id": 100290,
314
+ "content": "βŸ‚",
315
  "single_word": false,
316
  "lstrip": false,
317
  "rstrip": false,
318
+ "normalized": false,
319
  "special": false
320
  },
321
  {
322
  "id": 100291,
323
+ "content": "𐕏",
324
  "single_word": false,
325
  "lstrip": false,
326
  "rstrip": false,
327
+ "normalized": false,
328
  "special": false
329
  },
330
  {
331
  "id": 100292,
332
+ "content": "β‹€",
333
  "single_word": false,
334
  "lstrip": false,
335
  "rstrip": false,
336
+ "normalized": false,
337
  "special": false
338
  },
339
  {
340
  "id": 100293,
341
+ "content": "𐕣",
342
  "single_word": false,
343
  "lstrip": false,
344
  "rstrip": false,
345
+ "normalized": false,
346
  "special": false
347
  },
348
  {
349
  "id": 100294,
350
+ "content": "𐕃",
351
  "single_word": false,
352
  "lstrip": false,
353
  "rstrip": false,
354
+ "normalized": false,
355
  "special": false
356
  },
357
  {
358
  "id": 100295,
359
+ "content": "𐕙",
360
  "single_word": false,
361
  "lstrip": false,
362
  "rstrip": false,
363
+ "normalized": false,
364
  "special": false
365
  },
366
  {
367
  "id": 100296,
368
+ "content": "𐕕",
369
  "single_word": false,
370
  "lstrip": false,
371
  "rstrip": false,
372
+ "normalized": false,
373
  "special": false
374
  },
375
  {
376
  "id": 100297,
377
+ "content": "Ο‡",
378
  "single_word": false,
379
  "lstrip": false,
380
  "rstrip": false,
381
+ "normalized": false,
382
  "special": false
383
  },
384
  {
385
  "id": 100298,
386
+ "content": "π•Š",
387
  "single_word": false,
388
  "lstrip": false,
389
  "rstrip": false,
390
+ "normalized": false,
391
  "special": false
392
  },
393
  {
394
  "id": 100299,
395
+ "content": "γ€ˆ",
396
  "single_word": false,
397
  "lstrip": false,
398
  "rstrip": false,
399
+ "normalized": false,
400
  "special": false
401
  },
402
  {
403
  "id": 100300,
404
+ "content": "𐕐",
405
  "single_word": false,
406
  "lstrip": false,
407
  "rstrip": false,
408
+ "normalized": false,
409
  "special": false
410
  },
411
  {
412
  "id": 100301,
413
+ "content": "𐔻",
414
  "single_word": false,
415
  "lstrip": false,
416
  "rstrip": false,
417
+ "normalized": false,
418
  "special": false
419
  },
420
  {
421
  "id": 100302,
422
+ "content": "𐕀",
423
  "single_word": false,
424
  "lstrip": false,
425
  "rstrip": false,
426
+ "normalized": false,
427
  "special": false
428
  },
429
  {
430
  "id": 100303,
431
+ "content": "𐔳",
432
  "single_word": false,
433
  "lstrip": false,
434
  "rstrip": false,
435
+ "normalized": false,
436
  "special": false
437
  },
438
  {
439
  "id": 100304,
440
+ "content": "β‰ ",
441
  "single_word": false,
442
  "lstrip": false,
443
  "rstrip": false,
444
+ "normalized": false,
445
  "special": false
446
  },
447
  {
448
  "id": 100305,
449
+ "content": "𐔷",
450
  "single_word": false,
451
  "lstrip": false,
452
  "rstrip": false,
453
+ "normalized": false,
454
  "special": false
455
  },
456
  {
457
  "id": 100306,
458
+ "content": "≀",
459
  "single_word": false,
460
  "lstrip": false,
461
  "rstrip": false,
462
+ "normalized": false,
463
  "special": false
464
  },
465
  {
466
  "id": 100307,
467
+ "content": "π•ž",
468
  "single_word": false,
469
  "lstrip": false,
470
  "rstrip": false,
471
+ "normalized": false,
472
  "special": false
473
  },
474
  {
475
  "id": 100308,
476
+ "content": "𐔱",
477
  "single_word": false,
478
  "lstrip": false,
479
  "rstrip": false,
480
+ "normalized": false,
481
  "special": false
482
  },
483
  {
484
  "id": 100309,
485
+ "content": "𐕂",
486
  "single_word": false,
487
  "lstrip": false,
488
  "rstrip": false,
489
+ "normalized": false,
490
  "special": false
491
  },
492
  {
493
  "id": 100310,
494
+ "content": "↦",
495
  "single_word": false,
496
  "lstrip": false,
497
  "rstrip": false,
498
+ "normalized": false,
499
  "special": false
500
  },
501
  {
502
  "id": 100311,
503
+ "content": "π•Ž",
504
  "single_word": false,
505
  "lstrip": false,
506
  "rstrip": false,
507
+ "normalized": false,
508
  "special": false
509
  },
510
  {
511
  "id": 100312,
512
+ "content": "β†’",
513
  "single_word": false,
514
  "lstrip": false,
515
  "rstrip": false,
516
+ "normalized": false,
517
  "special": false
518
  },
519
  {
520
  "id": 100313,
521
+ "content": "𐕛",
522
  "single_word": false,
523
  "lstrip": false,
524
  "rstrip": false,
525
+ "normalized": false,
526
  "special": false
527
  },
528
  {
529
  "id": 100314,
530
+ "content": "𐔰",
531
  "single_word": false,
532
  "lstrip": false,
533
  "rstrip": false,
534
+ "normalized": false,
535
  "special": false
536
  },
537
  {
538
  "id": 100315,
539
+ "content": "Ξ΅",
 
 
 
 
 
 
 
 
 
540
  "single_word": false,
541
  "lstrip": false,
542
  "rstrip": false,
543
+ "normalized": false,
544
  "special": false
545
  }
546
  ],
tokenizer_config.json CHANGED
@@ -178,81 +178,81 @@
178
  "special": true
179
  },
180
  "100278": {
181
- "content": "↦",
182
  "lstrip": false,
183
- "normalized": true,
184
  "rstrip": false,
185
  "single_word": false,
186
  "special": false
187
  },
188
  "100279": {
189
- "content": "𐕐",
190
  "lstrip": false,
191
- "normalized": true,
192
  "rstrip": false,
193
  "single_word": false,
194
  "special": false
195
  },
196
  "100280": {
197
- "content": "𐕏",
198
  "lstrip": false,
199
- "normalized": true,
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
203
  },
204
  "100281": {
205
- "content": "π•ž",
206
  "lstrip": false,
207
- "normalized": true,
208
  "rstrip": false,
209
  "single_word": false,
210
  "special": false
211
  },
212
  "100282": {
213
- "content": "𐔳",
214
  "lstrip": false,
215
- "normalized": true,
216
  "rstrip": false,
217
  "single_word": false,
218
  "special": false
219
  },
220
  "100283": {
221
- "content": "𐕃",
222
  "lstrip": false,
223
- "normalized": true,
224
  "rstrip": false,
225
  "single_word": false,
226
  "special": false
227
  },
228
  "100284": {
229
- "content": "Ο‡",
230
  "lstrip": false,
231
- "normalized": true,
232
  "rstrip": false,
233
  "single_word": false,
234
  "special": false
235
  },
236
  "100285": {
237
- "content": "\n",
238
  "lstrip": false,
239
- "normalized": true,
240
  "rstrip": false,
241
  "single_word": false,
242
  "special": false
243
  },
244
  "100286": {
245
- "content": "𐕂",
246
  "lstrip": false,
247
- "normalized": true,
248
  "rstrip": false,
249
  "single_word": false,
250
  "special": false
251
  },
252
  "100287": {
253
- "content": "π•Š",
254
  "lstrip": false,
255
- "normalized": true,
256
  "rstrip": false,
257
  "single_word": false,
258
  "special": false
@@ -260,231 +260,223 @@
260
  "100288": {
261
  "content": "π•Ÿ",
262
  "lstrip": false,
263
- "normalized": true,
264
  "rstrip": false,
265
  "single_word": false,
266
  "special": false
267
  },
268
  "100289": {
269
- "content": "β‰₯",
270
  "lstrip": false,
271
- "normalized": true,
272
  "rstrip": false,
273
  "single_word": false,
274
  "special": false
275
  },
276
  "100290": {
277
- "content": "𐕕",
278
  "lstrip": false,
279
- "normalized": true,
280
  "rstrip": false,
281
  "single_word": false,
282
  "special": false
283
  },
284
  "100291": {
285
- "content": "≀",
286
  "lstrip": false,
287
- "normalized": true,
288
  "rstrip": false,
289
  "single_word": false,
290
  "special": false
291
  },
292
  "100292": {
293
- "content": "𐔱",
294
  "lstrip": false,
295
- "normalized": true,
296
  "rstrip": false,
297
  "single_word": false,
298
  "special": false
299
  },
300
  "100293": {
301
- "content": "𐔾",
302
  "lstrip": false,
303
- "normalized": true,
304
  "rstrip": false,
305
  "single_word": false,
306
  "special": false
307
  },
308
  "100294": {
309
- "content": "𐕙",
310
  "lstrip": false,
311
- "normalized": true,
312
  "rstrip": false,
313
  "single_word": false,
314
  "special": false
315
  },
316
  "100295": {
317
- "content": "𐕛",
318
  "lstrip": false,
319
- "normalized": true,
320
  "rstrip": false,
321
  "single_word": false,
322
  "special": false
323
  },
324
  "100296": {
325
- "content": "π•œ",
326
  "lstrip": false,
327
- "normalized": true,
328
  "rstrip": false,
329
  "single_word": false,
330
  "special": false
331
  },
332
  "100297": {
333
- "content": "β†’",
334
  "lstrip": false,
335
- "normalized": true,
336
  "rstrip": false,
337
  "single_word": false,
338
  "special": false
339
  },
340
  "100298": {
341
- "content": "𐕣",
342
  "lstrip": false,
343
- "normalized": true,
344
  "rstrip": false,
345
  "single_word": false,
346
  "special": false
347
  },
348
  "100299": {
349
- "content": "π•Ž",
350
  "lstrip": false,
351
- "normalized": true,
352
  "rstrip": false,
353
  "single_word": false,
354
  "special": false
355
  },
356
  "100300": {
357
- "content": "𐕠",
358
  "lstrip": false,
359
- "normalized": true,
360
  "rstrip": false,
361
  "single_word": false,
362
  "special": false
363
  },
364
  "100301": {
365
- "content": "𐔲",
366
  "lstrip": false,
367
- "normalized": true,
368
  "rstrip": false,
369
  "single_word": false,
370
  "special": false
371
  },
372
  "100302": {
373
- "content": "⋁",
374
  "lstrip": false,
375
- "normalized": true,
376
  "rstrip": false,
377
  "single_word": false,
378
  "special": false
379
  },
380
  "100303": {
381
- "content": "βŒͺ",
382
  "lstrip": false,
383
- "normalized": true,
384
  "rstrip": false,
385
  "single_word": false,
386
  "special": false
387
  },
388
  "100304": {
389
- "content": "𐔰",
390
  "lstrip": false,
391
- "normalized": true,
392
  "rstrip": false,
393
  "single_word": false,
394
  "special": false
395
  },
396
  "100305": {
397
- "content": "βŸ‚",
398
  "lstrip": false,
399
- "normalized": true,
400
  "rstrip": false,
401
  "single_word": false,
402
  "special": false
403
  },
404
  "100306": {
405
- "content": "𐔷",
406
  "lstrip": false,
407
- "normalized": true,
408
  "rstrip": false,
409
  "single_word": false,
410
  "special": false
411
  },
412
  "100307": {
413
- "content": "𐕖",
414
  "lstrip": false,
415
- "normalized": true,
416
  "rstrip": false,
417
  "single_word": false,
418
  "special": false
419
  },
420
  "100308": {
421
- "content": "𐔻",
422
  "lstrip": false,
423
- "normalized": true,
424
  "rstrip": false,
425
  "single_word": false,
426
  "special": false
427
  },
428
  "100309": {
429
- "content": "𐔸",
430
  "lstrip": false,
431
- "normalized": true,
432
  "rstrip": false,
433
  "single_word": false,
434
  "special": false
435
  },
436
  "100310": {
437
- "content": "𝜎",
438
  "lstrip": false,
439
- "normalized": true,
440
  "rstrip": false,
441
  "single_word": false,
442
  "special": false
443
  },
444
  "100311": {
445
- "content": "Ξ΅",
446
  "lstrip": false,
447
- "normalized": true,
448
  "rstrip": false,
449
  "single_word": false,
450
  "special": false
451
  },
452
  "100312": {
453
- "content": "β‹€",
454
  "lstrip": false,
455
- "normalized": true,
456
  "rstrip": false,
457
  "single_word": false,
458
  "special": false
459
  },
460
  "100313": {
461
- "content": "𐕀",
462
  "lstrip": false,
463
- "normalized": true,
464
  "rstrip": false,
465
  "single_word": false,
466
  "special": false
467
  },
468
  "100314": {
469
- "content": "β‰ ",
470
  "lstrip": false,
471
- "normalized": true,
472
  "rstrip": false,
473
  "single_word": false,
474
  "special": false
475
  },
476
  "100315": {
477
- "content": "〈",
478
- "lstrip": false,
479
- "normalized": true,
480
- "rstrip": false,
481
- "single_word": false,
482
- "special": false
483
- },
484
- "100316": {
485
- "content": "∧",
486
  "lstrip": false,
487
- "normalized": true,
488
  "rstrip": false,
489
  "single_word": false,
490
  "special": false
 
178
  "special": true
179
  },
180
  "100278": {
181
+ "content": "𐔲",
182
  "lstrip": false,
183
+ "normalized": false,
184
  "rstrip": false,
185
  "single_word": false,
186
  "special": false
187
  },
188
  "100279": {
189
+ "content": "𐔾",
190
  "lstrip": false,
191
+ "normalized": false,
192
  "rstrip": false,
193
  "single_word": false,
194
  "special": false
195
  },
196
  "100280": {
197
+ "content": "〉",
198
  "lstrip": false,
199
+ "normalized": false,
200
  "rstrip": false,
201
  "single_word": false,
202
  "special": false
203
  },
204
  "100281": {
205
+ "content": "𝜎",
206
  "lstrip": false,
207
+ "normalized": false,
208
  "rstrip": false,
209
  "single_word": false,
210
  "special": false
211
  },
212
  "100282": {
213
+ "content": "⋁",
214
  "lstrip": false,
215
+ "normalized": false,
216
  "rstrip": false,
217
  "single_word": false,
218
  "special": false
219
  },
220
  "100283": {
221
+ "content": "𐕠",
222
  "lstrip": false,
223
+ "normalized": false,
224
  "rstrip": false,
225
  "single_word": false,
226
  "special": false
227
  },
228
  "100284": {
229
+ "content": "π•œ",
230
  "lstrip": false,
231
+ "normalized": false,
232
  "rstrip": false,
233
  "single_word": false,
234
  "special": false
235
  },
236
  "100285": {
237
+ "content": "𐔸",
238
  "lstrip": false,
239
+ "normalized": false,
240
  "rstrip": false,
241
  "single_word": false,
242
  "special": false
243
  },
244
  "100286": {
245
+ "content": "∧",
246
  "lstrip": false,
247
+ "normalized": false,
248
  "rstrip": false,
249
  "single_word": false,
250
  "special": false
251
  },
252
  "100287": {
253
+ "content": "β‰₯",
254
  "lstrip": false,
255
+ "normalized": false,
256
  "rstrip": false,
257
  "single_word": false,
258
  "special": false
 
260
  "100288": {
261
  "content": "π•Ÿ",
262
  "lstrip": false,
263
+ "normalized": false,
264
  "rstrip": false,
265
  "single_word": false,
266
  "special": false
267
  },
268
  "100289": {
269
+ "content": "𐕖",
270
  "lstrip": false,
271
+ "normalized": false,
272
  "rstrip": false,
273
  "single_word": false,
274
  "special": false
275
  },
276
  "100290": {
277
+ "content": "βŸ‚",
278
  "lstrip": false,
279
+ "normalized": false,
280
  "rstrip": false,
281
  "single_word": false,
282
  "special": false
283
  },
284
  "100291": {
285
+ "content": "𐕏",
286
  "lstrip": false,
287
+ "normalized": false,
288
  "rstrip": false,
289
  "single_word": false,
290
  "special": false
291
  },
292
  "100292": {
293
+ "content": "β‹€",
294
  "lstrip": false,
295
+ "normalized": false,
296
  "rstrip": false,
297
  "single_word": false,
298
  "special": false
299
  },
300
  "100293": {
301
+ "content": "𐕣",
302
  "lstrip": false,
303
+ "normalized": false,
304
  "rstrip": false,
305
  "single_word": false,
306
  "special": false
307
  },
308
  "100294": {
309
+ "content": "𐕃",
310
  "lstrip": false,
311
+ "normalized": false,
312
  "rstrip": false,
313
  "single_word": false,
314
  "special": false
315
  },
316
  "100295": {
317
+ "content": "𐕙",
318
  "lstrip": false,
319
+ "normalized": false,
320
  "rstrip": false,
321
  "single_word": false,
322
  "special": false
323
  },
324
  "100296": {
325
+ "content": "𐕕",
326
  "lstrip": false,
327
+ "normalized": false,
328
  "rstrip": false,
329
  "single_word": false,
330
  "special": false
331
  },
332
  "100297": {
333
+ "content": "Ο‡",
334
  "lstrip": false,
335
+ "normalized": false,
336
  "rstrip": false,
337
  "single_word": false,
338
  "special": false
339
  },
340
  "100298": {
341
+ "content": "π•Š",
342
  "lstrip": false,
343
+ "normalized": false,
344
  "rstrip": false,
345
  "single_word": false,
346
  "special": false
347
  },
348
  "100299": {
349
+ "content": "γ€ˆ",
350
  "lstrip": false,
351
+ "normalized": false,
352
  "rstrip": false,
353
  "single_word": false,
354
  "special": false
355
  },
356
  "100300": {
357
+ "content": "𐕐",
358
  "lstrip": false,
359
+ "normalized": false,
360
  "rstrip": false,
361
  "single_word": false,
362
  "special": false
363
  },
364
  "100301": {
365
+ "content": "𐔻",
366
  "lstrip": false,
367
+ "normalized": false,
368
  "rstrip": false,
369
  "single_word": false,
370
  "special": false
371
  },
372
  "100302": {
373
+ "content": "𐕀",
374
  "lstrip": false,
375
+ "normalized": false,
376
  "rstrip": false,
377
  "single_word": false,
378
  "special": false
379
  },
380
  "100303": {
381
+ "content": "𐔳",
382
  "lstrip": false,
383
+ "normalized": false,
384
  "rstrip": false,
385
  "single_word": false,
386
  "special": false
387
  },
388
  "100304": {
389
+ "content": "β‰ ",
390
  "lstrip": false,
391
+ "normalized": false,
392
  "rstrip": false,
393
  "single_word": false,
394
  "special": false
395
  },
396
  "100305": {
397
+ "content": "𐔷",
398
  "lstrip": false,
399
+ "normalized": false,
400
  "rstrip": false,
401
  "single_word": false,
402
  "special": false
403
  },
404
  "100306": {
405
+ "content": "≀",
406
  "lstrip": false,
407
+ "normalized": false,
408
  "rstrip": false,
409
  "single_word": false,
410
  "special": false
411
  },
412
  "100307": {
413
+ "content": "π•ž",
414
  "lstrip": false,
415
+ "normalized": false,
416
  "rstrip": false,
417
  "single_word": false,
418
  "special": false
419
  },
420
  "100308": {
421
+ "content": "𐔱",
422
  "lstrip": false,
423
+ "normalized": false,
424
  "rstrip": false,
425
  "single_word": false,
426
  "special": false
427
  },
428
  "100309": {
429
+ "content": "𐕂",
430
  "lstrip": false,
431
+ "normalized": false,
432
  "rstrip": false,
433
  "single_word": false,
434
  "special": false
435
  },
436
  "100310": {
437
+ "content": "↦",
438
  "lstrip": false,
439
+ "normalized": false,
440
  "rstrip": false,
441
  "single_word": false,
442
  "special": false
443
  },
444
  "100311": {
445
+ "content": "π•Ž",
446
  "lstrip": false,
447
+ "normalized": false,
448
  "rstrip": false,
449
  "single_word": false,
450
  "special": false
451
  },
452
  "100312": {
453
+ "content": "β†’",
454
  "lstrip": false,
455
+ "normalized": false,
456
  "rstrip": false,
457
  "single_word": false,
458
  "special": false
459
  },
460
  "100313": {
461
+ "content": "𐕛",
462
  "lstrip": false,
463
+ "normalized": false,
464
  "rstrip": false,
465
  "single_word": false,
466
  "special": false
467
  },
468
  "100314": {
469
+ "content": "𐔰",
470
  "lstrip": false,
471
+ "normalized": false,
472
  "rstrip": false,
473
  "single_word": false,
474
  "special": false
475
  },
476
  "100315": {
477
+ "content": "Ξ΅",
 
 
 
 
 
 
 
 
478
  "lstrip": false,
479
+ "normalized": false,
480
  "rstrip": false,
481
  "single_word": false,
482
  "special": false
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a6622e9a04074b9fd1401b33af312041420cd7007e0c6af53acbc147f5591c
3
+ size 8913