parislo committed on
Commit
a70bb1f
·
verified ·
1 Parent(s): 97c86b4

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +174 -124
tokenizer.json CHANGED
@@ -293,123 +293,148 @@
293
  "Ġthe": 258,
294
  "Ġi": 259,
295
  "Ġa": 260,
296
- "en": 261,
297
- "re": 262,
298
- "Ġo": 263,
299
- "si": 264,
300
- "Ġis": 265,
301
- "al": 266,
302
- "ri": 267,
303
- "at": 268,
304
- "es": 269,
305
- "le": 270,
306
- "on": 271,
307
- "Ġf": 272,
308
- "Ġof": 273,
309
- "nd": 274,
310
- "an": 275,
311
- "he": 276,
312
- "Ġb": 277,
313
- "Ġc": 278,
314
- "Ġe": 279,
315
- "Ġs": 280,
316
- "Ġt": 281,
317
- "Eu": 282,
318
- "ion": 283,
319
- "la": 284,
320
- "mu": 285,
321
- "om": 286,
322
- "or": 287,
323
- "ore": 288,
324
- "se": 289,
325
- "ten": 290,
326
- "ĠT": 291,
327
- "Ġsi": 292,
328
- "ĠEu": 293,
329
- "Ġand": 294,
330
- "Ġfu": 295,
331
- "mula": 296,
332
- "ormula": 297,
333
- "Ġsid": 298,
334
- "'s": 299,
335
- "ag": 300,
336
- "et": 301,
337
- "hy": 302,
338
- "po": 303,
339
- "qu": 304,
340
- "use": 305,
341
- "Ġ-": 306,
342
- "Ġ2": 307,
343
- "Ġl": 308,
344
- "Ġw": 309,
345
- "ĠÎ": 310,
346
- "Ġth": 311,
347
- "Ġre": 312,
348
- "ther": 313,
349
- "Ġin": 314,
350
- "eng": 315,
351
- "ent": 316,
352
- "Ġother": 317,
353
- "rig": 318,
354
- "ler": 319,
355
- "Ġformula": 320,
356
- "ĠThe": 321,
357
- "ĠEuler": 322,
358
- "Ġsides": 323,
359
- "It": 324,
360
- "Py": 325,
361
- "am": 326,
362
- "are": 327,
363
- "ct": 328,
364
- "gle": 329,
365
- "hi": 330,
366
- "ht": 331,
367
- "in": 332,
368
- "li": 333,
369
- "lat": 334,
370
- "nct": 335,
371
- "ple": 336,
372
- "ry": 337,
373
- "um": 338,
374
- "wo": 339,
375
- "Ġ+": 340,
376
- "Ġ=": 341,
377
- "Ġg": 342,
378
- "Ġn": 343,
379
- "Ġhy": 344,
380
- "Ġrig": 345,
381
- "ĠIt": 346,
382
- "ĠPy": 347,
383
- "thag": 348,
384
- "Ġan": 349,
385
- "ndam": 350,
386
- "here": 351,
387
- "Ġcom": 352,
388
- "Ġex": 353,
389
- "Ġsqu": 354,
390
- "Ġtwo": 355,
391
- "ions": 356,
392
- "omet": 357,
393
- "orem": 358,
394
- "orean": 359,
395
- "tenuse": 360,
396
- "Ġfunct": 361,
397
- "Ġfundam": 362,
398
- "potenuse": 363,
399
- "Ġleng": 364,
400
- "Ġwhere": 365,
401
- "Ġthat": 366,
402
- "Ġrelat": 367,
403
- "ental": 368,
404
- "plex": 369,
405
- "Ġhypotenuse": 370,
406
- "Ġright": 371,
407
- "ĠPythag": 372,
408
- "Ġcomplex": 373,
409
- "Ġsquare": 374,
410
- "Ġfundamental": 375,
411
- "Ġlength": 376,
412
- "ĠPythagorean": 377
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  },
414
  "merges": [
415
  "t h",
@@ -417,6 +442,7 @@
417
  "Ġ the",
418
  "Ġ i",
419
  "Ġ a",
 
420
  "e n",
421
  "r e",
422
  "Ġ o",
@@ -428,9 +454,14 @@
428
  "e s",
429
  "l e",
430
  "o n",
 
 
431
  "Ġ f",
 
432
  "Ġo f",
433
  "n d",
 
 
434
  "a n",
435
  "h e",
436
  "Ġ b",
@@ -447,11 +478,14 @@
447
  "o re",
448
  "s e",
449
  "t en",
 
 
450
  "Ġ T",
451
  "Ġ si",
452
  "Ġ Eu",
453
  "Ġa nd",
454
  "Ġf u",
 
455
  "mu la",
456
  "or mula",
457
  "Ġsi d",
@@ -462,15 +496,22 @@
462
  "p o",
463
  "q u",
464
  "u se",
 
 
 
 
 
 
 
465
  "Ġ -",
466
- "Ġ 2",
467
  "Ġ l",
468
- "Ġ w",
469
- "Ġ Î",
470
  "Ġ th",
471
  "Ġ re",
 
472
  "the r",
473
  "Ġi n",
 
474
  "en g",
475
  "en t",
476
  "Ġo ther",
@@ -496,18 +537,26 @@
496
  "r y",
497
  "u m",
498
  "w o",
499
- "Ġ +",
500
- "Ġ =",
 
 
 
501
  "Ġ g",
502
  "Ġ n",
503
  "Ġ hy",
504
  "Ġ rig",
505
  "Ġ It",
506
  "Ġ Py",
 
 
507
  "th ag",
508
  "Ġa n",
 
 
 
509
  "nd am",
510
- "he re",
511
  "Ġc om",
512
  "Ġe x",
513
  "Ġs qu",
@@ -517,15 +566,16 @@
517
  "ore m",
518
  "ore an",
519
  "ten use",
 
520
  "Ġfu nct",
521
  "Ġfu ndam",
522
  "po tenuse",
523
  "Ġl eng",
524
- "Ġw here",
525
  "Ġth at",
526
  "Ġre lat",
527
  "ent al",
528
  "ple x",
 
529
  "Ġhy potenuse",
530
  "Ġrig ht",
531
  "ĠPy thag",
 
293
  "Ġthe": 258,
294
  "Ġi": 259,
295
  "Ġa": 260,
296
+ "ĠÎ": 261,
297
+ "en": 262,
298
+ "re": 263,
299
+ "Ġo": 264,
300
+ "si": 265,
301
+ "Ġis": 266,
302
+ "al": 267,
303
+ "ri": 268,
304
+ "at": 269,
305
+ "es": 270,
306
+ "le": 271,
307
+ "on": 272,
308
+ "ν": 273,
309
+ "Ïģ": 274,
310
+ "Ġf": 275,
311
+ "ĠÏ": 276,
312
+ "Ġof": 277,
313
+ "nd": 278,
314
+ "Ġ2": 279,
315
+ "ÏģÎ": 280,
316
+ "an": 281,
317
+ "he": 282,
318
+ "Ġb": 283,
319
+ "Ġc": 284,
320
+ "Ġe": 285,
321
+ "Ġs": 286,
322
+ "Ġt": 287,
323
+ "Eu": 288,
324
+ "ion": 289,
325
+ "la": 290,
326
+ "mu": 291,
327
+ "om": 292,
328
+ "or": 293,
329
+ "ore": 294,
330
+ "se": 295,
331
+ "ten": 296,
332
+ "ε": 297,
333
+ "ο": 298,
334
+ "ĠT": 299,
335
+ "Ġsi": 300,
336
+ "ĠEu": 301,
337
+ "Ġand": 302,
338
+ "Ġfu": 303,
339
+ "ĠÏĦ": 304,
340
+ "mula": 305,
341
+ "ormula": 306,
342
+ "Ġsid": 307,
343
+ "'s": 308,
344
+ "ag": 309,
345
+ "et": 310,
346
+ "hy": 311,
347
+ "po": 312,
348
+ "qu": 313,
349
+ "use": 314,
350
+ "¹Ï": 315,
351
+ "ί": 316,
352
+ "α": 317,
353
+ "η": 318,
354
+ "ÏĤ": 319,
355
+ "Ïħ": 320,
356
+ "Ġ+": 321,
357
+ "Ġ-": 322,
358
+ "Ġ=": 323,
359
+ "Ġl": 324,
360
+ "Ġth": 325,
361
+ "Ġre": 326,
362
+ "İν": 327,
363
+ "ther": 328,
364
+ "Ġin": 329,
365
+ "Ġγ": 330,
366
+ "eng": 331,
367
+ "ent": 332,
368
+ "Ġother": 333,
369
+ "rig": 334,
370
+ "ler": 335,
371
+ "Ġformula": 336,
372
+ "ĠThe": 337,
373
+ "ĠEuler": 338,
374
+ "Ġsides": 339,
375
+ "It": 340,
376
+ "Py": 341,
377
+ "am": 342,
378
+ "are": 343,
379
+ "ct": 344,
380
+ "gle": 345,
381
+ "hi": 346,
382
+ "ht": 347,
383
+ "in": 348,
384
+ "li": 349,
385
+ "lat": 350,
386
+ "nct": 351,
387
+ "ple": 352,
388
+ "ry": 353,
389
+ "um": 354,
390
+ "wo": 355,
391
+ "whe": 356,
392
+ "³Ï": 357,
393
+ "γÏ": 358,
394
+ "ÏĢ": 359,
395
+ "ÏĦ": 360,
396
+ "Ġg": 361,
397
+ "Ġn": 362,
398
+ "Ġhy": 363,
399
+ "Ġrig": 364,
400
+ "ĠIt": 365,
401
+ "ĠPy": 366,
402
+ "ĥη": 367,
403
+ "īν": 368,
404
+ "thag": 369,
405
+ "Ġan": 370,
406
+ "Ġα": 371,
407
+ "Ġβ": 372,
408
+ "Ġμ": 373,
409
+ "ndam": 374,
410
+ "ÏģιÏ": 375,
411
+ "Ġcom": 376,
412
+ "Ġex": 377,
413
+ "Ġsqu": 378,
414
+ "Ġtwo": 379,
415
+ "ions": 380,
416
+ "omet": 381,
417
+ "orem": 382,
418
+ "orean": 383,
419
+ "tenuse": 384,
420
+ "οÏħ": 385,
421
+ "Ġfunct": 386,
422
+ "Ġfundam": 387,
423
+ "potenuse": 388,
424
+ "Ġleng": 389,
425
+ "Ġthat": 390,
426
+ "Ġrelat": 391,
427
+ "ental": 392,
428
+ "plex": 393,
429
+ "where": 394,
430
+ "Ġhypotenuse": 395,
431
+ "Ġright": 396,
432
+ "ĠPythag": 397,
433
+ "Ġcomplex": 398,
434
+ "Ġsquare": 399,
435
+ "Ġfundamental": 400,
436
+ "Ġlength": 401,
437
+ "ĠPythagorean": 402
438
  },
439
  "merges": [
440
  "t h",
 
442
  "Ġ the",
443
  "Ġ i",
444
  "Ġ a",
445
+ "Ġ Î",
446
  "e n",
447
  "r e",
448
  "Ġ o",
 
454
  "e s",
455
  "l e",
456
  "o n",
457
+ "Î ½",
458
+ "Ï ģ",
459
  "Ġ f",
460
+ "Ġ Ï",
461
  "Ġo f",
462
  "n d",
463
+ "Ġ 2",
464
+ "Ïģ Î",
465
  "a n",
466
  "h e",
467
  "Ġ b",
 
478
  "o re",
479
  "s e",
480
  "t en",
481
+ "Î µ",
482
+ "Î ¿",
483
  "Ġ T",
484
  "Ġ si",
485
  "Ġ Eu",
486
  "Ġa nd",
487
  "Ġf u",
488
+ "ĠÏ Ħ",
489
  "mu la",
490
  "or mula",
491
  "Ġsi d",
 
496
  "p o",
497
  "q u",
498
  "u se",
499
+ "¹ Ï",
500
+ "Î ¯",
501
+ "Î ±",
502
+ "Î ·",
503
+ "Ï Ĥ",
504
+ "Ï ħ",
505
+ "Ġ +",
506
  "Ġ -",
507
+ "Ġ =",
508
  "Ġ l",
 
 
509
  "Ġ th",
510
  "Ġ re",
511
+ "İ Î½",
512
  "the r",
513
  "Ġi n",
514
+ "ĠÎ ³",
515
  "en g",
516
  "en t",
517
  "Ġo ther",
 
537
  "r y",
538
  "u m",
539
  "w o",
540
+ "w he",
541
+ "³ Ï",
542
+ "Î ³Ï",
543
+ "Ï Ģ",
544
+ "Ï Ħ",
545
  "Ġ g",
546
  "Ġ n",
547
  "Ġ hy",
548
  "Ġ rig",
549
  "Ġ It",
550
  "Ġ Py",
551
+ "ĥ η",
552
+ "ī ν",
553
  "th ag",
554
  "Ġa n",
555
+ "ĠÎ ±",
556
+ "ĠÎ ²",
557
+ "ĠÎ ¼",
558
  "nd am",
559
+ "ÏģÎ ¹Ï",
560
  "Ġc om",
561
  "Ġe x",
562
  "Ġs qu",
 
566
  "ore m",
567
  "ore an",
568
  "ten use",
569
+ "ο Ïħ",
570
  "Ġfu nct",
571
  "Ġfu ndam",
572
  "po tenuse",
573
  "Ġl eng",
 
574
  "Ġth at",
575
  "Ġre lat",
576
  "ent al",
577
  "ple x",
578
+ "whe re",
579
  "Ġhy potenuse",
580
  "Ġrig ht",
581
  "ĠPy thag",