shpotes committed on
Commit
d8bd706
1 Parent(s): d5975be

add tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -1
  2. special_tokens_map.json +1 -1
  3. tokenizer.json +9 -0
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"\t\t\t\t\t": 50301, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50289, "\t\t\t": 50303, "\t\t\t\t\t\t\t\t\t\t": 50296, " ": 50279, " ": 50263, "\t\t\t\t\t\t\t": 50299, " ": 50258, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50287, "\t\t\t\t\t\t\t\t\t\t\t\t": 50294, "\t\t\t\t\t\t\t\t\t\t\t": 50295, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50290, " ": 50272, " ": 50269, " ": 50261, " ": 50282, "\t\t\t\t\t\t\t\t\t\t\t\t\t": 50293, " ": 50268, " ": 50275, "\t\t\t\t": 50302, " ": 50276, " ": 50260, "\t\t": 50304, " ": 50273, " ": 50259, " ": 50267, " ": 50277, " ": 50257, " ": 50270, " ": 50280, "\t\t\t\t\t\t\t\t\t": 50297, "\t\t\t\t\t\t\t\t": 50298, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50288, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50291, " ": 50283, " ": 50271, " ": 50265, " ": 50266, " ": 50264, "\t\t\t\t\t\t": 50300, " ": 50285, " ": 50274, " ": 50262, " ": 50284, " ": 50286, " ": 50281, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50292, " ": 50278}
 
1
+ {" ": 50283, " ": 50271, "\t\t": 50304, "\t\t\t\t\t\t\t\t\t": 50297, " ": 50264, " ": 50275, "\t\t\t\t": 50302, " ": 50284, " ": 50261, " ": 50270, " ": 50274, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50288, "\t\t\t\t\t\t\t": 50299, "\t\t\t\t\t\t\t\t\t\t\t\t": 50294, " ": 50280, " ": 50263, "\t\t\t\t\t": 50301, " ": 50268, " ": 50269, " ": 50257, " ": 50259, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50287, " ": 50282, " ": 50281, " ": 50260, "\t\t\t": 50303, "\t\t\t\t\t\t\t\t\t\t": 50296, " ": 50272, " ": 50279, "<pad>": 50305, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50291, " ": 50286, "\t\t\t\t\t\t\t\t\t\t\t": 50295, " ": 50266, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50292, " ": 50267, " ": 50273, "\t\t\t\t\t\t": 50300, " ": 50285, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50290, " ": 50278, "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t": 50289, " ": 50262, " ": 50258, " ": 50265, " ": 50276, "\t\t\t\t\t\t\t\t": 50298, "\t\t\t\t\t\t\t\t\t\t\t\t\t": 50293, " ": 50277}
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}
 
1
+ {"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>", "pad_token": "<pad>"}
tokenizer.json CHANGED
@@ -443,6 +443,15 @@
443
  "rstrip": false,
444
  "normalized": true,
445
  "special": false
 
 
 
 
 
 
 
 
 
446
  }
447
  ],
448
  "normalizer": null,
 
443
  "rstrip": false,
444
  "normalized": true,
445
  "special": false
446
+ },
447
+ {
448
+ "id": 50305,
449
+ "content": "<pad>",
450
+ "single_word": false,
451
+ "lstrip": false,
452
+ "rstrip": false,
453
+ "normalized": false,
454
+ "special": true
455
  }
456
  ],
457
  "normalizer": null,