imdatta0 committed on
Commit
f6116a7
1 Parent(s): d9f9f0a

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,7 +1 @@
1
- {
2
- "bos_token": "<|bos|>",
3
- "eos_token": "<|endoftext|>",
4
- "mask_token": "<|mask|>",
5
- "pad_token": "<|pad|>",
6
- "unk_token": "<|unk|>"
7
- }
 
1
+ {}
 
 
 
 
 
 
tokenizer.json CHANGED
@@ -5182,7 +5182,12 @@
5182
  "Ġalongside": 5111,
5183
  "ĠPass": 5112,
5184
  "itled": 5113,
5185
- "ĠNetherlands": 5114
 
 
 
 
 
5186
  },
5187
  "merges": [
5188
  [
@@ -24776,6 +24781,26 @@
24776
  [
24777
  "ĠN",
24778
  "etherlands"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24779
  ]
24780
  ]
24781
  }
 
5182
  "Ġalongside": 5111,
5183
  "ĠPass": 5112,
5184
  "itled": 5113,
5185
+ "ĠNetherlands": 5114,
5186
+ "ĠDer": 5115,
5187
+ "ĠFire": 5116,
5188
+ "Ġtouch": 5117,
5189
+ "astic": 5118,
5190
+ "ĠSenate": 5119
5191
  },
5192
  "merges": [
5193
  [
 
24781
  [
24782
  "ĠN",
24783
  "etherlands"
24784
+ ],
24785
+ [
24786
+ "ĠD",
24787
+ "er"
24788
+ ],
24789
+ [
24790
+ "ĠF",
24791
+ "ire"
24792
+ ],
24793
+ [
24794
+ "Ġto",
24795
+ "uch"
24796
+ ],
24797
+ [
24798
+ "ast",
24799
+ "ic"
24800
+ ],
24801
+ [
24802
+ "ĠSen",
24803
+ "ate"
24804
  ]
24805
  ]
24806
  }
tokenizer_config.json CHANGED
@@ -41,12 +41,7 @@
41
  "special": true
42
  }
43
  },
44
- "bos_token": "<|bos|>",
45
  "clean_up_tokenization_spaces": false,
46
- "eos_token": "<|endoftext|>",
47
- "mask_token": "<|mask|>",
48
  "model_max_length": 1000000000000000019884624838656,
49
- "pad_token": "<|pad|>",
50
- "tokenizer_class": "PreTrainedTokenizerFast",
51
- "unk_token": "<|unk|>"
52
  }
 
41
  "special": true
42
  }
43
  },
 
44
  "clean_up_tokenization_spaces": false,
 
 
45
  "model_max_length": 1000000000000000019884624838656,
46
+ "tokenizer_class": "PreTrainedTokenizerFast"
 
 
47
  }