snoop2head committed on
Commit
faa6ae2
1 Parent(s): 253c6db

add tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +62 -62
  2. tokenizer_config.json +1 -1
tokenizer.json CHANGED
@@ -129,67 +129,67 @@
129
  "y": 65,
130
  "z": 66,
131
  "##u": 67,
132
- "##l": 68,
133
- "##v": 69,
134
- "##e": 70,
135
- "##r": 71,
136
- "##i": 72,
137
- "##z": 73,
138
- "##s": 74,
139
- "##a": 75,
140
- "##g": 76,
141
- "##d": 77,
142
- "##o": 78,
143
- "##w": 79,
144
  "##m": 80,
145
- "##n": 81,
146
- "##t": 82,
147
- "##p": 83,
148
- "##k": 84,
149
- "##b": 85,
150
- "##T": 86,
151
- "##R": 87,
152
- "##I": 88,
153
- "##N": 89,
154
- "##8": 90,
155
- "##9": 91,
156
- "##0": 92,
157
- "##h": 93,
158
- "##c": 94,
159
- "##f": 95,
160
- "##5": 96,
161
- "##3": 97,
162
- "##y": 98,
163
- "##2": 99,
164
- "##1": 100,
165
- "##x": 101,
166
- "##7": 102,
167
- "##6": 103,
168
- "##4": 104,
169
- "##S": 105,
170
- "##D": 106,
171
- "##C": 107,
172
- "##j": 108,
173
- "##A": 109,
174
- "##F": 110,
175
  "##P": 111,
176
- "##L": 112,
177
- "##E": 113,
178
- "##M": 114,
179
- "##J": 115,
180
- "##U": 116,
181
- "##B": 117,
182
- "##H": 118,
183
- "##G": 119,
184
- "##K": 120,
185
- "##O": 121,
186
- "##q": 122,
187
- "##W": 123,
188
- "##V": 124,
189
- "##Y": 125,
190
  "##X": 126,
191
- "##Q": 127,
192
- "##Z": 128,
193
  "##he": 129,
194
  "the": 130,
195
  "##on": 131,
@@ -5493,8 +5493,8 @@
5493
  "colon": 5429,
5494
  "depression": 5430,
5495
  "##known": 5431,
5496
- "##men": 5432,
5497
- "##know": 5433,
5498
  "##insula": 5434,
5499
  "outdoor": 5435,
5500
  "##unk": 5436,
@@ -9005,8 +9005,8 @@
9005
  "IoT": 8941,
9006
  "tac": 8942,
9007
  "staying": 8943,
9008
- "##zens": 8944,
9009
- "##26": 8945,
9010
  "heated": 8946,
9011
  "none": 8947,
9012
  "temperatures": 8948,
 
129
  "y": 65,
130
  "z": 66,
131
  "##u": 67,
132
+ "##k": 68,
133
+ "##w": 69,
134
+ "##a": 70,
135
+ "##n": 71,
136
+ "##g": 72,
137
+ "##s": 73,
138
+ "##e": 74,
139
+ "##b": 75,
140
+ "##o": 76,
141
+ "##r": 77,
142
+ "##t": 78,
143
+ "##i": 79,
144
  "##m": 80,
145
+ "##d": 81,
146
+ "##x": 82,
147
+ "##O": 83,
148
+ "##l": 84,
149
+ "##c": 85,
150
+ "##v": 86,
151
+ "##A": 87,
152
+ "##C": 88,
153
+ "##2": 89,
154
+ "##R": 90,
155
+ "##1": 91,
156
+ "##h": 92,
157
+ "##j": 93,
158
+ "##p": 94,
159
+ "##S": 95,
160
+ "##y": 96,
161
+ "##7": 97,
162
+ "##6": 98,
163
+ "##5": 99,
164
+ "##0": 100,
165
+ "##9": 101,
166
+ "##4": 102,
167
+ "##3": 103,
168
+ "##E": 104,
169
+ "##L": 105,
170
+ "##M": 106,
171
+ "##D": 107,
172
+ "##U": 108,
173
+ "##B": 109,
174
+ "##f": 110,
175
  "##P": 111,
176
+ "##K": 112,
177
+ "##G": 113,
178
+ "##T": 114,
179
+ "##Z": 115,
180
+ "##8": 116,
181
+ "##q": 117,
182
+ "##Y": 118,
183
+ "##W": 119,
184
+ "##V": 120,
185
+ "##I": 121,
186
+ "##F": 122,
187
+ "##H": 123,
188
+ "##z": 124,
189
+ "##N": 125,
190
  "##X": 126,
191
+ "##J": 127,
192
+ "##Q": 128,
193
  "##he": 129,
194
  "the": 130,
195
  "##on": 131,
 
5493
  "colon": 5429,
5494
  "depression": 5430,
5495
  "##known": 5431,
5496
+ "##know": 5432,
5497
+ "##men": 5433,
5498
  "##insula": 5434,
5499
  "outdoor": 5435,
5500
  "##unk": 5436,
 
9005
  "IoT": 8941,
9006
  "tac": 8942,
9007
  "staying": 8943,
9008
+ "##26": 8944,
9009
+ "##zens": 8945,
9010
  "heated": 8946,
9011
  "none": 8947,
9012
  "temperatures": 8948,
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"name_or_path": "english-WPE.json", "tokenizer_class": "PreTrainedTokenizerFast"}
 
1
+ {"tokenizer_class": "PreTrainedTokenizerFast"}