snoop2head
committed on
Commit
•
faa6ae2
1
Parent(s):
253c6db
add tokenizer
Browse files
- tokenizer.json +62 -62
- tokenizer_config.json +1 -1
tokenizer.json
CHANGED
@@ -129,67 +129,67 @@
|
|
129 |
"y": 65,
|
130 |
"z": 66,
|
131 |
"##u": 67,
|
132 |
-
"##
|
133 |
-
"##
|
134 |
-
"##
|
135 |
-
"##
|
136 |
-
"##
|
137 |
-
"##
|
138 |
-
"##
|
139 |
-
"##
|
140 |
-
"##
|
141 |
-
"##
|
142 |
-
"##
|
143 |
-
"##
|
144 |
"##m": 80,
|
145 |
-
"##
|
146 |
-
"##
|
147 |
-
"##
|
148 |
-
"##
|
149 |
-
"##
|
150 |
-
"##
|
151 |
-
"##
|
152 |
-
"##
|
153 |
-
"##
|
154 |
-
"##
|
155 |
-
"##
|
156 |
-
"##
|
157 |
-
"##
|
158 |
-
"##
|
159 |
-
"##
|
160 |
-
"##
|
161 |
-
"##
|
162 |
-
"##
|
163 |
-
"##
|
164 |
-
"##
|
165 |
-
"##
|
166 |
-
"##
|
167 |
-
"##
|
168 |
-
"##
|
169 |
-
"##
|
170 |
-
"##
|
171 |
-
"##
|
172 |
-
"##
|
173 |
-
"##
|
174 |
-
"##
|
175 |
"##P": 111,
|
176 |
-
"##
|
177 |
-
"##
|
178 |
-
"##
|
179 |
-
"##
|
180 |
-
"##
|
181 |
-
"##
|
182 |
-
"##
|
183 |
-
"##
|
184 |
-
"##
|
185 |
-
"##
|
186 |
-
"##
|
187 |
-
"##
|
188 |
-
"##
|
189 |
-
"##
|
190 |
"##X": 126,
|
191 |
-
"##
|
192 |
-
"##
|
193 |
"##he": 129,
|
194 |
"the": 130,
|
195 |
"##on": 131,
|
@@ -5493,8 +5493,8 @@
|
|
5493 |
"colon": 5429,
|
5494 |
"depression": 5430,
|
5495 |
"##known": 5431,
|
5496 |
-
"##
|
5497 |
-
"##
|
5498 |
"##insula": 5434,
|
5499 |
"outdoor": 5435,
|
5500 |
"##unk": 5436,
|
@@ -9005,8 +9005,8 @@
|
|
9005 |
"IoT": 8941,
|
9006 |
"tac": 8942,
|
9007 |
"staying": 8943,
|
9008 |
-
"##
|
9009 |
-
"##
|
9010 |
"heated": 8946,
|
9011 |
"none": 8947,
|
9012 |
"temperatures": 8948,
|
|
|
129 |
"y": 65,
|
130 |
"z": 66,
|
131 |
"##u": 67,
|
132 |
+
"##k": 68,
|
133 |
+
"##w": 69,
|
134 |
+
"##a": 70,
|
135 |
+
"##n": 71,
|
136 |
+
"##g": 72,
|
137 |
+
"##s": 73,
|
138 |
+
"##e": 74,
|
139 |
+
"##b": 75,
|
140 |
+
"##o": 76,
|
141 |
+
"##r": 77,
|
142 |
+
"##t": 78,
|
143 |
+
"##i": 79,
|
144 |
"##m": 80,
|
145 |
+
"##d": 81,
|
146 |
+
"##x": 82,
|
147 |
+
"##O": 83,
|
148 |
+
"##l": 84,
|
149 |
+
"##c": 85,
|
150 |
+
"##v": 86,
|
151 |
+
"##A": 87,
|
152 |
+
"##C": 88,
|
153 |
+
"##2": 89,
|
154 |
+
"##R": 90,
|
155 |
+
"##1": 91,
|
156 |
+
"##h": 92,
|
157 |
+
"##j": 93,
|
158 |
+
"##p": 94,
|
159 |
+
"##S": 95,
|
160 |
+
"##y": 96,
|
161 |
+
"##7": 97,
|
162 |
+
"##6": 98,
|
163 |
+
"##5": 99,
|
164 |
+
"##0": 100,
|
165 |
+
"##9": 101,
|
166 |
+
"##4": 102,
|
167 |
+
"##3": 103,
|
168 |
+
"##E": 104,
|
169 |
+
"##L": 105,
|
170 |
+
"##M": 106,
|
171 |
+
"##D": 107,
|
172 |
+
"##U": 108,
|
173 |
+
"##B": 109,
|
174 |
+
"##f": 110,
|
175 |
"##P": 111,
|
176 |
+
"##K": 112,
|
177 |
+
"##G": 113,
|
178 |
+
"##T": 114,
|
179 |
+
"##Z": 115,
|
180 |
+
"##8": 116,
|
181 |
+
"##q": 117,
|
182 |
+
"##Y": 118,
|
183 |
+
"##W": 119,
|
184 |
+
"##V": 120,
|
185 |
+
"##I": 121,
|
186 |
+
"##F": 122,
|
187 |
+
"##H": 123,
|
188 |
+
"##z": 124,
|
189 |
+
"##N": 125,
|
190 |
"##X": 126,
|
191 |
+
"##J": 127,
|
192 |
+
"##Q": 128,
|
193 |
"##he": 129,
|
194 |
"the": 130,
|
195 |
"##on": 131,
|
|
|
5493 |
"colon": 5429,
|
5494 |
"depression": 5430,
|
5495 |
"##known": 5431,
|
5496 |
+
"##know": 5432,
|
5497 |
+
"##men": 5433,
|
5498 |
"##insula": 5434,
|
5499 |
"outdoor": 5435,
|
5500 |
"##unk": 5436,
|
|
|
9005 |
"IoT": 8941,
|
9006 |
"tac": 8942,
|
9007 |
"staying": 8943,
|
9008 |
+
"##26": 8944,
|
9009 |
+
"##zens": 8945,
|
9010 |
"heated": 8946,
|
9011 |
"none": 8947,
|
9012 |
"temperatures": 8948,
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"tokenizer_class": "PreTrainedTokenizerFast"}
|