yvs007 committed on
Commit
afc904a
1 Parent(s): d191cb7

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +43 -86
  2. vocab.txt +19 -62
tokenizer.json CHANGED
@@ -150,93 +150,50 @@
150
  "[CLS]": 2,
151
  "[SEP]": 3,
152
  "[MASK]": 4,
153
- "/": 5,
154
- "a": 6,
155
- "c": 7,
156
- "d": 8,
157
- "e": 9,
158
- "g": 10,
159
- "h": 11,
160
- "i": 12,
161
- "k": 13,
162
- "l": 14,
163
- "m": 15,
164
- "n": 16,
165
- "o": 17,
166
- "p": 18,
167
- "r": 19,
168
- "s": 20,
169
- "t": 21,
170
- "u": 22,
171
- "w": 23,
172
- "y": 24,
173
  "##a": 25,
174
- "##s": 26,
175
- "##t": 27,
176
- "##g": 28,
177
- "##e": 29,
178
- "##r": 30,
179
- "##h": 31,
180
- "##n": 32,
181
- "##i": 33,
182
- "##c": 34,
183
- "##y": 35,
184
- "##o": 36,
185
- "##l": 37,
186
- "##u": 38,
187
- "##m": 39,
188
- "##k": 40,
189
- "pl": 41,
190
- "##as": 42,
191
- "##ar": 43,
192
- "##re": 44,
193
- "##it": 45,
194
- "##ot": 46,
195
- "##le": 47,
196
- "plot": 48,
197
- "cas": 49,
198
- "di": 50,
199
- "et": 51,
200
- "ge": 52,
201
- "or": 53,
202
- "pa": 54,
203
- "re": 55,
204
- "tit": 56,
205
- "wi": 57,
206
- "ye": 58,
207
- "##su": 59,
208
- "##to": 60,
209
- "##ge": 61,
210
- "##gi": 62,
211
- "##hn": 63,
212
- "##nre": 64,
213
- "##ic": 65,
214
- "##igi": 66,
215
- "##cto": 67,
216
- "##mm": 68,
217
- "##ki": 69,
218
- "##ase": 70,
219
- "##ary": 71,
220
- "##recto": 72,
221
- "##ity": 73,
222
- "##lease": 74,
223
- "plotsu": 75,
224
- "cast": 76,
225
- "directo": 77,
226
- "ethn": 78,
227
- "genre": 79,
228
- "origi": 80,
229
- "page": 81,
230
- "release": 82,
231
- "title": 83,
232
- "wiki": 84,
233
- "year": 85,
234
- "##icity": 86,
235
- "##mmary": 87,
236
- "plotsummary": 88,
237
- "director": 89,
238
- "ethnicity": 90,
239
- "origin": 91
240
  }
241
  }
242
  }
 
150
  "[CLS]": 2,
151
  "[SEP]": 3,
152
  "[MASK]": 4,
153
+ "a": 5,
154
+ "e": 6,
155
+ "g": 7,
156
+ "i": 8,
157
+ "l": 9,
158
+ "m": 10,
159
+ "n": 11,
160
+ "o": 12,
161
+ "p": 13,
162
+ "r": 14,
163
+ "s": 15,
164
+ "t": 16,
165
+ "u": 17,
166
+ "y": 18,
167
+ "##l": 19,
168
+ "##o": 20,
169
+ "##t": 21,
170
+ "##s": 22,
171
+ "##u": 23,
172
+ "##m": 24,
173
  "##a": 25,
174
+ "##r": 26,
175
+ "##y": 27,
176
+ "##e": 28,
177
+ "##n": 29,
178
+ "##i": 30,
179
+ "ge": 31,
180
+ "pl": 32,
181
+ "ti": 33,
182
+ "##le": 34,
183
+ "##ot": 35,
184
+ "##tle": 36,
185
+ "##su": 37,
186
+ "##mm": 38,
187
+ "##ar": 39,
188
+ "##re": 40,
189
+ "##nre": 41,
190
+ "genre": 42,
191
+ "plot": 43,
192
+ "title": 44,
193
+ "##summ": 45,
194
+ "##ary": 46,
195
+ "plotsumm": 47,
196
+ "plotsummary": 48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  }
198
  }
199
  }
vocab.txt CHANGED
@@ -3,15 +3,10 @@
3
  [CLS]
4
  [SEP]
5
  [MASK]
6
- /
7
  a
8
- c
9
- d
10
  e
11
  g
12
- h
13
  i
14
- k
15
  l
16
  m
17
  n
@@ -21,72 +16,34 @@ r
21
  s
22
  t
23
  u
24
- w
25
  y
26
- ##a
27
- ##s
28
  ##t
29
- ##g
30
- ##e
 
 
31
  ##r
32
- ##h
 
33
  ##n
34
  ##i
35
- ##c
36
- ##y
37
- ##o
38
- ##l
39
- ##u
40
- ##m
41
- ##k
42
  pl
43
- ##as
44
- ##ar
45
- ##re
46
- ##it
47
- ##ot
48
  ##le
49
- plot
50
- cas
51
- di
52
- et
53
- ge
54
- or
55
- pa
56
- re
57
- tit
58
- wi
59
- ye
60
  ##su
61
- ##to
62
- ##ge
63
- ##gi
64
- ##hn
65
- ##nre
66
- ##ic
67
- ##igi
68
- ##cto
69
  ##mm
70
- ##ki
71
- ##ase
72
- ##ary
73
- ##recto
74
- ##ity
75
- ##lease
76
- plotsu
77
- cast
78
- directo
79
- ethn
80
  genre
81
- origi
82
- page
83
- release
84
  title
85
- wiki
86
- year
87
- ##icity
88
- ##mmary
89
  plotsummary
90
- director
91
- ethnicity
92
- origin
 
3
  [CLS]
4
  [SEP]
5
  [MASK]
 
6
  a
 
 
7
  e
8
  g
 
9
  i
 
10
  l
11
  m
12
  n
 
16
  s
17
  t
18
  u
 
19
  y
20
+ ##l
21
+ ##o
22
  ##t
23
+ ##s
24
+ ##u
25
+ ##m
26
+ ##a
27
  ##r
28
+ ##y
29
+ ##e
30
  ##n
31
  ##i
32
+ ge
 
 
 
 
 
 
33
  pl
34
+ ti
 
 
 
 
35
  ##le
36
+ ##ot
37
+ ##tle
 
 
 
 
 
 
 
 
 
38
  ##su
 
 
 
 
 
 
 
 
39
  ##mm
40
+ ##ar
41
+ ##re
42
+ ##nre
 
 
 
 
 
 
 
43
  genre
44
+ plot
 
 
45
  title
46
+ ##summ
47
+ ##ary
48
+ plotsumm
 
49
  plotsummary