jeremylegrand committed on
Commit
1acb2e4
·
1 Parent(s): bda0331

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +12 -12
  2. vocab.txt +8 -8
tokenizer.json CHANGED
@@ -156,20 +156,20 @@
156
  "l": 8,
157
  "m": 9,
158
  "t": 10,
159
- "##i": 11,
160
- "##t": 12,
161
- "##l": 13,
162
- "##e": 14,
163
- "##m": 15,
164
- "##a": 16,
165
  "em": 17,
166
  "ti": 18,
167
- "##il": 19,
168
- "##tl": 20,
169
- "##ail": 21,
170
- "email": 22,
171
- "titl": 23,
172
- "title": 24
173
  }
174
  }
175
  }
 
156
  "l": 8,
157
  "m": 9,
158
  "t": 10,
159
+ "##m": 11,
160
+ "##a": 12,
161
+ "##i": 13,
162
+ "##l": 14,
163
+ "##t": 15,
164
+ "##e": 16,
165
  "em": 17,
166
  "ti": 18,
167
+ "##ai": 19,
168
+ "##le": 20,
169
+ "##tle": 21,
170
+ "emai": 22,
171
+ "title": 23,
172
+ "email": 24
173
  }
174
  }
175
  }
vocab.txt CHANGED
@@ -9,17 +9,17 @@ i
9
  l
10
  m
11
  t
 
 
12
  ##i
13
- ##t
14
  ##l
 
15
  ##e
16
- ##m
17
- ##a
18
  em
19
  ti
20
- ##il
21
- ##tl
22
- ##ail
23
- email
24
- titl
25
  title
 
 
9
  l
10
  m
11
  t
12
+ ##m
13
+ ##a
14
  ##i
 
15
  ##l
16
+ ##t
17
  ##e
 
 
18
  em
19
  ti
20
+ ##ai
21
+ ##le
22
+ ##tle
23
+ emai
 
24
  title
25
+ email