justtherightsize
committed on
Commit
•
437beb0
1
Parent(s):
c4e6174
Upload tokenizer
Browse files
- merges.txt +1 -1
- tokenizer.json +2 -2
- vocab.json +0 -0
merges.txt
CHANGED
@@ -62,7 +62,7 @@ l o
|
|
62 |
n i
|
63 |
o r
|
64 |
d e
|
65 |
-
Ġ
|
66 |
ÅĻ i
|
67 |
c i
|
68 |
o n
|
|
|
62 |
n i
|
63 |
o r
|
64 |
d e
|
65 |
+
Ġ þ
|
66 |
ÅĻ i
|
67 |
c i
|
68 |
o n
|
tokenizer.json
CHANGED
@@ -87,7 +87,7 @@
|
|
87 |
"[CLS]": 0,
|
88 |
"[PAD]": 1,
|
89 |
"[SEP]": 2,
|
90 |
-
"
|
91 |
"Ġ,": 4,
|
92 |
"Ġ.": 5,
|
93 |
"Ġa": 6,
|
@@ -52109,7 +52109,7 @@
|
|
52109 |
"n i",
|
52110 |
"o r",
|
52111 |
"d e",
|
52112 |
-
"Ġ
|
52113 |
"ÅĻ i",
|
52114 |
"c i",
|
52115 |
"o n",
|
|
|
87 |
"[CLS]": 0,
|
88 |
"[PAD]": 1,
|
89 |
"[SEP]": 2,
|
90 |
+
"þ": 3,
|
91 |
"Ġ,": 4,
|
92 |
"Ġ.": 5,
|
93 |
"Ġa": 6,
|
|
|
52109 |
"n i",
|
52110 |
"o r",
|
52111 |
"d e",
|
52112 |
+
"Ġ þ",
|
52113 |
"ÅĻ i",
|
52114 |
"c i",
|
52115 |
"o n",
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|