Upload processor
Browse files
- added_tokens.json +1 -0
- special_tokens_map.json +3 -21
- tokenizer_config.json +0 -0
added_tokens.json
CHANGED
@@ -1518,6 +1518,7 @@
|
|
1518 |
"<|da|>": 50285,
|
1519 |
"<|de|>": 50261,
|
1520 |
"<|el|>": 50281,
|
|
|
1521 |
"<|en|>": 50259,
|
1522 |
"<|es|>": 50262,
|
1523 |
"<|et|>": 50307,
|
|
|
1518 |
"<|da|>": 50285,
|
1519 |
"<|de|>": 50261,
|
1520 |
"<|el|>": 50281,
|
1521 |
+
"<|endoftext|>": 50257,
|
1522 |
"<|en|>": 50259,
|
1523 |
"<|es|>": 50262,
|
1524 |
"<|et|>": 50307,
|
special_tokens_map.json
CHANGED
@@ -108,26 +108,8 @@
|
|
108 |
"<|nocaptions|>",
|
109 |
"<|notimestamps|>"
|
110 |
],
|
111 |
-
"bos_token": {
|
112 |
-
"content": "<|endoftext|>",
|
113 |
-
"lstrip": false,
|
114 |
-
"normalized": true,
|
115 |
-
"rstrip": false,
|
116 |
-
"single_word": false
|
117 |
-
},
|
118 |
-
"eos_token": {
|
119 |
-
"content": "<|endoftext|>",
|
120 |
-
"lstrip": false,
|
121 |
-
"normalized": true,
|
122 |
-
"rstrip": false,
|
123 |
-
"single_word": false
|
124 |
-
},
|
125 |
"pad_token": "<|endoftext|>",
|
126 |
-
"unk_token": {
|
127 |
-
"content": "<|endoftext|>",
|
128 |
-
"lstrip": false,
|
129 |
-
"normalized": true,
|
130 |
-
"rstrip": false,
|
131 |
-
"single_word": false
|
132 |
-
}
|
133 |
}
|
|
|
108 |
"<|nocaptions|>",
|
109 |
"<|notimestamps|>"
|
110 |
],
|
111 |
+
"bos_token": "<|endoftext|>",
|
112 |
+
"eos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
"pad_token": "<|endoftext|>",
|
114 |
+
"unk_token": "<|endoftext|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
}
|
tokenizer_config.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|