ankur310794
committed on
Commit
•
4e4dc5e
1
Parent(s):
dd9ce62
Update from
Browse files
- added_tokens.json +1 -1
- tokenizer_config.json +1 -1
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"``": 30524, "<table>": 30536, "<h2>": 30531, "</td>": 30551, "</dd>": 30541, "<td>": 30537, "--": 30526, "</ol>": 30548, "<ul>": 30540, "<h3>": 30532, "</tr>": 30553, "<h1>": 30530, "</h3>": 30546, "</ul>": 30554, "<th>": 30538, "<dl>": 30528, "</dt>": 30543, "''": 30525, "<dt>": 30529, "</p>": 30549, "<p>": 30535, "<li>": 30533, "<dd>": 30527, "</li>": 30547, "</h1>": 30544, "<ol>": 30534, "</table>": 30550, "</dl>": 30542, "</th>": 30552, "<tr>": 30539, "</h2>": 30545, "td_colspan": 30522, "th_colspan": 30523}
|
tokenizer_config.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"do_lower_case": true, "
|
|
|
1 |
+
{"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "bert-large-uncased"}
|