finiteautomata committed on
Commit
d9fc116
1 Parent(s): 4738326

improve tokenization

Browse files
added_tokens.json CHANGED
@@ -1 +1 @@
1
- {"[EMOJI]": 31004, "[USER]": 31002, "[HASHTAG]": 31003}
1
+ {"url": 31003, "@usuario": 31002, "emoji": 31005, "hashtag": 31004}
config.json CHANGED
@@ -32,5 +32,5 @@
32
  "transformers_version": "4.6.1",
33
  "type_vocab_size": 2,
34
  "use_cache": true,
35
- "vocab_size": 31005
36
  }
32
  "transformers_version": "4.6.1",
33
  "type_vocab_size": 2,
34
  "use_cache": true,
35
+ "vocab_size": 31006
36
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae62aae323fb95e9a89a10783c6d54565251135d6e5e3ebfbaaa1cede9b26c8d
3
- size 439508881
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6301fc83af4c375b6b6c1d8916ec87bd853c2715250b91645da1e859ba6e000a
3
+ size 439512342
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "additional_special_tokens": ["[USER]", "[HASHTAG]", "[EMOJI]"]}
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff