nielsr HF staff committed on
Commit
4015ee2
1 Parent(s): dbf5333

Add tokenizer files

Browse files
config.json CHANGED
@@ -3,9 +3,9 @@
3
  "CanineModel"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
  "downsampling_rate": 4,
8
- "eos_token_id": 2,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
@@ -19,7 +19,7 @@
19
  "num_hash_buckets": 16384,
20
  "num_hash_functions": 8,
21
  "num_hidden_layers": 12,
22
- "pad_token_id": 1,
23
  "transformers_version": "4.7.0.dev0",
24
  "type_vocab_size": 16,
25
  "upsampling_kernel_size": 4,
 
3
  "CanineModel"
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
+ "bos_token_id": 57344,
7
  "downsampling_rate": 4,
8
+ "eos_token_id": 57345,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
 
19
  "num_hash_buckets": 16384,
20
  "num_hash_functions": 8,
21
  "num_hidden_layers": 12,
22
+ "pad_token_id": 0,
23
  "transformers_version": "4.7.0.dev0",
24
  "type_vocab_size": 16,
25
  "upsampling_kernel_size": 4,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6086b787010105aee1c8c2bc3048271b58c81adc0021d898bc2830b271556301
3
  size 528561767
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2828cdf64dc63f7de96cab8a952395245ac3e011223b9bcd5e4f3679b810ae7a
3
  size 528561767
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {}
 
1
+ {"bos_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "pad_token": {"content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "cls_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}}
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {}
 
1
+ {"bos_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sep_token": {"content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "cls_token": {"content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048}