m-ric HF staff committed on
Commit
9f3e00e
1 Parent(s): 01d3610

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,3 @@
1
  {
2
- "<image>": 100353,
3
  "<pad>": 100352
4
  }
 
1
  {
 
2
  "<pad>": 100352
3
  }
special_tokens_map.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
- "image_token": "<image>",
3
- "pad_token": "<pad>",
 
 
 
 
 
4
  "unk_token": {
5
  "content": "<unk>",
6
  "lstrip": false,
 
1
  {
2
+ "pad_token": {
3
+ "content": "<pad>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "unk_token": {
10
  "content": "<unk>",
11
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4cab625c7efc4eea85b5cead9707d897291df9e02749ab16988086218c6589b
3
- size 11091666
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02702cce6c4de786d52a3dc624b39e86134c159b7490ea30630739c6f723e7f8
3
+ size 11091481
tokenizer_config.json CHANGED
@@ -81,7 +81,7 @@
81
  "normalized": false,
82
  "rstrip": false,
83
  "single_word": false,
84
- "special": false
85
  },
86
  "10": {
87
  "content": "<|reserved008|>",
@@ -4938,25 +4938,13 @@
4938
  "rstrip": false,
4939
  "single_word": false,
4940
  "special": true
4941
- },
4942
- "100353": {
4943
- "content": "<image>",
4944
- "lstrip": false,
4945
- "normalized": false,
4946
- "rstrip": false,
4947
- "single_word": false,
4948
- "special": true
4949
  }
4950
  },
4951
  "bos_token": null,
4952
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
4953
  "clean_up_tokenization_spaces": false,
4954
  "eos_token": null,
4955
- "extra_special_tokens": {
4956
- "image_token": "<image>",
4957
- "pad_token": "<pad>"
4958
- },
4959
- "image_token": "<image>",
4960
  "legacy": true,
4961
  "model_max_length": 1000000000000000019884624838656,
4962
  "pad_token": "<pad>",
 
81
  "normalized": false,
82
  "rstrip": false,
83
  "single_word": false,
84
+ "special": true
85
  },
86
  "10": {
87
  "content": "<|reserved008|>",
 
4938
  "rstrip": false,
4939
  "single_word": false,
4940
  "special": true
 
 
 
 
 
 
 
 
4941
  }
4942
  },
4943
  "bos_token": null,
4944
  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}{% elif message['content'] is iterable %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<fim_prefix><|img|><fim_suffix>{% endif %}{% endfor %}{% endif %}<|im_end|>\n{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
4945
  "clean_up_tokenization_spaces": false,
4946
  "eos_token": null,
4947
+ "extra_special_tokens": {},
 
 
 
 
4948
  "legacy": true,
4949
  "model_max_length": 1000000000000000019884624838656,
4950
  "pad_token": "<pad>",