huseinzol05 committed on
Commit
03201a6
1 Parent(s): 3db598d

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -20,7 +20,7 @@
20
  "single_word": false
21
  },
22
  "pad_token": {
23
- "content": "<pad>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
 
20
  "single_word": false
21
  },
22
  "pad_token": {
23
+ "content": "<unk>",
24
  "lstrip": false,
25
  "normalized": false,
26
  "rstrip": false,
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 20480,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
tokenizer_config.json CHANGED
@@ -40,19 +40,15 @@
40
  "<unk>"
41
  ],
42
  "bos_token": "<s>",
43
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
44
  "clean_up_tokenization_spaces": true,
45
  "eos_token": "</s>",
46
- "max_length": 20480,
47
  "model_input_names": [
48
  "input_ids",
49
  "attention_mask"
50
  ],
51
  "model_max_length": 1000000000000000019884624838656,
52
- "pad_token": "<pad>",
53
- "stride": 0,
54
  "tokenizer_class": "PreTrainedTokenizerFast",
55
- "truncation_side": "right",
56
- "truncation_strategy": "longest_first",
57
  "unk_token": "<unk>"
58
  }
 
40
  "<unk>"
41
  ],
42
  "bos_token": "<s>",
43
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
44
  "clean_up_tokenization_spaces": true,
45
  "eos_token": "</s>",
 
46
  "model_input_names": [
47
  "input_ids",
48
  "attention_mask"
49
  ],
50
  "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "<unk>",
 
52
  "tokenizer_class": "PreTrainedTokenizerFast",
 
 
53
  "unk_token": "<unk>"
54
  }