perlthoughts committed on
Commit
9476abb
1 Parent(s): 492c3f8

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "<|end_of_turn|>": 32000,
3
- "<|pad_0|>": 32001
4
  }
 
1
  {
2
+ "<|im_end|>": 32000,
3
+ "<|im_start|>": 32001
4
  }
special_tokens_map.json CHANGED
@@ -2,9 +2,7 @@
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
- "</s>",
6
- "<|end_of_turn|>",
7
- "<|pad_0|>"
8
  ],
9
  "bos_token": {
10
  "content": "<s>",
@@ -14,7 +12,7 @@
14
  "single_word": false
15
  },
16
  "eos_token": {
17
- "content": "<|end_of_turn|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
2
  "additional_special_tokens": [
3
  "<unk>",
4
  "<s>",
5
+ "</s>"
 
 
6
  ],
7
  "bos_token": {
8
  "content": "<s>",
 
12
  "single_word": false
13
  },
14
  "eos_token": {
15
+ "content": "</s>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
tokenizer.json CHANGED
@@ -32,7 +32,7 @@
32
  },
33
  {
34
  "id": 32000,
35
- "content": "<|end_of_turn|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
@@ -41,7 +41,7 @@
41
  },
42
  {
43
  "id": 32001,
44
- "content": "<|pad_0|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 32000,
35
+ "content": "<|im_end|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
 
41
  },
42
  {
43
  "id": 32001,
44
+ "content": "<|im_start|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -25,7 +25,7 @@
25
  "special": true
26
  },
27
  "32000": {
28
- "content": "<|end_of_turn|>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
@@ -33,7 +33,7 @@
33
  "special": true
34
  },
35
  "32001": {
36
- "content": "<|pad_0|>",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
@@ -44,18 +44,17 @@
44
  "additional_special_tokens": [
45
  "<unk>",
46
  "<s>",
47
- "</s>",
48
- "<|end_of_turn|>",
49
- "<|pad_0|>"
50
  ],
51
  "bos_token": "<s>",
52
  "clean_up_tokenization_spaces": false,
53
- "device_map": "auto",
54
- "eos_token": "<|end_of_turn|>",
 
 
55
  "legacy": true,
56
- "model_max_length": 16384,
57
  "pad_token": null,
58
- "padding_side": "right",
59
  "sp_model_kwargs": {},
60
  "spaces_between_special_tokens": false,
61
  "tokenizer_class": "LlamaTokenizer",
 
25
  "special": true
26
  },
27
  "32000": {
28
+ "content": "<|im_end|>",
29
  "lstrip": false,
30
  "normalized": false,
31
  "rstrip": false,
 
33
  "special": true
34
  },
35
  "32001": {
36
+ "content": "<|im_start|>",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
 
44
  "additional_special_tokens": [
45
  "<unk>",
46
  "<s>",
47
+ "</s>"
 
 
48
  ],
49
  "bos_token": "<s>",
50
  "clean_up_tokenization_spaces": false,
51
+ "device_map": {
52
+ "": "cuda"
53
+ },
54
+ "eos_token": "</s>",
55
  "legacy": true,
56
+ "model_max_length": 1000000000000000019884624838656,
57
  "pad_token": null,
 
58
  "sp_model_kwargs": {},
59
  "spaces_between_special_tokens": false,
60
  "tokenizer_class": "LlamaTokenizer",