perlthoughts committed on
Commit
ae20703
1 Parent(s): 5e572c2

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +1 -8
  2. tokenizer.json +0 -63
  3. tokenizer_config.json +0 -56
added_tokens.json CHANGED
@@ -1,11 +1,4 @@
1
  {
2
- "<|assistant|>": 32003,
3
  "<|end_of_turn|>": 32000,
4
- "<|im_end|>": 32006,
5
- "<|im_start|>": 32007,
6
- "<|pad_0|>": 32001,
7
- "<|prompter|>": 32005,
8
- "<|system|>": 32004,
9
- "<|user|>": 32002,
10
- "[PAD]": 32008
11
  }
 
1
  {
 
2
  "<|end_of_turn|>": 32000,
3
+ "<|pad_0|>": 32001
 
 
 
 
 
 
4
  }
tokenizer.json CHANGED
@@ -47,69 +47,6 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
- },
51
- {
52
- "id": 32002,
53
- "content": "<|user|>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
- },
60
- {
61
- "id": 32003,
62
- "content": "<|assistant|>",
63
- "single_word": false,
64
- "lstrip": false,
65
- "rstrip": false,
66
- "normalized": false,
67
- "special": true
68
- },
69
- {
70
- "id": 32004,
71
- "content": "<|system|>",
72
- "single_word": false,
73
- "lstrip": false,
74
- "rstrip": false,
75
- "normalized": false,
76
- "special": true
77
- },
78
- {
79
- "id": 32005,
80
- "content": "<|prompter|>",
81
- "single_word": false,
82
- "lstrip": false,
83
- "rstrip": false,
84
- "normalized": false,
85
- "special": true
86
- },
87
- {
88
- "id": 32006,
89
- "content": "<|im_end|>",
90
- "single_word": false,
91
- "lstrip": false,
92
- "rstrip": false,
93
- "normalized": false,
94
- "special": true
95
- },
96
- {
97
- "id": 32007,
98
- "content": "<|im_start|>",
99
- "single_word": false,
100
- "lstrip": false,
101
- "rstrip": false,
102
- "normalized": false,
103
- "special": true
104
- },
105
- {
106
- "id": 32008,
107
- "content": "[PAD]",
108
- "single_word": false,
109
- "lstrip": false,
110
- "rstrip": false,
111
- "normalized": false,
112
- "special": true
113
  }
114
  ],
115
  "normalizer": {
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
tokenizer_config.json CHANGED
@@ -39,62 +39,6 @@
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
- },
43
- "32002": {
44
- "content": "<|user|>",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
- },
51
- "32003": {
52
- "content": "<|assistant|>",
53
- "lstrip": false,
54
- "normalized": false,
55
- "rstrip": false,
56
- "single_word": false,
57
- "special": true
58
- },
59
- "32004": {
60
- "content": "<|system|>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false,
65
- "special": true
66
- },
67
- "32005": {
68
- "content": "<|prompter|>",
69
- "lstrip": false,
70
- "normalized": false,
71
- "rstrip": false,
72
- "single_word": false,
73
- "special": true
74
- },
75
- "32006": {
76
- "content": "<|im_end|>",
77
- "lstrip": false,
78
- "normalized": false,
79
- "rstrip": false,
80
- "single_word": false,
81
- "special": true
82
- },
83
- "32007": {
84
- "content": "<|im_start|>",
85
- "lstrip": false,
86
- "normalized": false,
87
- "rstrip": false,
88
- "single_word": false,
89
- "special": true
90
- },
91
- "32008": {
92
- "content": "[PAD]",
93
- "lstrip": false,
94
- "normalized": false,
95
- "rstrip": false,
96
- "single_word": false,
97
- "special": true
98
  }
99
  },
100
  "additional_special_tokens": [
 
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "additional_special_tokens": [