i4never committed on
Commit
d2b2f67
1 Parent(s): 3e674be

update tigerbot-7b-base-v3-tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "\n\n### Instruction:\n": 60512,
3
+ "\n\n### Response:\n": 60513,
4
+ "<pad>": 60514
5
+ }
special_tokens_map.json CHANGED
@@ -1,4 +1,8 @@
1
  {
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
@@ -13,6 +17,7 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
16
  "unk_token": {
17
  "content": "<unk>",
18
  "lstrip": false,
 
1
  {
2
+ "additional_special_tokens": [
3
+ "\n\n### Instruction:\n",
4
+ "\n\n### Response:\n"
5
+ ],
6
  "bos_token": {
7
  "content": "<s>",
8
  "lstrip": false,
 
17
  "rstrip": false,
18
  "single_word": false
19
  },
20
+ "pad_token": "<pad>",
21
  "unk_token": {
22
  "content": "<unk>",
23
  "lstrip": false,
tokenizer.json CHANGED
@@ -29,6 +29,33 @@
29
  "rstrip": false,
30
  "normalized": true,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
@@ -51,12 +78,6 @@
51
  "post_processor": {
52
  "type": "TemplateProcessing",
53
  "single": [
54
- {
55
- "SpecialToken": {
56
- "id": "<s>",
57
- "type_id": 0
58
- }
59
- },
60
  {
61
  "Sequence": {
62
  "id": "A",
@@ -65,24 +86,12 @@
65
  }
66
  ],
67
  "pair": [
68
- {
69
- "SpecialToken": {
70
- "id": "<s>",
71
- "type_id": 0
72
- }
73
- },
74
  {
75
  "Sequence": {
76
  "id": "A",
77
  "type_id": 0
78
  }
79
  },
80
- {
81
- "SpecialToken": {
82
- "id": "<s>",
83
- "type_id": 1
84
- }
85
- },
86
  {
87
  "Sequence": {
88
  "id": "B",
@@ -90,17 +99,7 @@
90
  }
91
  }
92
  ],
93
- "special_tokens": {
94
- "<s>": {
95
- "id": "<s>",
96
- "ids": [
97
- 1
98
- ],
99
- "tokens": [
100
- "<s>"
101
- ]
102
- }
103
- }
104
  },
105
  "decoder": {
106
  "type": "Sequence",
 
29
  "rstrip": false,
30
  "normalized": true,
31
  "special": true
32
+ },
33
+ {
34
+ "id": 60512,
35
+ "content": "\n\n### Instruction:\n",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 60513,
44
+ "content": "\n\n### Response:\n",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 60514,
53
+ "content": "<pad>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  }
60
  ],
61
  "normalizer": {
 
78
  "post_processor": {
79
  "type": "TemplateProcessing",
80
  "single": [
 
 
 
 
 
 
81
  {
82
  "Sequence": {
83
  "id": "A",
 
86
  }
87
  ],
88
  "pair": [
 
 
 
 
 
 
89
  {
90
  "Sequence": {
91
  "id": "A",
92
  "type_id": 0
93
  }
94
  },
 
 
 
 
 
 
95
  {
96
  "Sequence": {
97
  "id": "B",
 
99
  }
100
  }
101
  ],
102
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
103
  },
104
  "decoder": {
105
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
  "bos_token": {
5
  "__type": "AddedToken",
6
  "content": "<s>",
@@ -30,5 +28,6 @@
30
  "normalized": true,
31
  "rstrip": false,
32
  "single_word": false
33
- }
 
34
  }
 
1
  {
 
 
2
  "bos_token": {
3
  "__type": "AddedToken",
4
  "content": "<s>",
 
28
  "normalized": true,
29
  "rstrip": false,
30
  "single_word": false
31
+ },
32
+ "use_fast": true
33
  }