ming030890 committed
Commit: 3011db9
Parent: b7ddfa3

Upload tokenizer

Files changed (3):
  1. special_tokens_map.json (+1 -7)
  2. tokenizer.json (+8 -69)
  3. tokenizer_config.json (+0 -16)
special_tokens_map.json CHANGED
@@ -15,13 +15,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "sep_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
+  "sep_token": "</s>",
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
tokenizer.json CHANGED
@@ -17,15 +17,6 @@
       "normalized": false,
       "special": true
     },
-    {
-      "id": 1,
-      "content": "<s>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
     {
       "id": 2,
       "content": "</s>",
@@ -35,15 +26,6 @@
       "normalized": false,
       "special": true
     },
-    {
-      "id": 3,
-      "content": "<mask>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
     {
       "id": 15999,
       "content": "<pad>",
@@ -82,58 +64,15 @@
     "split": true
   },
   "post_processor": {
-    "type": "TemplateProcessing",
-    "single": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      }
+    "type": "BertProcessing",
+    "sep": [
+      "<s>",
+      1
     ],
-    "pair": [
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "B",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      }
-    ],
-    "special_tokens": {
-      "</s>": {
-        "id": "</s>",
-        "ids": [
-          2
-        ],
-        "tokens": [
-          "</s>"
-        ]
-      }
-    }
+    "cls": [
+      "</s>",
+      2
+    ]
   },
   "decoder": {
     "type": "Metaspace",
tokenizer_config.json CHANGED
@@ -9,14 +9,6 @@
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "<s>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
     "2": {
       "content": "</s>",
       "lstrip": false,
@@ -25,14 +17,6 @@
       "single_word": false,
       "special": true
     },
-    "3": {
-      "content": "<mask>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
     "15999": {
       "content": "<pad>",
       "lstrip": false,