baskra committed on
Commit
771dea5
·
verified ·
1 Parent(s): 78c8b80

Upload tokenizer

Browse files
Files changed (3) hide show
  1. added_tokens.json +13 -12
  2. tokenizer.json +21 -12
  3. tokenizer_config.json +20 -12
added_tokens.json CHANGED
@@ -1,17 +1,18 @@
1
  {
2
- "<ADULT>": 50273,
3
- "<BLACK>": 50267,
 
4
  "<CHILD>": 50274,
5
- "<HISPANIC>": 50278,
6
- "<MAN>": 50279,
7
- "<MIDDLE_AGED>": 50272,
8
- "<NATIVE_AMERICAN>": 50268,
9
- "<NON_BINARY>": 50269,
10
- "<PACIFIC_ISLANDER>": 50271,
11
  "<PERT_SEP>": 50266,
12
- "<SENIOR>": 50270,
13
  "<SEP>": 50265,
14
- "<WHITE>": 50277,
15
- "<WOMAN>": 50275,
16
- "<YOUNG>": 50276
17
  }
 
1
  {
2
+ "<ADULT>": 50276,
3
+ "<ASIAN>": 50268,
4
+ "<BLACK>": 50272,
5
  "<CHILD>": 50274,
6
+ "<HISPANIC>": 50277,
7
+ "<MAN>": 50267,
8
+ "<MIDDLE_AGED>": 50275,
9
+ "<NATIVE_AMERICAN>": 50273,
10
+ "<NON_BINARY>": 50280,
11
+ "<PACIFIC_ISLANDER>": 50269,
12
  "<PERT_SEP>": 50266,
13
+ "<SENIOR>": 50278,
14
  "<SEP>": 50265,
15
+ "<WHITE>": 50270,
16
+ "<WOMAN>": 50271,
17
+ "<YOUNG>": 50279
18
  }
tokenizer.json CHANGED
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 50267,
71
- "content": "<BLACK>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 50268,
80
- "content": "<NATIVE_AMERICAN>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 50269,
89
- "content": "<NON_BINARY>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 50270,
98
- "content": "<SENIOR>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 50271,
107
- "content": "<PACIFIC_ISLANDER>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 50272,
116
- "content": "<MIDDLE_AGED>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 50273,
125
- "content": "<ADULT>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -140,7 +140,7 @@
140
  },
141
  {
142
  "id": 50275,
143
- "content": "<WOMAN>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +149,7 @@
149
  },
150
  {
151
  "id": 50276,
152
- "content": "<YOUNG>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -158,7 +158,7 @@
158
  },
159
  {
160
  "id": 50277,
161
- "content": "<WHITE>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
@@ -167,7 +167,7 @@
167
  },
168
  {
169
  "id": 50278,
170
- "content": "<HISPANIC>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
@@ -176,7 +176,16 @@
176
  },
177
  {
178
  "id": 50279,
179
- "content": "<MAN>",
 
 
 
 
 
 
 
 
 
180
  "single_word": false,
181
  "lstrip": false,
182
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 50267,
71
+ "content": "<MAN>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 50268,
80
+ "content": "<ASIAN>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 50269,
89
+ "content": "<PACIFIC_ISLANDER>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 50270,
98
+ "content": "<WHITE>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 50271,
107
+ "content": "<WOMAN>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 50272,
116
+ "content": "<BLACK>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 50273,
125
+ "content": "<NATIVE_AMERICAN>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 50275,
143
+ "content": "<MIDDLE_AGED>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
 
149
  },
150
  {
151
  "id": 50276,
152
+ "content": "<ADULT>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
 
158
  },
159
  {
160
  "id": 50277,
161
+ "content": "<HISPANIC>",
162
  "single_word": false,
163
  "lstrip": false,
164
  "rstrip": false,
 
167
  },
168
  {
169
  "id": 50278,
170
+ "content": "<SENIOR>",
171
  "single_word": false,
172
  "lstrip": false,
173
  "rstrip": false,
 
176
  },
177
  {
178
  "id": 50279,
179
+ "content": "<YOUNG>",
180
+ "single_word": false,
181
+ "lstrip": false,
182
+ "rstrip": false,
183
+ "normalized": false,
184
+ "special": true
185
+ },
186
+ {
187
+ "id": 50280,
188
+ "content": "<NON_BINARY>",
189
  "single_word": false,
190
  "lstrip": false,
191
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -58,7 +58,7 @@
58
  "special": true
59
  },
60
  "50267": {
61
- "content": "<BLACK>",
62
  "lstrip": false,
63
  "normalized": false,
64
  "rstrip": false,
@@ -66,7 +66,7 @@
66
  "special": true
67
  },
68
  "50268": {
69
- "content": "<NATIVE_AMERICAN>",
70
  "lstrip": false,
71
  "normalized": false,
72
  "rstrip": false,
@@ -74,7 +74,7 @@
74
  "special": true
75
  },
76
  "50269": {
77
- "content": "<NON_BINARY>",
78
  "lstrip": false,
79
  "normalized": false,
80
  "rstrip": false,
@@ -82,7 +82,7 @@
82
  "special": true
83
  },
84
  "50270": {
85
- "content": "<SENIOR>",
86
  "lstrip": false,
87
  "normalized": false,
88
  "rstrip": false,
@@ -90,7 +90,7 @@
90
  "special": true
91
  },
92
  "50271": {
93
- "content": "<PACIFIC_ISLANDER>",
94
  "lstrip": false,
95
  "normalized": false,
96
  "rstrip": false,
@@ -98,7 +98,7 @@
98
  "special": true
99
  },
100
  "50272": {
101
- "content": "<MIDDLE_AGED>",
102
  "lstrip": false,
103
  "normalized": false,
104
  "rstrip": false,
@@ -106,7 +106,7 @@
106
  "special": true
107
  },
108
  "50273": {
109
- "content": "<ADULT>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  "special": true
123
  },
124
  "50275": {
125
- "content": "<WOMAN>",
126
  "lstrip": false,
127
  "normalized": false,
128
  "rstrip": false,
@@ -130,7 +130,7 @@
130
  "special": true
131
  },
132
  "50276": {
133
- "content": "<YOUNG>",
134
  "lstrip": false,
135
  "normalized": false,
136
  "rstrip": false,
@@ -138,7 +138,7 @@
138
  "special": true
139
  },
140
  "50277": {
141
- "content": "<WHITE>",
142
  "lstrip": false,
143
  "normalized": false,
144
  "rstrip": false,
@@ -146,7 +146,7 @@
146
  "special": true
147
  },
148
  "50278": {
149
- "content": "<HISPANIC>",
150
  "lstrip": false,
151
  "normalized": false,
152
  "rstrip": false,
@@ -154,7 +154,15 @@
154
  "special": true
155
  },
156
  "50279": {
157
- "content": "<MAN>",
 
 
 
 
 
 
 
 
158
  "lstrip": false,
159
  "normalized": false,
160
  "rstrip": false,
 
58
  "special": true
59
  },
60
  "50267": {
61
+ "content": "<MAN>",
62
  "lstrip": false,
63
  "normalized": false,
64
  "rstrip": false,
 
66
  "special": true
67
  },
68
  "50268": {
69
+ "content": "<ASIAN>",
70
  "lstrip": false,
71
  "normalized": false,
72
  "rstrip": false,
 
74
  "special": true
75
  },
76
  "50269": {
77
+ "content": "<PACIFIC_ISLANDER>",
78
  "lstrip": false,
79
  "normalized": false,
80
  "rstrip": false,
 
82
  "special": true
83
  },
84
  "50270": {
85
+ "content": "<WHITE>",
86
  "lstrip": false,
87
  "normalized": false,
88
  "rstrip": false,
 
90
  "special": true
91
  },
92
  "50271": {
93
+ "content": "<WOMAN>",
94
  "lstrip": false,
95
  "normalized": false,
96
  "rstrip": false,
 
98
  "special": true
99
  },
100
  "50272": {
101
+ "content": "<BLACK>",
102
  "lstrip": false,
103
  "normalized": false,
104
  "rstrip": false,
 
106
  "special": true
107
  },
108
  "50273": {
109
+ "content": "<NATIVE_AMERICAN>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
 
122
  "special": true
123
  },
124
  "50275": {
125
+ "content": "<MIDDLE_AGED>",
126
  "lstrip": false,
127
  "normalized": false,
128
  "rstrip": false,
 
130
  "special": true
131
  },
132
  "50276": {
133
+ "content": "<ADULT>",
134
  "lstrip": false,
135
  "normalized": false,
136
  "rstrip": false,
 
138
  "special": true
139
  },
140
  "50277": {
141
+ "content": "<HISPANIC>",
142
  "lstrip": false,
143
  "normalized": false,
144
  "rstrip": false,
 
146
  "special": true
147
  },
148
  "50278": {
149
+ "content": "<SENIOR>",
150
  "lstrip": false,
151
  "normalized": false,
152
  "rstrip": false,
 
154
  "special": true
155
  },
156
  "50279": {
157
+ "content": "<YOUNG>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "50280": {
165
+ "content": "<NON_BINARY>",
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,