sreejith8100 committed on
Commit
84c276a
1 Parent(s): 714758c

End of training

Browse files
Files changed (5) hide show
  1. README.md +7 -7
  2. added_tokens.json +54 -16
  3. special_tokens_map.json +283 -17
  4. tokenizer.json +358 -16
  5. tokenizer_config.json +375 -33
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 2.5746
21
 
22
  ## Model description
23
 
@@ -48,12 +48,12 @@ The following hyperparameters were used during training:
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 6.9386 | 1.0 | 40 | 6.1254 |
52
- | 3.8907 | 2.0 | 80 | 3.3437 |
53
- | 3.0701 | 3.0 | 120 | 2.8495 |
54
- | 2.6156 | 4.0 | 160 | 2.7129 |
55
- | 2.3074 | 5.0 | 200 | 2.6461 |
56
- | 2.2406 | 6.0 | 240 | 2.5746 |
57
 
58
 
59
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.5870
21
 
22
  ## Model description
23
 
 
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 5.0795 | 1.0 | 69 | 4.7780 |
52
+ | 3.0477 | 2.0 | 138 | 2.4758 |
53
+ | 1.3046 | 3.0 | 207 | 1.9672 |
54
+ | 1.1231 | 4.0 | 276 | 1.6499 |
55
+ | 1.8067 | 5.0 | 345 | 1.5982 |
56
+ | 2.1638 | 6.0 | 414 | 1.5870 |
57
 
58
 
59
  ### Framework versions
added_tokens.json CHANGED
@@ -1,21 +1,59 @@
1
  {
2
- "</s_address>": 57530,
3
- "</s_father_name>": 57537,
4
- "</s_first_name>": 57534,
5
- "</s_last_name>": 57539,
6
- "</s_mother_name>": 57531,
7
- "</s_place_of_birth>": 57540,
8
- "</s_random_number>": 57532,
9
- "</s_title>": 57536,
10
- "<s_address>": 57538,
11
- "<s_father_name>": 57526,
12
- "<s_first_name>": 57527,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "<s_iitcdip>": 57523,
14
- "<s_last_name>": 57535,
15
- "<s_mother_name>": 57525,
16
- "<s_place_of_birth>": 57529,
17
- "<s_random_number>": 57528,
 
 
 
 
 
 
 
18
  "<s_synthdog>": 57524,
19
- "<s_title>": 57533,
20
  "<sep/>": 57522
21
  }
 
1
  {
2
+ "</s_DOB>": 57570,
3
+ "</s_child_first_name>": 57532,
4
+ "</s_child_last_name>": 57574,
5
+ "</s_child_middle_name>": 57565,
6
+ "</s_city>": 57535,
7
+ "</s_father_age>": 57543,
8
+ "</s_father_birthplace>": 57563,
9
+ "</s_father_first_name>": 57530,
10
+ "</s_father_last_name>": 57562,
11
+ "</s_father_middle_name>": 57578,
12
+ "</s_father_nationality>": 57553,
13
+ "</s_father_nationalty>": 57555,
14
+ "</s_father_occupaation>": 57568,
15
+ "</s_father_occupation>": 57531,
16
+ "</s_father_race>": 57560,
17
+ "</s_father_religion>": 57528,
18
+ "</s_mother-nationality>": 57556,
19
+ "</s_mother_age>": 57554,
20
+ "</s_mother_birthplace>": 57552,
21
+ "</s_mother_first_name>": 57551,
22
+ "</s_mother_last_name>": 57548,
23
+ "</s_mother_middle_name>": 57558,
24
+ "</s_mother_nationality>": 57537,
25
+ "</s_mother_nationalty>": 57566,
26
+ "</s_mother_race>": 57534,
27
+ "</s_mother_religion>": 57575,
28
+ "</s_province>": 57576,
29
+ "<s_DOB>": 57546,
30
+ "<s_child_first_name>": 57557,
31
+ "<s_child_last_name>": 57544,
32
+ "<s_child_middle_name>": 57545,
33
+ "<s_city>": 57550,
34
+ "<s_father_age>": 57577,
35
+ "<s_father_birthplace>": 57573,
36
+ "<s_father_first_name>": 57538,
37
+ "<s_father_last_name>": 57527,
38
+ "<s_father_middle_name>": 57559,
39
+ "<s_father_nationality>": 57539,
40
+ "<s_father_nationalty>": 57571,
41
+ "<s_father_occupaation>": 57541,
42
+ "<s_father_occupation>": 57561,
43
+ "<s_father_race>": 57549,
44
+ "<s_father_religion>": 57564,
45
  "<s_iitcdip>": 57523,
46
+ "<s_mother-nationality>": 57540,
47
+ "<s_mother_age>": 57567,
48
+ "<s_mother_birthplace>": 57547,
49
+ "<s_mother_first_name>": 57572,
50
+ "<s_mother_last_name>": 57526,
51
+ "<s_mother_middle_name>": 57542,
52
+ "<s_mother_nationality>": 57529,
53
+ "<s_mother_nationalty>": 57569,
54
+ "<s_mother_race>": 57533,
55
+ "<s_mother_religion>": 57536,
56
+ "<s_province>": 57525,
57
  "<s_synthdog>": 57524,
 
58
  "<sep/>": 57522
59
  }
special_tokens_map.json CHANGED
@@ -1,105 +1,189 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "<s_mother_name>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
- "content": "<s_father_name>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
- "content": "<s_first_name>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
- "content": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
- "content": "<s_random_number>",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
36
  "single_word": false
37
  },
38
  {
39
- "content": "<s_place_of_birth>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false
44
  },
45
  {
46
- "content": "</s_address>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  {
53
- "content": "</s_mother_name>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
57
  "single_word": false
58
  },
59
  {
60
- "content": "</s_random_number>",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
  {
67
- "content": "<s_title>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
  {
74
- "content": "</s_first_name>",
75
  "lstrip": false,
76
  "normalized": false,
77
  "rstrip": false,
78
  "single_word": false
79
  },
80
  {
81
- "content": "<s_last_name>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
  },
87
  {
88
- "content": "</s_title>",
89
  "lstrip": false,
90
  "normalized": false,
91
  "rstrip": false,
92
  "single_word": false
93
  },
94
  {
95
- "content": "</s_father_name>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false
100
  },
101
  {
102
- "content": "<s_address>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
@@ -113,14 +197,196 @@
113
  "single_word": false
114
  },
115
  {
116
- "content": "</s_last_name>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
- "content": "</s_place_of_birth>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "<s_province>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<s_mother_last_name>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
+ "content": "<s_father_last_name>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
+ "content": "</s_father_religion>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ },
31
+ {
32
+ "content": "<s_mother_nationality>",
33
+ "lstrip": false,
34
+ "normalized": false,
35
+ "rstrip": false,
36
+ "single_word": false
37
+ },
38
+ {
39
+ "content": "</s_father_first_name>",
40
+ "lstrip": false,
41
+ "normalized": false,
42
+ "rstrip": false,
43
+ "single_word": false
44
+ },
45
+ {
46
+ "content": "</s_father_occupation>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false
51
+ },
52
+ {
53
+ "content": "</s_child_first_name>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false
58
+ },
59
+ {
60
+ "content": "<s_mother_race>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false
65
+ },
66
+ {
67
+ "content": "</s_mother_race>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
  {
74
+ "content": "</s_city>",
75
  "lstrip": false,
76
  "normalized": false,
77
  "rstrip": false,
78
  "single_word": false
79
  },
80
  {
81
+ "content": "<s_mother_religion>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
  },
87
  {
88
+ "content": "</s_mother_nationality>",
89
  "lstrip": false,
90
  "normalized": false,
91
  "rstrip": false,
92
  "single_word": false
93
  },
94
  {
95
+ "content": "<s_father_first_name>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false
100
  },
101
  {
102
+ "content": "<s_father_nationality>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
106
  "single_word": false
107
  },
108
  {
109
+ "content": "<s_mother-nationality>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
113
  "single_word": false
114
  },
115
  {
116
+ "content": "<s_father_occupaation>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
+ "content": "<s_mother_middle_name>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
127
  "single_word": false
128
  },
129
  {
130
+ "content": "</s_father_age>",
131
  "lstrip": false,
132
  "normalized": false,
133
  "rstrip": false,
134
  "single_word": false
135
  },
136
  {
137
+ "content": "<s_child_last_name>",
138
  "lstrip": false,
139
  "normalized": false,
140
  "rstrip": false,
141
  "single_word": false
142
  },
143
  {
144
+ "content": "<s_child_middle_name>",
145
+ "lstrip": false,
146
+ "normalized": false,
147
+ "rstrip": false,
148
+ "single_word": false
149
+ },
150
+ {
151
+ "content": "<s_DOB>",
152
+ "lstrip": false,
153
+ "normalized": false,
154
+ "rstrip": false,
155
+ "single_word": false
156
+ },
157
+ {
158
+ "content": "<s_mother_birthplace>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false
163
+ },
164
+ {
165
+ "content": "</s_mother_last_name>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false
170
+ },
171
+ {
172
+ "content": "<s_father_race>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false
177
+ },
178
+ {
179
+ "content": "<s_city>",
180
+ "lstrip": false,
181
+ "normalized": false,
182
+ "rstrip": false,
183
+ "single_word": false
184
+ },
185
+ {
186
+ "content": "</s_mother_first_name>",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
 
197
  "single_word": false
198
  },
199
  {
200
+ "content": "</s_mother_birthplace>",
201
+ "lstrip": false,
202
+ "normalized": false,
203
+ "rstrip": false,
204
+ "single_word": false
205
+ },
206
+ {
207
+ "content": "</s_father_nationality>",
208
+ "lstrip": false,
209
+ "normalized": false,
210
+ "rstrip": false,
211
+ "single_word": false
212
+ },
213
+ {
214
+ "content": "</s_mother_age>",
215
+ "lstrip": false,
216
+ "normalized": false,
217
+ "rstrip": false,
218
+ "single_word": false
219
+ },
220
+ {
221
+ "content": "</s_father_nationalty>",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false
226
+ },
227
+ {
228
+ "content": "</s_mother-nationality>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false
233
+ },
234
+ {
235
+ "content": "<s_child_first_name>",
236
+ "lstrip": false,
237
+ "normalized": false,
238
+ "rstrip": false,
239
+ "single_word": false
240
+ },
241
+ {
242
+ "content": "</s_mother_middle_name>",
243
+ "lstrip": false,
244
+ "normalized": false,
245
+ "rstrip": false,
246
+ "single_word": false
247
+ },
248
+ {
249
+ "content": "<s_father_middle_name>",
250
+ "lstrip": false,
251
+ "normalized": false,
252
+ "rstrip": false,
253
+ "single_word": false
254
+ },
255
+ {
256
+ "content": "</s_father_race>",
257
+ "lstrip": false,
258
+ "normalized": false,
259
+ "rstrip": false,
260
+ "single_word": false
261
+ },
262
+ {
263
+ "content": "<s_father_occupation>",
264
+ "lstrip": false,
265
+ "normalized": false,
266
+ "rstrip": false,
267
+ "single_word": false
268
+ },
269
+ {
270
+ "content": "</s_father_last_name>",
271
+ "lstrip": false,
272
+ "normalized": false,
273
+ "rstrip": false,
274
+ "single_word": false
275
+ },
276
+ {
277
+ "content": "</s_father_birthplace>",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false
282
+ },
283
+ {
284
+ "content": "<s_father_religion>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false
289
+ },
290
+ {
291
+ "content": "</s_child_middle_name>",
292
+ "lstrip": false,
293
+ "normalized": false,
294
+ "rstrip": false,
295
+ "single_word": false
296
+ },
297
+ {
298
+ "content": "</s_mother_nationalty>",
299
+ "lstrip": false,
300
+ "normalized": false,
301
+ "rstrip": false,
302
+ "single_word": false
303
+ },
304
+ {
305
+ "content": "<s_mother_age>",
306
+ "lstrip": false,
307
+ "normalized": false,
308
+ "rstrip": false,
309
+ "single_word": false
310
+ },
311
+ {
312
+ "content": "</s>",
313
+ "lstrip": false,
314
+ "normalized": false,
315
+ "rstrip": false,
316
+ "single_word": false
317
+ },
318
+ {
319
+ "content": "</s_father_occupaation>",
320
+ "lstrip": false,
321
+ "normalized": false,
322
+ "rstrip": false,
323
+ "single_word": false
324
+ },
325
+ {
326
+ "content": "<s_mother_nationalty>",
327
+ "lstrip": false,
328
+ "normalized": false,
329
+ "rstrip": false,
330
+ "single_word": false
331
+ },
332
+ {
333
+ "content": "</s_DOB>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false
338
+ },
339
+ {
340
+ "content": "<s_father_nationalty>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false
345
+ },
346
+ {
347
+ "content": "<s_mother_first_name>",
348
+ "lstrip": false,
349
+ "normalized": false,
350
+ "rstrip": false,
351
+ "single_word": false
352
+ },
353
+ {
354
+ "content": "<s_father_birthplace>",
355
+ "lstrip": false,
356
+ "normalized": false,
357
+ "rstrip": false,
358
+ "single_word": false
359
+ },
360
+ {
361
+ "content": "</s_child_last_name>",
362
+ "lstrip": false,
363
+ "normalized": false,
364
+ "rstrip": false,
365
+ "single_word": false
366
+ },
367
+ {
368
+ "content": "</s_mother_religion>",
369
+ "lstrip": false,
370
+ "normalized": false,
371
+ "rstrip": false,
372
+ "single_word": false
373
+ },
374
+ {
375
+ "content": "</s_province>",
376
+ "lstrip": false,
377
+ "normalized": false,
378
+ "rstrip": false,
379
+ "single_word": false
380
+ },
381
+ {
382
+ "content": "<s_father_age>",
383
  "lstrip": false,
384
  "normalized": false,
385
  "rstrip": false,
386
  "single_word": false
387
  },
388
  {
389
+ "content": "</s_father_middle_name>",
390
  "lstrip": false,
391
  "normalized": false,
392
  "rstrip": false,
tokenizer.json CHANGED
@@ -91,7 +91,7 @@
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<s_mother_name>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  },
101
  {
102
  "id": 57526,
103
- "content": "<s_father_name>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "id": 57527,
112
- "content": "<s_first_name>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
@@ -118,7 +118,7 @@
118
  },
119
  {
120
  "id": 57528,
121
- "content": "<s_random_number>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
@@ -127,7 +127,7 @@
127
  },
128
  {
129
  "id": 57529,
130
- "content": "<s_place_of_birth>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
@@ -136,7 +136,7 @@
136
  },
137
  {
138
  "id": 57530,
139
- "content": "</s_address>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  },
146
  {
147
  "id": 57531,
148
- "content": "</s_mother_name>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
@@ -154,7 +154,7 @@
154
  },
155
  {
156
  "id": 57532,
157
- "content": "</s_random_number>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
@@ -163,7 +163,7 @@
163
  },
164
  {
165
  "id": 57533,
166
- "content": "<s_title>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
@@ -172,7 +172,7 @@
172
  },
173
  {
174
  "id": 57534,
175
- "content": "</s_first_name>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
@@ -181,7 +181,7 @@
181
  },
182
  {
183
  "id": 57535,
184
- "content": "<s_last_name>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
@@ -190,7 +190,7 @@
190
  },
191
  {
192
  "id": 57536,
193
- "content": "</s_title>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
@@ -199,7 +199,7 @@
199
  },
200
  {
201
  "id": 57537,
202
- "content": "</s_father_name>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
@@ -208,7 +208,7 @@
208
  },
209
  {
210
  "id": 57538,
211
- "content": "<s_address>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  },
218
  {
219
  "id": 57539,
220
- "content": "</s_last_name>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
@@ -226,7 +226,349 @@
226
  },
227
  {
228
  "id": 57540,
229
- "content": "</s_place_of_birth>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "single_word": false,
231
  "lstrip": false,
232
  "rstrip": false,
 
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<s_province>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
 
100
  },
101
  {
102
  "id": 57526,
103
+ "content": "<s_mother_last_name>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
 
109
  },
110
  {
111
  "id": 57527,
112
+ "content": "<s_father_last_name>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
 
118
  },
119
  {
120
  "id": 57528,
121
+ "content": "</s_father_religion>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
 
127
  },
128
  {
129
  "id": 57529,
130
+ "content": "<s_mother_nationality>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
 
136
  },
137
  {
138
  "id": 57530,
139
+ "content": "</s_father_first_name>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
 
145
  },
146
  {
147
  "id": 57531,
148
+ "content": "</s_father_occupation>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
 
154
  },
155
  {
156
  "id": 57532,
157
+ "content": "</s_child_first_name>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
 
163
  },
164
  {
165
  "id": 57533,
166
+ "content": "<s_mother_race>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
 
172
  },
173
  {
174
  "id": 57534,
175
+ "content": "</s_mother_race>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
 
181
  },
182
  {
183
  "id": 57535,
184
+ "content": "</s_city>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
 
190
  },
191
  {
192
  "id": 57536,
193
+ "content": "<s_mother_religion>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
 
199
  },
200
  {
201
  "id": 57537,
202
+ "content": "</s_mother_nationality>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
 
208
  },
209
  {
210
  "id": 57538,
211
+ "content": "<s_father_first_name>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
 
217
  },
218
  {
219
  "id": 57539,
220
+ "content": "<s_father_nationality>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
 
226
  },
227
  {
228
  "id": 57540,
229
+ "content": "<s_mother-nationality>",
230
+ "single_word": false,
231
+ "lstrip": false,
232
+ "rstrip": false,
233
+ "normalized": false,
234
+ "special": true
235
+ },
236
+ {
237
+ "id": 57541,
238
+ "content": "<s_father_occupaation>",
239
+ "single_word": false,
240
+ "lstrip": false,
241
+ "rstrip": false,
242
+ "normalized": false,
243
+ "special": true
244
+ },
245
+ {
246
+ "id": 57542,
247
+ "content": "<s_mother_middle_name>",
248
+ "single_word": false,
249
+ "lstrip": false,
250
+ "rstrip": false,
251
+ "normalized": false,
252
+ "special": true
253
+ },
254
+ {
255
+ "id": 57543,
256
+ "content": "</s_father_age>",
257
+ "single_word": false,
258
+ "lstrip": false,
259
+ "rstrip": false,
260
+ "normalized": false,
261
+ "special": true
262
+ },
263
+ {
264
+ "id": 57544,
265
+ "content": "<s_child_last_name>",
266
+ "single_word": false,
267
+ "lstrip": false,
268
+ "rstrip": false,
269
+ "normalized": false,
270
+ "special": true
271
+ },
272
+ {
273
+ "id": 57545,
274
+ "content": "<s_child_middle_name>",
275
+ "single_word": false,
276
+ "lstrip": false,
277
+ "rstrip": false,
278
+ "normalized": false,
279
+ "special": true
280
+ },
281
+ {
282
+ "id": 57546,
283
+ "content": "<s_DOB>",
284
+ "single_word": false,
285
+ "lstrip": false,
286
+ "rstrip": false,
287
+ "normalized": false,
288
+ "special": true
289
+ },
290
+ {
291
+ "id": 57547,
292
+ "content": "<s_mother_birthplace>",
293
+ "single_word": false,
294
+ "lstrip": false,
295
+ "rstrip": false,
296
+ "normalized": false,
297
+ "special": true
298
+ },
299
+ {
300
+ "id": 57548,
301
+ "content": "</s_mother_last_name>",
302
+ "single_word": false,
303
+ "lstrip": false,
304
+ "rstrip": false,
305
+ "normalized": false,
306
+ "special": true
307
+ },
308
+ {
309
+ "id": 57549,
310
+ "content": "<s_father_race>",
311
+ "single_word": false,
312
+ "lstrip": false,
313
+ "rstrip": false,
314
+ "normalized": false,
315
+ "special": true
316
+ },
317
+ {
318
+ "id": 57550,
319
+ "content": "<s_city>",
320
+ "single_word": false,
321
+ "lstrip": false,
322
+ "rstrip": false,
323
+ "normalized": false,
324
+ "special": true
325
+ },
326
+ {
327
+ "id": 57551,
328
+ "content": "</s_mother_first_name>",
329
+ "single_word": false,
330
+ "lstrip": false,
331
+ "rstrip": false,
332
+ "normalized": false,
333
+ "special": true
334
+ },
335
+ {
336
+ "id": 57552,
337
+ "content": "</s_mother_birthplace>",
338
+ "single_word": false,
339
+ "lstrip": false,
340
+ "rstrip": false,
341
+ "normalized": false,
342
+ "special": true
343
+ },
344
+ {
345
+ "id": 57553,
346
+ "content": "</s_father_nationality>",
347
+ "single_word": false,
348
+ "lstrip": false,
349
+ "rstrip": false,
350
+ "normalized": false,
351
+ "special": true
352
+ },
353
+ {
354
+ "id": 57554,
355
+ "content": "</s_mother_age>",
356
+ "single_word": false,
357
+ "lstrip": false,
358
+ "rstrip": false,
359
+ "normalized": false,
360
+ "special": true
361
+ },
362
+ {
363
+ "id": 57555,
364
+ "content": "</s_father_nationalty>",
365
+ "single_word": false,
366
+ "lstrip": false,
367
+ "rstrip": false,
368
+ "normalized": false,
369
+ "special": true
370
+ },
371
+ {
372
+ "id": 57556,
373
+ "content": "</s_mother-nationality>",
374
+ "single_word": false,
375
+ "lstrip": false,
376
+ "rstrip": false,
377
+ "normalized": false,
378
+ "special": true
379
+ },
380
+ {
381
+ "id": 57557,
382
+ "content": "<s_child_first_name>",
383
+ "single_word": false,
384
+ "lstrip": false,
385
+ "rstrip": false,
386
+ "normalized": false,
387
+ "special": true
388
+ },
389
+ {
390
+ "id": 57558,
391
+ "content": "</s_mother_middle_name>",
392
+ "single_word": false,
393
+ "lstrip": false,
394
+ "rstrip": false,
395
+ "normalized": false,
396
+ "special": true
397
+ },
398
+ {
399
+ "id": 57559,
400
+ "content": "<s_father_middle_name>",
401
+ "single_word": false,
402
+ "lstrip": false,
403
+ "rstrip": false,
404
+ "normalized": false,
405
+ "special": true
406
+ },
407
+ {
408
+ "id": 57560,
409
+ "content": "</s_father_race>",
410
+ "single_word": false,
411
+ "lstrip": false,
412
+ "rstrip": false,
413
+ "normalized": false,
414
+ "special": true
415
+ },
416
+ {
417
+ "id": 57561,
418
+ "content": "<s_father_occupation>",
419
+ "single_word": false,
420
+ "lstrip": false,
421
+ "rstrip": false,
422
+ "normalized": false,
423
+ "special": true
424
+ },
425
+ {
426
+ "id": 57562,
427
+ "content": "</s_father_last_name>",
428
+ "single_word": false,
429
+ "lstrip": false,
430
+ "rstrip": false,
431
+ "normalized": false,
432
+ "special": true
433
+ },
434
+ {
435
+ "id": 57563,
436
+ "content": "</s_father_birthplace>",
437
+ "single_word": false,
438
+ "lstrip": false,
439
+ "rstrip": false,
440
+ "normalized": false,
441
+ "special": true
442
+ },
443
+ {
444
+ "id": 57564,
445
+ "content": "<s_father_religion>",
446
+ "single_word": false,
447
+ "lstrip": false,
448
+ "rstrip": false,
449
+ "normalized": false,
450
+ "special": true
451
+ },
452
+ {
453
+ "id": 57565,
454
+ "content": "</s_child_middle_name>",
455
+ "single_word": false,
456
+ "lstrip": false,
457
+ "rstrip": false,
458
+ "normalized": false,
459
+ "special": true
460
+ },
461
+ {
462
+ "id": 57566,
463
+ "content": "</s_mother_nationalty>",
464
+ "single_word": false,
465
+ "lstrip": false,
466
+ "rstrip": false,
467
+ "normalized": false,
468
+ "special": true
469
+ },
470
+ {
471
+ "id": 57567,
472
+ "content": "<s_mother_age>",
473
+ "single_word": false,
474
+ "lstrip": false,
475
+ "rstrip": false,
476
+ "normalized": false,
477
+ "special": true
478
+ },
479
+ {
480
+ "id": 57568,
481
+ "content": "</s_father_occupaation>",
482
+ "single_word": false,
483
+ "lstrip": false,
484
+ "rstrip": false,
485
+ "normalized": false,
486
+ "special": true
487
+ },
488
+ {
489
+ "id": 57569,
490
+ "content": "<s_mother_nationalty>",
491
+ "single_word": false,
492
+ "lstrip": false,
493
+ "rstrip": false,
494
+ "normalized": false,
495
+ "special": true
496
+ },
497
+ {
498
+ "id": 57570,
499
+ "content": "</s_DOB>",
500
+ "single_word": false,
501
+ "lstrip": false,
502
+ "rstrip": false,
503
+ "normalized": false,
504
+ "special": true
505
+ },
506
+ {
507
+ "id": 57571,
508
+ "content": "<s_father_nationalty>",
509
+ "single_word": false,
510
+ "lstrip": false,
511
+ "rstrip": false,
512
+ "normalized": false,
513
+ "special": true
514
+ },
515
+ {
516
+ "id": 57572,
517
+ "content": "<s_mother_first_name>",
518
+ "single_word": false,
519
+ "lstrip": false,
520
+ "rstrip": false,
521
+ "normalized": false,
522
+ "special": true
523
+ },
524
+ {
525
+ "id": 57573,
526
+ "content": "<s_father_birthplace>",
527
+ "single_word": false,
528
+ "lstrip": false,
529
+ "rstrip": false,
530
+ "normalized": false,
531
+ "special": true
532
+ },
533
+ {
534
+ "id": 57574,
535
+ "content": "</s_child_last_name>",
536
+ "single_word": false,
537
+ "lstrip": false,
538
+ "rstrip": false,
539
+ "normalized": false,
540
+ "special": true
541
+ },
542
+ {
543
+ "id": 57575,
544
+ "content": "</s_mother_religion>",
545
+ "single_word": false,
546
+ "lstrip": false,
547
+ "rstrip": false,
548
+ "normalized": false,
549
+ "special": true
550
+ },
551
+ {
552
+ "id": 57576,
553
+ "content": "</s_province>",
554
+ "single_word": false,
555
+ "lstrip": false,
556
+ "rstrip": false,
557
+ "normalized": false,
558
+ "special": true
559
+ },
560
+ {
561
+ "id": 57577,
562
+ "content": "<s_father_age>",
563
+ "single_word": false,
564
+ "lstrip": false,
565
+ "rstrip": false,
566
+ "normalized": false,
567
+ "special": true
568
+ },
569
+ {
570
+ "id": 57578,
571
+ "content": "</s_father_middle_name>",
572
  "single_word": false,
573
  "lstrip": false,
574
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -65,7 +65,7 @@
65
  "special": true
66
  },
67
  "57525": {
68
- "content": "<s_mother_name>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": true
74
  },
75
  "57526": {
76
- "content": "<s_father_name>",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": true
82
  },
83
  "57527": {
84
- "content": "<s_first_name>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "57528": {
92
- "content": "<s_random_number>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "57529": {
100
- "content": "<s_place_of_birth>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "57530": {
108
- "content": "</s_address>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "57531": {
116
- "content": "</s_mother_name>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "57532": {
124
- "content": "</s_random_number>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": true
130
  },
131
  "57533": {
132
- "content": "<s_title>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": true
138
  },
139
  "57534": {
140
- "content": "</s_first_name>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": true
146
  },
147
  "57535": {
148
- "content": "<s_last_name>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": true
154
  },
155
  "57536": {
156
- "content": "</s_title>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": true
162
  },
163
  "57537": {
164
- "content": "</s_father_name>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": true
170
  },
171
  "57538": {
172
- "content": "<s_address>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": true
178
  },
179
  "57539": {
180
- "content": "</s_last_name>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
@@ -185,7 +185,311 @@
185
  "special": true
186
  },
187
  "57540": {
188
- "content": "</s_place_of_birth>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
@@ -194,24 +498,62 @@
194
  }
195
  },
196
  "additional_special_tokens": [
197
- "<s_mother_name>",
198
- "<s_father_name>",
199
- "<s_first_name>",
200
- "</s>",
201
- "<s_random_number>",
202
- "<s_place_of_birth>",
203
- "</s_address>",
204
- "</s_mother_name>",
205
- "</s_random_number>",
206
- "<s_title>",
207
- "</s_first_name>",
208
- "<s_last_name>",
209
- "</s_title>",
210
- "</s_father_name>",
211
- "<s_address>",
 
 
 
 
 
 
 
 
 
 
 
 
212
  "<s>",
213
- "</s_last_name>",
214
- "</s_place_of_birth>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  ],
216
  "bos_token": "<s>",
217
  "clean_up_tokenization_spaces": true,
 
65
  "special": true
66
  },
67
  "57525": {
68
+ "content": "<s_province>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
 
73
  "special": true
74
  },
75
  "57526": {
76
+ "content": "<s_mother_last_name>",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
 
81
  "special": true
82
  },
83
  "57527": {
84
+ "content": "<s_father_last_name>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "57528": {
92
+ "content": "</s_father_religion>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "57529": {
100
+ "content": "<s_mother_nationality>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "57530": {
108
+ "content": "</s_father_first_name>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "57531": {
116
+ "content": "</s_father_occupation>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "57532": {
124
+ "content": "</s_child_first_name>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": true
130
  },
131
  "57533": {
132
+ "content": "<s_mother_race>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
137
  "special": true
138
  },
139
  "57534": {
140
+ "content": "</s_mother_race>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "57535": {
148
+ "content": "</s_city>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "57536": {
156
+ "content": "<s_mother_religion>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "57537": {
164
+ "content": "</s_mother_nationality>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "57538": {
172
+ "content": "<s_father_first_name>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "57539": {
180
+ "content": "<s_father_nationality>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
185
  "special": true
186
  },
187
  "57540": {
188
+ "content": "<s_mother-nationality>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "57541": {
196
+ "content": "<s_father_occupaation>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "57542": {
204
+ "content": "<s_mother_middle_name>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "57543": {
212
+ "content": "</s_father_age>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "57544": {
220
+ "content": "<s_child_last_name>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "57545": {
228
+ "content": "<s_child_middle_name>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "57546": {
236
+ "content": "<s_DOB>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "57547": {
244
+ "content": "<s_mother_birthplace>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "57548": {
252
+ "content": "</s_mother_last_name>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "57549": {
260
+ "content": "<s_father_race>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "57550": {
268
+ "content": "<s_city>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "57551": {
276
+ "content": "</s_mother_first_name>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "57552": {
284
+ "content": "</s_mother_birthplace>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "57553": {
292
+ "content": "</s_father_nationality>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "57554": {
300
+ "content": "</s_mother_age>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "57555": {
308
+ "content": "</s_father_nationalty>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "57556": {
316
+ "content": "</s_mother-nationality>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "57557": {
324
+ "content": "<s_child_first_name>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "57558": {
332
+ "content": "</s_mother_middle_name>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "57559": {
340
+ "content": "<s_father_middle_name>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "57560": {
348
+ "content": "</s_father_race>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "57561": {
356
+ "content": "<s_father_occupation>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "57562": {
364
+ "content": "</s_father_last_name>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "57563": {
372
+ "content": "</s_father_birthplace>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "57564": {
380
+ "content": "<s_father_religion>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "57565": {
388
+ "content": "</s_child_middle_name>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "57566": {
396
+ "content": "</s_mother_nationalty>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "57567": {
404
+ "content": "<s_mother_age>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "57568": {
412
+ "content": "</s_father_occupaation>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "57569": {
420
+ "content": "<s_mother_nationalty>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "57570": {
428
+ "content": "</s_DOB>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "57571": {
436
+ "content": "<s_father_nationalty>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "57572": {
444
+ "content": "<s_mother_first_name>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "57573": {
452
+ "content": "<s_father_birthplace>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "57574": {
460
+ "content": "</s_child_last_name>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "57575": {
468
+ "content": "</s_mother_religion>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "57576": {
476
+ "content": "</s_province>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "57577": {
484
+ "content": "<s_father_age>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "57578": {
492
+ "content": "</s_father_middle_name>",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
 
498
  }
499
  },
500
  "additional_special_tokens": [
501
+ "<s_province>",
502
+ "<s_mother_last_name>",
503
+ "<s_father_last_name>",
504
+ "</s_father_religion>",
505
+ "<s_mother_nationality>",
506
+ "</s_father_first_name>",
507
+ "</s_father_occupation>",
508
+ "</s_child_first_name>",
509
+ "<s_mother_race>",
510
+ "</s_mother_race>",
511
+ "</s_city>",
512
+ "<s_mother_religion>",
513
+ "</s_mother_nationality>",
514
+ "<s_father_first_name>",
515
+ "<s_father_nationality>",
516
+ "<s_mother-nationality>",
517
+ "<s_father_occupaation>",
518
+ "<s_mother_middle_name>",
519
+ "</s_father_age>",
520
+ "<s_child_last_name>",
521
+ "<s_child_middle_name>",
522
+ "<s_DOB>",
523
+ "<s_mother_birthplace>",
524
+ "</s_mother_last_name>",
525
+ "<s_father_race>",
526
+ "<s_city>",
527
+ "</s_mother_first_name>",
528
  "<s>",
529
+ "</s_mother_birthplace>",
530
+ "</s_father_nationality>",
531
+ "</s_mother_age>",
532
+ "</s_father_nationalty>",
533
+ "</s_mother-nationality>",
534
+ "<s_child_first_name>",
535
+ "</s_mother_middle_name>",
536
+ "<s_father_middle_name>",
537
+ "</s_father_race>",
538
+ "<s_father_occupation>",
539
+ "</s_father_last_name>",
540
+ "</s_father_birthplace>",
541
+ "<s_father_religion>",
542
+ "</s_child_middle_name>",
543
+ "</s_mother_nationalty>",
544
+ "<s_mother_age>",
545
+ "</s>",
546
+ "</s_father_occupaation>",
547
+ "<s_mother_nationalty>",
548
+ "</s_DOB>",
549
+ "<s_father_nationalty>",
550
+ "<s_mother_first_name>",
551
+ "<s_father_birthplace>",
552
+ "</s_child_last_name>",
553
+ "</s_mother_religion>",
554
+ "</s_province>",
555
+ "<s_father_age>",
556
+ "</s_father_middle_name>"
557
  ],
558
  "bos_token": "<s>",
559
  "clean_up_tokenization_spaces": true,