sreejith8100 committed on
Commit
3ec53c1
1 Parent(s): d609a52

End of training

Browse files
Files changed (5) hide show
  1. README.md +8 -12
  2. added_tokens.json +16 -54
  3. special_tokens_map.json +17 -283
  4. tokenizer.json +16 -358
  5. tokenizer_config.json +33 -375
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.1461
21
 
22
  ## Model description
23
 
@@ -42,22 +42,18 @@ The following hyperparameters were used during training:
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
- - num_epochs: 10
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
- | 6.8948 | 1.0 | 45 | 5.7211 |
52
- | 3.2707 | 2.0 | 90 | 2.8623 |
53
- | 2.1611 | 3.0 | 135 | 1.7889 |
54
- | 1.3332 | 4.0 | 180 | 1.4790 |
55
- | 0.6255 | 5.0 | 225 | 1.3553 |
56
- | 1.2248 | 6.0 | 270 | 1.2488 |
57
- | 0.9245 | 7.0 | 315 | 1.1895 |
58
- | 0.4144 | 8.0 | 360 | 1.2128 |
59
- | 0.4087 | 9.0 | 405 | 1.1811 |
60
- | 0.4926 | 10.0 | 450 | 1.1461 |
61
 
62
 
63
  ### Framework versions
 
17
 
18
  This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 2.5746
21
 
22
  ## Model description
23
 
 
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
45
+ - num_epochs: 6
46
 
47
  ### Training results
48
 
49
  | Training Loss | Epoch | Step | Validation Loss |
50
  |:-------------:|:-----:|:----:|:---------------:|
51
+ | 6.9386 | 1.0 | 40 | 6.1254 |
52
+ | 3.8907 | 2.0 | 80 | 3.3437 |
53
+ | 3.0701 | 3.0 | 120 | 2.8495 |
54
+ | 2.6156 | 4.0 | 160 | 2.7129 |
55
+ | 2.3074 | 5.0 | 200 | 2.6461 |
56
+ | 2.2406 | 6.0 | 240 | 2.5746 |
 
 
 
 
57
 
58
 
59
  ### Framework versions
added_tokens.json CHANGED
@@ -1,59 +1,21 @@
1
  {
2
- "</s_DOB>": 57570,
3
- "</s_child_first_name>": 57532,
4
- "</s_child_last_name>": 57574,
5
- "</s_child_middle_name>": 57565,
6
- "</s_city>": 57535,
7
- "</s_father_age>": 57543,
8
- "</s_father_birthplace>": 57563,
9
- "</s_father_first_name>": 57530,
10
- "</s_father_last_name>": 57562,
11
- "</s_father_middle_name>": 57578,
12
- "</s_father_nationality>": 57553,
13
- "</s_father_nationalty>": 57555,
14
- "</s_father_occupaation>": 57568,
15
- "</s_father_occupation>": 57531,
16
- "</s_father_race>": 57560,
17
- "</s_father_religion>": 57528,
18
- "</s_mother-nationality>": 57556,
19
- "</s_mother_age>": 57554,
20
- "</s_mother_birthplace>": 57552,
21
- "</s_mother_first_name>": 57551,
22
- "</s_mother_last_name>": 57548,
23
- "</s_mother_middle_name>": 57558,
24
- "</s_mother_nationality>": 57537,
25
- "</s_mother_nationalty>": 57566,
26
- "</s_mother_race>": 57534,
27
- "</s_mother_religion>": 57575,
28
- "</s_province>": 57576,
29
- "<s_DOB>": 57546,
30
- "<s_child_first_name>": 57557,
31
- "<s_child_last_name>": 57544,
32
- "<s_child_middle_name>": 57545,
33
- "<s_city>": 57550,
34
- "<s_father_age>": 57577,
35
- "<s_father_birthplace>": 57573,
36
- "<s_father_first_name>": 57538,
37
- "<s_father_last_name>": 57527,
38
- "<s_father_middle_name>": 57559,
39
- "<s_father_nationality>": 57539,
40
- "<s_father_nationalty>": 57571,
41
- "<s_father_occupaation>": 57541,
42
- "<s_father_occupation>": 57561,
43
- "<s_father_race>": 57549,
44
- "<s_father_religion>": 57564,
45
  "<s_iitcdip>": 57523,
46
- "<s_mother-nationality>": 57540,
47
- "<s_mother_age>": 57567,
48
- "<s_mother_birthplace>": 57547,
49
- "<s_mother_first_name>": 57572,
50
- "<s_mother_last_name>": 57526,
51
- "<s_mother_middle_name>": 57542,
52
- "<s_mother_nationality>": 57529,
53
- "<s_mother_nationalty>": 57569,
54
- "<s_mother_race>": 57533,
55
- "<s_mother_religion>": 57536,
56
- "<s_province>": 57525,
57
  "<s_synthdog>": 57524,
 
58
  "<sep/>": 57522
59
  }
 
1
  {
2
+ "</s_address>": 57530,
3
+ "</s_father_name>": 57537,
4
+ "</s_first_name>": 57534,
5
+ "</s_last_name>": 57539,
6
+ "</s_mother_name>": 57531,
7
+ "</s_place_of_birth>": 57540,
8
+ "</s_random_number>": 57532,
9
+ "</s_title>": 57536,
10
+ "<s_address>": 57538,
11
+ "<s_father_name>": 57526,
12
+ "<s_first_name>": 57527,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "<s_iitcdip>": 57523,
14
+ "<s_last_name>": 57535,
15
+ "<s_mother_name>": 57525,
16
+ "<s_place_of_birth>": 57529,
17
+ "<s_random_number>": 57528,
 
 
 
 
 
 
 
18
  "<s_synthdog>": 57524,
19
+ "<s_title>": 57533,
20
  "<sep/>": 57522
21
  }
special_tokens_map.json CHANGED
@@ -1,189 +1,105 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "<s_province>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
- "content": "<s_mother_last_name>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
- "content": "<s_father_last_name>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
- "content": "</s_father_religion>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- },
31
- {
32
- "content": "<s_mother_nationality>",
33
- "lstrip": false,
34
- "normalized": false,
35
- "rstrip": false,
36
- "single_word": false
37
- },
38
- {
39
- "content": "</s_father_first_name>",
40
- "lstrip": false,
41
- "normalized": false,
42
- "rstrip": false,
43
- "single_word": false
44
- },
45
- {
46
- "content": "</s_father_occupation>",
47
- "lstrip": false,
48
- "normalized": false,
49
- "rstrip": false,
50
- "single_word": false
51
- },
52
- {
53
- "content": "</s_child_first_name>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false
58
- },
59
- {
60
- "content": "<s_mother_race>",
61
- "lstrip": false,
62
- "normalized": false,
63
- "rstrip": false,
64
- "single_word": false
65
- },
66
- {
67
- "content": "</s_mother_race>",
68
- "lstrip": false,
69
- "normalized": false,
70
- "rstrip": false,
71
- "single_word": false
72
- },
73
- {
74
- "content": "</s_city>",
75
- "lstrip": false,
76
- "normalized": false,
77
- "rstrip": false,
78
- "single_word": false
79
- },
80
- {
81
- "content": "<s_mother_religion>",
82
- "lstrip": false,
83
- "normalized": false,
84
- "rstrip": false,
85
- "single_word": false
86
- },
87
- {
88
- "content": "</s_mother_nationality>",
89
- "lstrip": false,
90
- "normalized": false,
91
- "rstrip": false,
92
- "single_word": false
93
- },
94
- {
95
- "content": "<s_father_first_name>",
96
- "lstrip": false,
97
- "normalized": false,
98
- "rstrip": false,
99
- "single_word": false
100
- },
101
- {
102
- "content": "<s_father_nationality>",
103
- "lstrip": false,
104
- "normalized": false,
105
- "rstrip": false,
106
- "single_word": false
107
- },
108
- {
109
- "content": "<s_mother-nationality>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
113
  "single_word": false
114
  },
115
  {
116
- "content": "<s_father_occupaation>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
- "content": "<s_mother_middle_name>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
127
  "single_word": false
128
  },
129
  {
130
- "content": "</s_father_age>",
131
  "lstrip": false,
132
  "normalized": false,
133
  "rstrip": false,
134
  "single_word": false
135
  },
136
  {
137
- "content": "<s_child_last_name>",
138
  "lstrip": false,
139
  "normalized": false,
140
  "rstrip": false,
141
  "single_word": false
142
  },
143
  {
144
- "content": "<s_child_middle_name>",
145
  "lstrip": false,
146
  "normalized": false,
147
  "rstrip": false,
148
  "single_word": false
149
  },
150
  {
151
- "content": "<s_DOB>",
152
  "lstrip": false,
153
  "normalized": false,
154
  "rstrip": false,
155
  "single_word": false
156
  },
157
  {
158
- "content": "<s_mother_birthplace>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
162
  "single_word": false
163
  },
164
  {
165
- "content": "</s_mother_last_name>",
166
  "lstrip": false,
167
  "normalized": false,
168
  "rstrip": false,
169
  "single_word": false
170
  },
171
  {
172
- "content": "<s_father_race>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
176
  "single_word": false
177
  },
178
  {
179
- "content": "<s_city>",
180
  "lstrip": false,
181
  "normalized": false,
182
  "rstrip": false,
183
  "single_word": false
184
  },
185
  {
186
- "content": "</s_mother_first_name>",
187
  "lstrip": false,
188
  "normalized": false,
189
  "rstrip": false,
@@ -197,196 +113,14 @@
197
  "single_word": false
198
  },
199
  {
200
- "content": "</s_mother_birthplace>",
201
- "lstrip": false,
202
- "normalized": false,
203
- "rstrip": false,
204
- "single_word": false
205
- },
206
- {
207
- "content": "</s_father_nationality>",
208
- "lstrip": false,
209
- "normalized": false,
210
- "rstrip": false,
211
- "single_word": false
212
- },
213
- {
214
- "content": "</s_mother_age>",
215
- "lstrip": false,
216
- "normalized": false,
217
- "rstrip": false,
218
- "single_word": false
219
- },
220
- {
221
- "content": "</s_father_nationalty>",
222
- "lstrip": false,
223
- "normalized": false,
224
- "rstrip": false,
225
- "single_word": false
226
- },
227
- {
228
- "content": "</s_mother-nationality>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false
233
- },
234
- {
235
- "content": "<s_child_first_name>",
236
- "lstrip": false,
237
- "normalized": false,
238
- "rstrip": false,
239
- "single_word": false
240
- },
241
- {
242
- "content": "</s_mother_middle_name>",
243
- "lstrip": false,
244
- "normalized": false,
245
- "rstrip": false,
246
- "single_word": false
247
- },
248
- {
249
- "content": "<s_father_middle_name>",
250
- "lstrip": false,
251
- "normalized": false,
252
- "rstrip": false,
253
- "single_word": false
254
- },
255
- {
256
- "content": "</s_father_race>",
257
- "lstrip": false,
258
- "normalized": false,
259
- "rstrip": false,
260
- "single_word": false
261
- },
262
- {
263
- "content": "<s_father_occupation>",
264
- "lstrip": false,
265
- "normalized": false,
266
- "rstrip": false,
267
- "single_word": false
268
- },
269
- {
270
- "content": "</s_father_last_name>",
271
- "lstrip": false,
272
- "normalized": false,
273
- "rstrip": false,
274
- "single_word": false
275
- },
276
- {
277
- "content": "</s_father_birthplace>",
278
- "lstrip": false,
279
- "normalized": false,
280
- "rstrip": false,
281
- "single_word": false
282
- },
283
- {
284
- "content": "<s_father_religion>",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false
289
- },
290
- {
291
- "content": "</s_child_middle_name>",
292
- "lstrip": false,
293
- "normalized": false,
294
- "rstrip": false,
295
- "single_word": false
296
- },
297
- {
298
- "content": "</s_mother_nationalty>",
299
- "lstrip": false,
300
- "normalized": false,
301
- "rstrip": false,
302
- "single_word": false
303
- },
304
- {
305
- "content": "<s_mother_age>",
306
- "lstrip": false,
307
- "normalized": false,
308
- "rstrip": false,
309
- "single_word": false
310
- },
311
- {
312
- "content": "</s>",
313
- "lstrip": false,
314
- "normalized": false,
315
- "rstrip": false,
316
- "single_word": false
317
- },
318
- {
319
- "content": "</s_father_occupaation>",
320
- "lstrip": false,
321
- "normalized": false,
322
- "rstrip": false,
323
- "single_word": false
324
- },
325
- {
326
- "content": "<s_mother_nationalty>",
327
- "lstrip": false,
328
- "normalized": false,
329
- "rstrip": false,
330
- "single_word": false
331
- },
332
- {
333
- "content": "</s_DOB>",
334
- "lstrip": false,
335
- "normalized": false,
336
- "rstrip": false,
337
- "single_word": false
338
- },
339
- {
340
- "content": "<s_father_nationalty>",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false
345
- },
346
- {
347
- "content": "<s_mother_first_name>",
348
- "lstrip": false,
349
- "normalized": false,
350
- "rstrip": false,
351
- "single_word": false
352
- },
353
- {
354
- "content": "<s_father_birthplace>",
355
- "lstrip": false,
356
- "normalized": false,
357
- "rstrip": false,
358
- "single_word": false
359
- },
360
- {
361
- "content": "</s_child_last_name>",
362
- "lstrip": false,
363
- "normalized": false,
364
- "rstrip": false,
365
- "single_word": false
366
- },
367
- {
368
- "content": "</s_mother_religion>",
369
- "lstrip": false,
370
- "normalized": false,
371
- "rstrip": false,
372
- "single_word": false
373
- },
374
- {
375
- "content": "</s_province>",
376
- "lstrip": false,
377
- "normalized": false,
378
- "rstrip": false,
379
- "single_word": false
380
- },
381
- {
382
- "content": "<s_father_age>",
383
  "lstrip": false,
384
  "normalized": false,
385
  "rstrip": false,
386
  "single_word": false
387
  },
388
  {
389
- "content": "</s_father_middle_name>",
390
  "lstrip": false,
391
  "normalized": false,
392
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "<s_mother_name>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<s_father_name>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
15
  "single_word": false
16
  },
17
  {
18
+ "content": "<s_first_name>",
19
  "lstrip": false,
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
  },
24
  {
25
+ "content": "</s>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "lstrip": false,
27
  "normalized": false,
28
  "rstrip": false,
29
  "single_word": false
30
  },
31
  {
32
+ "content": "<s_random_number>",
33
  "lstrip": false,
34
  "normalized": false,
35
  "rstrip": false,
36
  "single_word": false
37
  },
38
  {
39
+ "content": "<s_place_of_birth>",
40
  "lstrip": false,
41
  "normalized": false,
42
  "rstrip": false,
43
  "single_word": false
44
  },
45
  {
46
+ "content": "</s_address>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
50
  "single_word": false
51
  },
52
  {
53
+ "content": "</s_mother_name>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
57
  "single_word": false
58
  },
59
  {
60
+ "content": "</s_random_number>",
61
  "lstrip": false,
62
  "normalized": false,
63
  "rstrip": false,
64
  "single_word": false
65
  },
66
  {
67
+ "content": "<s_title>",
68
  "lstrip": false,
69
  "normalized": false,
70
  "rstrip": false,
71
  "single_word": false
72
  },
73
  {
74
+ "content": "</s_first_name>",
75
  "lstrip": false,
76
  "normalized": false,
77
  "rstrip": false,
78
  "single_word": false
79
  },
80
  {
81
+ "content": "<s_last_name>",
82
  "lstrip": false,
83
  "normalized": false,
84
  "rstrip": false,
85
  "single_word": false
86
  },
87
  {
88
+ "content": "</s_title>",
89
  "lstrip": false,
90
  "normalized": false,
91
  "rstrip": false,
92
  "single_word": false
93
  },
94
  {
95
+ "content": "</s_father_name>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
99
  "single_word": false
100
  },
101
  {
102
+ "content": "<s_address>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
 
113
  "single_word": false
114
  },
115
  {
116
+ "content": "</s_last_name>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
120
  "single_word": false
121
  },
122
  {
123
+ "content": "</s_place_of_birth>",
124
  "lstrip": false,
125
  "normalized": false,
126
  "rstrip": false,
tokenizer.json CHANGED
@@ -91,7 +91,7 @@
91
  },
92
  {
93
  "id": 57525,
94
- "content": "<s_province>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  },
101
  {
102
  "id": 57526,
103
- "content": "<s_mother_last_name>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
@@ -109,7 +109,7 @@
109
  },
110
  {
111
  "id": 57527,
112
- "content": "<s_father_last_name>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
@@ -118,7 +118,7 @@
118
  },
119
  {
120
  "id": 57528,
121
- "content": "</s_father_religion>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
@@ -127,7 +127,7 @@
127
  },
128
  {
129
  "id": 57529,
130
- "content": "<s_mother_nationality>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
@@ -136,7 +136,7 @@
136
  },
137
  {
138
  "id": 57530,
139
- "content": "</s_father_first_name>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  },
146
  {
147
  "id": 57531,
148
- "content": "</s_father_occupation>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
@@ -154,7 +154,7 @@
154
  },
155
  {
156
  "id": 57532,
157
- "content": "</s_child_first_name>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
@@ -163,7 +163,7 @@
163
  },
164
  {
165
  "id": 57533,
166
- "content": "<s_mother_race>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
@@ -172,7 +172,7 @@
172
  },
173
  {
174
  "id": 57534,
175
- "content": "</s_mother_race>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
@@ -181,7 +181,7 @@
181
  },
182
  {
183
  "id": 57535,
184
- "content": "</s_city>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
@@ -190,7 +190,7 @@
190
  },
191
  {
192
  "id": 57536,
193
- "content": "<s_mother_religion>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
@@ -199,7 +199,7 @@
199
  },
200
  {
201
  "id": 57537,
202
- "content": "</s_mother_nationality>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
@@ -208,7 +208,7 @@
208
  },
209
  {
210
  "id": 57538,
211
- "content": "<s_father_first_name>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
@@ -217,7 +217,7 @@
217
  },
218
  {
219
  "id": 57539,
220
- "content": "<s_father_nationality>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
@@ -226,349 +226,7 @@
226
  },
227
  {
228
  "id": 57540,
229
- "content": "<s_mother-nationality>",
230
- "single_word": false,
231
- "lstrip": false,
232
- "rstrip": false,
233
- "normalized": false,
234
- "special": true
235
- },
236
- {
237
- "id": 57541,
238
- "content": "<s_father_occupaation>",
239
- "single_word": false,
240
- "lstrip": false,
241
- "rstrip": false,
242
- "normalized": false,
243
- "special": true
244
- },
245
- {
246
- "id": 57542,
247
- "content": "<s_mother_middle_name>",
248
- "single_word": false,
249
- "lstrip": false,
250
- "rstrip": false,
251
- "normalized": false,
252
- "special": true
253
- },
254
- {
255
- "id": 57543,
256
- "content": "</s_father_age>",
257
- "single_word": false,
258
- "lstrip": false,
259
- "rstrip": false,
260
- "normalized": false,
261
- "special": true
262
- },
263
- {
264
- "id": 57544,
265
- "content": "<s_child_last_name>",
266
- "single_word": false,
267
- "lstrip": false,
268
- "rstrip": false,
269
- "normalized": false,
270
- "special": true
271
- },
272
- {
273
- "id": 57545,
274
- "content": "<s_child_middle_name>",
275
- "single_word": false,
276
- "lstrip": false,
277
- "rstrip": false,
278
- "normalized": false,
279
- "special": true
280
- },
281
- {
282
- "id": 57546,
283
- "content": "<s_DOB>",
284
- "single_word": false,
285
- "lstrip": false,
286
- "rstrip": false,
287
- "normalized": false,
288
- "special": true
289
- },
290
- {
291
- "id": 57547,
292
- "content": "<s_mother_birthplace>",
293
- "single_word": false,
294
- "lstrip": false,
295
- "rstrip": false,
296
- "normalized": false,
297
- "special": true
298
- },
299
- {
300
- "id": 57548,
301
- "content": "</s_mother_last_name>",
302
- "single_word": false,
303
- "lstrip": false,
304
- "rstrip": false,
305
- "normalized": false,
306
- "special": true
307
- },
308
- {
309
- "id": 57549,
310
- "content": "<s_father_race>",
311
- "single_word": false,
312
- "lstrip": false,
313
- "rstrip": false,
314
- "normalized": false,
315
- "special": true
316
- },
317
- {
318
- "id": 57550,
319
- "content": "<s_city>",
320
- "single_word": false,
321
- "lstrip": false,
322
- "rstrip": false,
323
- "normalized": false,
324
- "special": true
325
- },
326
- {
327
- "id": 57551,
328
- "content": "</s_mother_first_name>",
329
- "single_word": false,
330
- "lstrip": false,
331
- "rstrip": false,
332
- "normalized": false,
333
- "special": true
334
- },
335
- {
336
- "id": 57552,
337
- "content": "</s_mother_birthplace>",
338
- "single_word": false,
339
- "lstrip": false,
340
- "rstrip": false,
341
- "normalized": false,
342
- "special": true
343
- },
344
- {
345
- "id": 57553,
346
- "content": "</s_father_nationality>",
347
- "single_word": false,
348
- "lstrip": false,
349
- "rstrip": false,
350
- "normalized": false,
351
- "special": true
352
- },
353
- {
354
- "id": 57554,
355
- "content": "</s_mother_age>",
356
- "single_word": false,
357
- "lstrip": false,
358
- "rstrip": false,
359
- "normalized": false,
360
- "special": true
361
- },
362
- {
363
- "id": 57555,
364
- "content": "</s_father_nationalty>",
365
- "single_word": false,
366
- "lstrip": false,
367
- "rstrip": false,
368
- "normalized": false,
369
- "special": true
370
- },
371
- {
372
- "id": 57556,
373
- "content": "</s_mother-nationality>",
374
- "single_word": false,
375
- "lstrip": false,
376
- "rstrip": false,
377
- "normalized": false,
378
- "special": true
379
- },
380
- {
381
- "id": 57557,
382
- "content": "<s_child_first_name>",
383
- "single_word": false,
384
- "lstrip": false,
385
- "rstrip": false,
386
- "normalized": false,
387
- "special": true
388
- },
389
- {
390
- "id": 57558,
391
- "content": "</s_mother_middle_name>",
392
- "single_word": false,
393
- "lstrip": false,
394
- "rstrip": false,
395
- "normalized": false,
396
- "special": true
397
- },
398
- {
399
- "id": 57559,
400
- "content": "<s_father_middle_name>",
401
- "single_word": false,
402
- "lstrip": false,
403
- "rstrip": false,
404
- "normalized": false,
405
- "special": true
406
- },
407
- {
408
- "id": 57560,
409
- "content": "</s_father_race>",
410
- "single_word": false,
411
- "lstrip": false,
412
- "rstrip": false,
413
- "normalized": false,
414
- "special": true
415
- },
416
- {
417
- "id": 57561,
418
- "content": "<s_father_occupation>",
419
- "single_word": false,
420
- "lstrip": false,
421
- "rstrip": false,
422
- "normalized": false,
423
- "special": true
424
- },
425
- {
426
- "id": 57562,
427
- "content": "</s_father_last_name>",
428
- "single_word": false,
429
- "lstrip": false,
430
- "rstrip": false,
431
- "normalized": false,
432
- "special": true
433
- },
434
- {
435
- "id": 57563,
436
- "content": "</s_father_birthplace>",
437
- "single_word": false,
438
- "lstrip": false,
439
- "rstrip": false,
440
- "normalized": false,
441
- "special": true
442
- },
443
- {
444
- "id": 57564,
445
- "content": "<s_father_religion>",
446
- "single_word": false,
447
- "lstrip": false,
448
- "rstrip": false,
449
- "normalized": false,
450
- "special": true
451
- },
452
- {
453
- "id": 57565,
454
- "content": "</s_child_middle_name>",
455
- "single_word": false,
456
- "lstrip": false,
457
- "rstrip": false,
458
- "normalized": false,
459
- "special": true
460
- },
461
- {
462
- "id": 57566,
463
- "content": "</s_mother_nationalty>",
464
- "single_word": false,
465
- "lstrip": false,
466
- "rstrip": false,
467
- "normalized": false,
468
- "special": true
469
- },
470
- {
471
- "id": 57567,
472
- "content": "<s_mother_age>",
473
- "single_word": false,
474
- "lstrip": false,
475
- "rstrip": false,
476
- "normalized": false,
477
- "special": true
478
- },
479
- {
480
- "id": 57568,
481
- "content": "</s_father_occupaation>",
482
- "single_word": false,
483
- "lstrip": false,
484
- "rstrip": false,
485
- "normalized": false,
486
- "special": true
487
- },
488
- {
489
- "id": 57569,
490
- "content": "<s_mother_nationalty>",
491
- "single_word": false,
492
- "lstrip": false,
493
- "rstrip": false,
494
- "normalized": false,
495
- "special": true
496
- },
497
- {
498
- "id": 57570,
499
- "content": "</s_DOB>",
500
- "single_word": false,
501
- "lstrip": false,
502
- "rstrip": false,
503
- "normalized": false,
504
- "special": true
505
- },
506
- {
507
- "id": 57571,
508
- "content": "<s_father_nationalty>",
509
- "single_word": false,
510
- "lstrip": false,
511
- "rstrip": false,
512
- "normalized": false,
513
- "special": true
514
- },
515
- {
516
- "id": 57572,
517
- "content": "<s_mother_first_name>",
518
- "single_word": false,
519
- "lstrip": false,
520
- "rstrip": false,
521
- "normalized": false,
522
- "special": true
523
- },
524
- {
525
- "id": 57573,
526
- "content": "<s_father_birthplace>",
527
- "single_word": false,
528
- "lstrip": false,
529
- "rstrip": false,
530
- "normalized": false,
531
- "special": true
532
- },
533
- {
534
- "id": 57574,
535
- "content": "</s_child_last_name>",
536
- "single_word": false,
537
- "lstrip": false,
538
- "rstrip": false,
539
- "normalized": false,
540
- "special": true
541
- },
542
- {
543
- "id": 57575,
544
- "content": "</s_mother_religion>",
545
- "single_word": false,
546
- "lstrip": false,
547
- "rstrip": false,
548
- "normalized": false,
549
- "special": true
550
- },
551
- {
552
- "id": 57576,
553
- "content": "</s_province>",
554
- "single_word": false,
555
- "lstrip": false,
556
- "rstrip": false,
557
- "normalized": false,
558
- "special": true
559
- },
560
- {
561
- "id": 57577,
562
- "content": "<s_father_age>",
563
- "single_word": false,
564
- "lstrip": false,
565
- "rstrip": false,
566
- "normalized": false,
567
- "special": true
568
- },
569
- {
570
- "id": 57578,
571
- "content": "</s_father_middle_name>",
572
  "single_word": false,
573
  "lstrip": false,
574
  "rstrip": false,
 
91
  },
92
  {
93
  "id": 57525,
94
+ "content": "<s_mother_name>",
95
  "single_word": false,
96
  "lstrip": false,
97
  "rstrip": false,
 
100
  },
101
  {
102
  "id": 57526,
103
+ "content": "<s_father_name>",
104
  "single_word": false,
105
  "lstrip": false,
106
  "rstrip": false,
 
109
  },
110
  {
111
  "id": 57527,
112
+ "content": "<s_first_name>",
113
  "single_word": false,
114
  "lstrip": false,
115
  "rstrip": false,
 
118
  },
119
  {
120
  "id": 57528,
121
+ "content": "<s_random_number>",
122
  "single_word": false,
123
  "lstrip": false,
124
  "rstrip": false,
 
127
  },
128
  {
129
  "id": 57529,
130
+ "content": "<s_place_of_birth>",
131
  "single_word": false,
132
  "lstrip": false,
133
  "rstrip": false,
 
136
  },
137
  {
138
  "id": 57530,
139
+ "content": "</s_address>",
140
  "single_word": false,
141
  "lstrip": false,
142
  "rstrip": false,
 
145
  },
146
  {
147
  "id": 57531,
148
+ "content": "</s_mother_name>",
149
  "single_word": false,
150
  "lstrip": false,
151
  "rstrip": false,
 
154
  },
155
  {
156
  "id": 57532,
157
+ "content": "</s_random_number>",
158
  "single_word": false,
159
  "lstrip": false,
160
  "rstrip": false,
 
163
  },
164
  {
165
  "id": 57533,
166
+ "content": "<s_title>",
167
  "single_word": false,
168
  "lstrip": false,
169
  "rstrip": false,
 
172
  },
173
  {
174
  "id": 57534,
175
+ "content": "</s_first_name>",
176
  "single_word": false,
177
  "lstrip": false,
178
  "rstrip": false,
 
181
  },
182
  {
183
  "id": 57535,
184
+ "content": "<s_last_name>",
185
  "single_word": false,
186
  "lstrip": false,
187
  "rstrip": false,
 
190
  },
191
  {
192
  "id": 57536,
193
+ "content": "</s_title>",
194
  "single_word": false,
195
  "lstrip": false,
196
  "rstrip": false,
 
199
  },
200
  {
201
  "id": 57537,
202
+ "content": "</s_father_name>",
203
  "single_word": false,
204
  "lstrip": false,
205
  "rstrip": false,
 
208
  },
209
  {
210
  "id": 57538,
211
+ "content": "<s_address>",
212
  "single_word": false,
213
  "lstrip": false,
214
  "rstrip": false,
 
217
  },
218
  {
219
  "id": 57539,
220
+ "content": "</s_last_name>",
221
  "single_word": false,
222
  "lstrip": false,
223
  "rstrip": false,
 
226
  },
227
  {
228
  "id": 57540,
229
+ "content": "</s_place_of_birth>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  "single_word": false,
231
  "lstrip": false,
232
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -65,7 +65,7 @@
65
  "special": true
66
  },
67
  "57525": {
68
- "content": "<s_province>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
@@ -73,7 +73,7 @@
73
  "special": true
74
  },
75
  "57526": {
76
- "content": "<s_mother_last_name>",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
@@ -81,7 +81,7 @@
81
  "special": true
82
  },
83
  "57527": {
84
- "content": "<s_father_last_name>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
@@ -89,7 +89,7 @@
89
  "special": true
90
  },
91
  "57528": {
92
- "content": "</s_father_religion>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
@@ -97,7 +97,7 @@
97
  "special": true
98
  },
99
  "57529": {
100
- "content": "<s_mother_nationality>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
@@ -105,7 +105,7 @@
105
  "special": true
106
  },
107
  "57530": {
108
- "content": "</s_father_first_name>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  "special": true
114
  },
115
  "57531": {
116
- "content": "</s_father_occupation>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
@@ -121,7 +121,7 @@
121
  "special": true
122
  },
123
  "57532": {
124
- "content": "</s_child_first_name>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
@@ -129,7 +129,7 @@
129
  "special": true
130
  },
131
  "57533": {
132
- "content": "<s_mother_race>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
@@ -137,7 +137,7 @@
137
  "special": true
138
  },
139
  "57534": {
140
- "content": "</s_mother_race>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
@@ -145,7 +145,7 @@
145
  "special": true
146
  },
147
  "57535": {
148
- "content": "</s_city>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
@@ -153,7 +153,7 @@
153
  "special": true
154
  },
155
  "57536": {
156
- "content": "<s_mother_religion>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
@@ -161,7 +161,7 @@
161
  "special": true
162
  },
163
  "57537": {
164
- "content": "</s_mother_nationality>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
@@ -169,7 +169,7 @@
169
  "special": true
170
  },
171
  "57538": {
172
- "content": "<s_father_first_name>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
@@ -177,7 +177,7 @@
177
  "special": true
178
  },
179
  "57539": {
180
- "content": "<s_father_nationality>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
@@ -185,311 +185,7 @@
185
  "special": true
186
  },
187
  "57540": {
188
- "content": "<s_mother-nationality>",
189
- "lstrip": false,
190
- "normalized": false,
191
- "rstrip": false,
192
- "single_word": false,
193
- "special": true
194
- },
195
- "57541": {
196
- "content": "<s_father_occupaation>",
197
- "lstrip": false,
198
- "normalized": false,
199
- "rstrip": false,
200
- "single_word": false,
201
- "special": true
202
- },
203
- "57542": {
204
- "content": "<s_mother_middle_name>",
205
- "lstrip": false,
206
- "normalized": false,
207
- "rstrip": false,
208
- "single_word": false,
209
- "special": true
210
- },
211
- "57543": {
212
- "content": "</s_father_age>",
213
- "lstrip": false,
214
- "normalized": false,
215
- "rstrip": false,
216
- "single_word": false,
217
- "special": true
218
- },
219
- "57544": {
220
- "content": "<s_child_last_name>",
221
- "lstrip": false,
222
- "normalized": false,
223
- "rstrip": false,
224
- "single_word": false,
225
- "special": true
226
- },
227
- "57545": {
228
- "content": "<s_child_middle_name>",
229
- "lstrip": false,
230
- "normalized": false,
231
- "rstrip": false,
232
- "single_word": false,
233
- "special": true
234
- },
235
- "57546": {
236
- "content": "<s_DOB>",
237
- "lstrip": false,
238
- "normalized": false,
239
- "rstrip": false,
240
- "single_word": false,
241
- "special": true
242
- },
243
- "57547": {
244
- "content": "<s_mother_birthplace>",
245
- "lstrip": false,
246
- "normalized": false,
247
- "rstrip": false,
248
- "single_word": false,
249
- "special": true
250
- },
251
- "57548": {
252
- "content": "</s_mother_last_name>",
253
- "lstrip": false,
254
- "normalized": false,
255
- "rstrip": false,
256
- "single_word": false,
257
- "special": true
258
- },
259
- "57549": {
260
- "content": "<s_father_race>",
261
- "lstrip": false,
262
- "normalized": false,
263
- "rstrip": false,
264
- "single_word": false,
265
- "special": true
266
- },
267
- "57550": {
268
- "content": "<s_city>",
269
- "lstrip": false,
270
- "normalized": false,
271
- "rstrip": false,
272
- "single_word": false,
273
- "special": true
274
- },
275
- "57551": {
276
- "content": "</s_mother_first_name>",
277
- "lstrip": false,
278
- "normalized": false,
279
- "rstrip": false,
280
- "single_word": false,
281
- "special": true
282
- },
283
- "57552": {
284
- "content": "</s_mother_birthplace>",
285
- "lstrip": false,
286
- "normalized": false,
287
- "rstrip": false,
288
- "single_word": false,
289
- "special": true
290
- },
291
- "57553": {
292
- "content": "</s_father_nationality>",
293
- "lstrip": false,
294
- "normalized": false,
295
- "rstrip": false,
296
- "single_word": false,
297
- "special": true
298
- },
299
- "57554": {
300
- "content": "</s_mother_age>",
301
- "lstrip": false,
302
- "normalized": false,
303
- "rstrip": false,
304
- "single_word": false,
305
- "special": true
306
- },
307
- "57555": {
308
- "content": "</s_father_nationalty>",
309
- "lstrip": false,
310
- "normalized": false,
311
- "rstrip": false,
312
- "single_word": false,
313
- "special": true
314
- },
315
- "57556": {
316
- "content": "</s_mother-nationality>",
317
- "lstrip": false,
318
- "normalized": false,
319
- "rstrip": false,
320
- "single_word": false,
321
- "special": true
322
- },
323
- "57557": {
324
- "content": "<s_child_first_name>",
325
- "lstrip": false,
326
- "normalized": false,
327
- "rstrip": false,
328
- "single_word": false,
329
- "special": true
330
- },
331
- "57558": {
332
- "content": "</s_mother_middle_name>",
333
- "lstrip": false,
334
- "normalized": false,
335
- "rstrip": false,
336
- "single_word": false,
337
- "special": true
338
- },
339
- "57559": {
340
- "content": "<s_father_middle_name>",
341
- "lstrip": false,
342
- "normalized": false,
343
- "rstrip": false,
344
- "single_word": false,
345
- "special": true
346
- },
347
- "57560": {
348
- "content": "</s_father_race>",
349
- "lstrip": false,
350
- "normalized": false,
351
- "rstrip": false,
352
- "single_word": false,
353
- "special": true
354
- },
355
- "57561": {
356
- "content": "<s_father_occupation>",
357
- "lstrip": false,
358
- "normalized": false,
359
- "rstrip": false,
360
- "single_word": false,
361
- "special": true
362
- },
363
- "57562": {
364
- "content": "</s_father_last_name>",
365
- "lstrip": false,
366
- "normalized": false,
367
- "rstrip": false,
368
- "single_word": false,
369
- "special": true
370
- },
371
- "57563": {
372
- "content": "</s_father_birthplace>",
373
- "lstrip": false,
374
- "normalized": false,
375
- "rstrip": false,
376
- "single_word": false,
377
- "special": true
378
- },
379
- "57564": {
380
- "content": "<s_father_religion>",
381
- "lstrip": false,
382
- "normalized": false,
383
- "rstrip": false,
384
- "single_word": false,
385
- "special": true
386
- },
387
- "57565": {
388
- "content": "</s_child_middle_name>",
389
- "lstrip": false,
390
- "normalized": false,
391
- "rstrip": false,
392
- "single_word": false,
393
- "special": true
394
- },
395
- "57566": {
396
- "content": "</s_mother_nationalty>",
397
- "lstrip": false,
398
- "normalized": false,
399
- "rstrip": false,
400
- "single_word": false,
401
- "special": true
402
- },
403
- "57567": {
404
- "content": "<s_mother_age>",
405
- "lstrip": false,
406
- "normalized": false,
407
- "rstrip": false,
408
- "single_word": false,
409
- "special": true
410
- },
411
- "57568": {
412
- "content": "</s_father_occupaation>",
413
- "lstrip": false,
414
- "normalized": false,
415
- "rstrip": false,
416
- "single_word": false,
417
- "special": true
418
- },
419
- "57569": {
420
- "content": "<s_mother_nationalty>",
421
- "lstrip": false,
422
- "normalized": false,
423
- "rstrip": false,
424
- "single_word": false,
425
- "special": true
426
- },
427
- "57570": {
428
- "content": "</s_DOB>",
429
- "lstrip": false,
430
- "normalized": false,
431
- "rstrip": false,
432
- "single_word": false,
433
- "special": true
434
- },
435
- "57571": {
436
- "content": "<s_father_nationalty>",
437
- "lstrip": false,
438
- "normalized": false,
439
- "rstrip": false,
440
- "single_word": false,
441
- "special": true
442
- },
443
- "57572": {
444
- "content": "<s_mother_first_name>",
445
- "lstrip": false,
446
- "normalized": false,
447
- "rstrip": false,
448
- "single_word": false,
449
- "special": true
450
- },
451
- "57573": {
452
- "content": "<s_father_birthplace>",
453
- "lstrip": false,
454
- "normalized": false,
455
- "rstrip": false,
456
- "single_word": false,
457
- "special": true
458
- },
459
- "57574": {
460
- "content": "</s_child_last_name>",
461
- "lstrip": false,
462
- "normalized": false,
463
- "rstrip": false,
464
- "single_word": false,
465
- "special": true
466
- },
467
- "57575": {
468
- "content": "</s_mother_religion>",
469
- "lstrip": false,
470
- "normalized": false,
471
- "rstrip": false,
472
- "single_word": false,
473
- "special": true
474
- },
475
- "57576": {
476
- "content": "</s_province>",
477
- "lstrip": false,
478
- "normalized": false,
479
- "rstrip": false,
480
- "single_word": false,
481
- "special": true
482
- },
483
- "57577": {
484
- "content": "<s_father_age>",
485
- "lstrip": false,
486
- "normalized": false,
487
- "rstrip": false,
488
- "single_word": false,
489
- "special": true
490
- },
491
- "57578": {
492
- "content": "</s_father_middle_name>",
493
  "lstrip": false,
494
  "normalized": false,
495
  "rstrip": false,
@@ -498,62 +194,24 @@
498
  }
499
  },
500
  "additional_special_tokens": [
501
- "<s_province>",
502
- "<s_mother_last_name>",
503
- "<s_father_last_name>",
504
- "</s_father_religion>",
505
- "<s_mother_nationality>",
506
- "</s_father_first_name>",
507
- "</s_father_occupation>",
508
- "</s_child_first_name>",
509
- "<s_mother_race>",
510
- "</s_mother_race>",
511
- "</s_city>",
512
- "<s_mother_religion>",
513
- "</s_mother_nationality>",
514
- "<s_father_first_name>",
515
- "<s_father_nationality>",
516
- "<s_mother-nationality>",
517
- "<s_father_occupaation>",
518
- "<s_mother_middle_name>",
519
- "</s_father_age>",
520
- "<s_child_last_name>",
521
- "<s_child_middle_name>",
522
- "<s_DOB>",
523
- "<s_mother_birthplace>",
524
- "</s_mother_last_name>",
525
- "<s_father_race>",
526
- "<s_city>",
527
- "</s_mother_first_name>",
528
- "<s>",
529
- "</s_mother_birthplace>",
530
- "</s_father_nationality>",
531
- "</s_mother_age>",
532
- "</s_father_nationalty>",
533
- "</s_mother-nationality>",
534
- "<s_child_first_name>",
535
- "</s_mother_middle_name>",
536
- "<s_father_middle_name>",
537
- "</s_father_race>",
538
- "<s_father_occupation>",
539
- "</s_father_last_name>",
540
- "</s_father_birthplace>",
541
- "<s_father_religion>",
542
- "</s_child_middle_name>",
543
- "</s_mother_nationalty>",
544
- "<s_mother_age>",
545
  "</s>",
546
- "</s_father_occupaation>",
547
- "<s_mother_nationalty>",
548
- "</s_DOB>",
549
- "<s_father_nationalty>",
550
- "<s_mother_first_name>",
551
- "<s_father_birthplace>",
552
- "</s_child_last_name>",
553
- "</s_mother_religion>",
554
- "</s_province>",
555
- "<s_father_age>",
556
- "</s_father_middle_name>"
 
 
 
557
  ],
558
  "bos_token": "<s>",
559
  "clean_up_tokenization_spaces": true,
 
65
  "special": true
66
  },
67
  "57525": {
68
+ "content": "<s_mother_name>",
69
  "lstrip": false,
70
  "normalized": false,
71
  "rstrip": false,
 
73
  "special": true
74
  },
75
  "57526": {
76
+ "content": "<s_father_name>",
77
  "lstrip": false,
78
  "normalized": false,
79
  "rstrip": false,
 
81
  "special": true
82
  },
83
  "57527": {
84
+ "content": "<s_first_name>",
85
  "lstrip": false,
86
  "normalized": false,
87
  "rstrip": false,
 
89
  "special": true
90
  },
91
  "57528": {
92
+ "content": "<s_random_number>",
93
  "lstrip": false,
94
  "normalized": false,
95
  "rstrip": false,
 
97
  "special": true
98
  },
99
  "57529": {
100
+ "content": "<s_place_of_birth>",
101
  "lstrip": false,
102
  "normalized": false,
103
  "rstrip": false,
 
105
  "special": true
106
  },
107
  "57530": {
108
+ "content": "</s_address>",
109
  "lstrip": false,
110
  "normalized": false,
111
  "rstrip": false,
 
113
  "special": true
114
  },
115
  "57531": {
116
+ "content": "</s_mother_name>",
117
  "lstrip": false,
118
  "normalized": false,
119
  "rstrip": false,
 
121
  "special": true
122
  },
123
  "57532": {
124
+ "content": "</s_random_number>",
125
  "lstrip": false,
126
  "normalized": false,
127
  "rstrip": false,
 
129
  "special": true
130
  },
131
  "57533": {
132
+ "content": "<s_title>",
133
  "lstrip": false,
134
  "normalized": false,
135
  "rstrip": false,
 
137
  "special": true
138
  },
139
  "57534": {
140
+ "content": "</s_first_name>",
141
  "lstrip": false,
142
  "normalized": false,
143
  "rstrip": false,
 
145
  "special": true
146
  },
147
  "57535": {
148
+ "content": "<s_last_name>",
149
  "lstrip": false,
150
  "normalized": false,
151
  "rstrip": false,
 
153
  "special": true
154
  },
155
  "57536": {
156
+ "content": "</s_title>",
157
  "lstrip": false,
158
  "normalized": false,
159
  "rstrip": false,
 
161
  "special": true
162
  },
163
  "57537": {
164
+ "content": "</s_father_name>",
165
  "lstrip": false,
166
  "normalized": false,
167
  "rstrip": false,
 
169
  "special": true
170
  },
171
  "57538": {
172
+ "content": "<s_address>",
173
  "lstrip": false,
174
  "normalized": false,
175
  "rstrip": false,
 
177
  "special": true
178
  },
179
  "57539": {
180
+ "content": "</s_last_name>",
181
  "lstrip": false,
182
  "normalized": false,
183
  "rstrip": false,
 
185
  "special": true
186
  },
187
  "57540": {
188
+ "content": "</s_place_of_birth>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  "lstrip": false,
190
  "normalized": false,
191
  "rstrip": false,
 
194
  }
195
  },
196
  "additional_special_tokens": [
197
+ "<s_mother_name>",
198
+ "<s_father_name>",
199
+ "<s_first_name>",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  "</s>",
201
+ "<s_random_number>",
202
+ "<s_place_of_birth>",
203
+ "</s_address>",
204
+ "</s_mother_name>",
205
+ "</s_random_number>",
206
+ "<s_title>",
207
+ "</s_first_name>",
208
+ "<s_last_name>",
209
+ "</s_title>",
210
+ "</s_father_name>",
211
+ "<s_address>",
212
+ "<s>",
213
+ "</s_last_name>",
214
+ "</s_place_of_birth>"
215
  ],
216
  "bos_token": "<s>",
217
  "clean_up_tokenization_spaces": true,