woo2 committed on
Commit
2c26ebd
1 Parent(s): e552dc6

End of training

Browse files
added_tokens.json CHANGED
@@ -1,10 +1,26 @@
1
  {
2
  "</s>": 2,
 
 
 
 
 
 
 
 
3
  "<mask>": 57521,
4
  "<pad>": 1,
5
  "<s>": 0,
 
 
 
 
 
6
  "<s_iitcdip>": 57523,
 
7
  "<s_synthdog>": 57524,
 
 
8
  "<sep/>": 57522,
9
  "<unk>": 3
10
  }
 
1
  {
2
  "</s>": 2,
3
+ "</s_address>": 57528,
4
+ "</s_bill>": 57526,
5
+ "</s_company>": 57533,
6
+ "</s_date>": 57527,
7
+ "</s_description>": 57540,
8
+ "</s_item no>": 57537,
9
+ "</s_total amount>": 57529,
10
+ "</s_total>": 57530,
11
  "<mask>": 57521,
12
  "<pad>": 1,
13
  "<s>": 0,
14
+ "<s_address>": 57536,
15
+ "<s_bill>": 57534,
16
+ "<s_company>": 57539,
17
+ "<s_date>": 57525,
18
+ "<s_description>": 57535,
19
  "<s_iitcdip>": 57523,
20
+ "<s_item no>": 57532,
21
  "<s_synthdog>": 57524,
22
+ "<s_total amount>": 57531,
23
+ "<s_total>": 57538,
24
  "<sep/>": 57522,
25
  "<unk>": 3
26
  }
special_tokens_map.json CHANGED
@@ -2,8 +2,22 @@
2
  "additional_special_tokens": [
3
  "<s_iitcdip>",
4
  "<s_synthdog>",
5
- "</s>",
6
- "<s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  ],
8
  "bos_token": "<s>",
9
  "cls_token": "<s>",
 
2
  "additional_special_tokens": [
3
  "<s_iitcdip>",
4
  "<s_synthdog>",
5
+ "<s_date>",
6
+ "</s_bill>",
7
+ "</s_date>",
8
+ "</s_address>",
9
+ "</s_total amount>",
10
+ "</s_total>",
11
+ "<s_total amount>",
12
+ "<s_item no>",
13
+ "</s_company>",
14
+ "<s_bill>",
15
+ "<s_description>",
16
+ "<s_address>",
17
+ "</s_item no>",
18
+ "<s_total>",
19
+ "<s_company>",
20
+ "</s_description>"
21
  ],
22
  "bos_token": "<s>",
23
  "cls_token": "<s>",
tokenizer.json CHANGED
@@ -21,8 +21,8 @@
21
  "id": 0,
22
  "content": "<s>",
23
  "single_word": false,
24
- "lstrip": true,
25
- "rstrip": true,
26
  "normalized": false,
27
  "special": true
28
  },
@@ -39,8 +39,8 @@
39
  "id": 2,
40
  "content": "</s>",
41
  "single_word": false,
42
- "lstrip": true,
43
- "rstrip": true,
44
  "normalized": false,
45
  "special": true
46
  },
@@ -88,6 +88,150 @@
88
  "rstrip": true,
89
  "normalized": false,
90
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  }
92
  ],
93
  "normalizer": {
 
21
  "id": 0,
22
  "content": "<s>",
23
  "single_word": false,
24
+ "lstrip": false,
25
+ "rstrip": false,
26
  "normalized": false,
27
  "special": true
28
  },
 
39
  "id": 2,
40
  "content": "</s>",
41
  "single_word": false,
42
+ "lstrip": false,
43
+ "rstrip": false,
44
  "normalized": false,
45
  "special": true
46
  },
 
88
  "rstrip": true,
89
  "normalized": false,
90
  "special": true
91
+ },
92
+ {
93
+ "id": 57525,
94
+ "content": "<s_date>",
95
+ "single_word": false,
96
+ "lstrip": true,
97
+ "rstrip": true,
98
+ "normalized": false,
99
+ "special": true
100
+ },
101
+ {
102
+ "id": 57526,
103
+ "content": "</s_bill>",
104
+ "single_word": false,
105
+ "lstrip": true,
106
+ "rstrip": true,
107
+ "normalized": false,
108
+ "special": true
109
+ },
110
+ {
111
+ "id": 57527,
112
+ "content": "</s_date>",
113
+ "single_word": false,
114
+ "lstrip": true,
115
+ "rstrip": true,
116
+ "normalized": false,
117
+ "special": true
118
+ },
119
+ {
120
+ "id": 57528,
121
+ "content": "</s_address>",
122
+ "single_word": false,
123
+ "lstrip": true,
124
+ "rstrip": true,
125
+ "normalized": false,
126
+ "special": true
127
+ },
128
+ {
129
+ "id": 57529,
130
+ "content": "</s_total amount>",
131
+ "single_word": false,
132
+ "lstrip": true,
133
+ "rstrip": true,
134
+ "normalized": false,
135
+ "special": true
136
+ },
137
+ {
138
+ "id": 57530,
139
+ "content": "</s_total>",
140
+ "single_word": false,
141
+ "lstrip": true,
142
+ "rstrip": true,
143
+ "normalized": false,
144
+ "special": true
145
+ },
146
+ {
147
+ "id": 57531,
148
+ "content": "<s_total amount>",
149
+ "single_word": false,
150
+ "lstrip": true,
151
+ "rstrip": true,
152
+ "normalized": false,
153
+ "special": true
154
+ },
155
+ {
156
+ "id": 57532,
157
+ "content": "<s_item no>",
158
+ "single_word": false,
159
+ "lstrip": true,
160
+ "rstrip": true,
161
+ "normalized": false,
162
+ "special": true
163
+ },
164
+ {
165
+ "id": 57533,
166
+ "content": "</s_company>",
167
+ "single_word": false,
168
+ "lstrip": true,
169
+ "rstrip": true,
170
+ "normalized": false,
171
+ "special": true
172
+ },
173
+ {
174
+ "id": 57534,
175
+ "content": "<s_bill>",
176
+ "single_word": false,
177
+ "lstrip": true,
178
+ "rstrip": true,
179
+ "normalized": false,
180
+ "special": true
181
+ },
182
+ {
183
+ "id": 57535,
184
+ "content": "<s_description>",
185
+ "single_word": false,
186
+ "lstrip": true,
187
+ "rstrip": true,
188
+ "normalized": false,
189
+ "special": true
190
+ },
191
+ {
192
+ "id": 57536,
193
+ "content": "<s_address>",
194
+ "single_word": false,
195
+ "lstrip": true,
196
+ "rstrip": true,
197
+ "normalized": false,
198
+ "special": true
199
+ },
200
+ {
201
+ "id": 57537,
202
+ "content": "</s_item no>",
203
+ "single_word": false,
204
+ "lstrip": true,
205
+ "rstrip": true,
206
+ "normalized": false,
207
+ "special": true
208
+ },
209
+ {
210
+ "id": 57538,
211
+ "content": "<s_total>",
212
+ "single_word": false,
213
+ "lstrip": true,
214
+ "rstrip": true,
215
+ "normalized": false,
216
+ "special": true
217
+ },
218
+ {
219
+ "id": 57539,
220
+ "content": "<s_company>",
221
+ "single_word": false,
222
+ "lstrip": true,
223
+ "rstrip": true,
224
+ "normalized": false,
225
+ "special": true
226
+ },
227
+ {
228
+ "id": 57540,
229
+ "content": "</s_description>",
230
+ "single_word": false,
231
+ "lstrip": true,
232
+ "rstrip": true,
233
+ "normalized": false,
234
+ "special": true
235
  }
236
  ],
237
  "normalizer": {
tokenizer_config.json CHANGED
@@ -2,9 +2,9 @@
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<s>",
5
- "lstrip": true,
6
  "normalized": false,
7
- "rstrip": true,
8
  "single_word": false,
9
  "special": true
10
  },
@@ -18,9 +18,9 @@
18
  },
19
  "2": {
20
  "content": "</s>",
21
- "lstrip": true,
22
  "normalized": false,
23
- "rstrip": true,
24
  "single_word": false,
25
  "special": true
26
  },
@@ -63,13 +63,155 @@
63
  "rstrip": true,
64
  "single_word": false,
65
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  },
68
  "additional_special_tokens": [
69
  "<s_iitcdip>",
70
  "<s_synthdog>",
71
- "</s>",
72
- "<s>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  ],
74
  "bos_token": "<s>",
75
  "clean_up_tokenization_spaces": true,
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<s>",
5
+ "lstrip": false,
6
  "normalized": false,
7
+ "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
 
18
  },
19
  "2": {
20
  "content": "</s>",
21
+ "lstrip": false,
22
  "normalized": false,
23
+ "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
 
63
  "rstrip": true,
64
  "single_word": false,
65
  "special": true
66
+ },
67
+ "57525": {
68
+ "content": "<s_date>",
69
+ "lstrip": true,
70
+ "normalized": false,
71
+ "rstrip": true,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "57526": {
76
+ "content": "</s_bill>",
77
+ "lstrip": true,
78
+ "normalized": false,
79
+ "rstrip": true,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "57527": {
84
+ "content": "</s_date>",
85
+ "lstrip": true,
86
+ "normalized": false,
87
+ "rstrip": true,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "57528": {
92
+ "content": "</s_address>",
93
+ "lstrip": true,
94
+ "normalized": false,
95
+ "rstrip": true,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "57529": {
100
+ "content": "</s_total amount>",
101
+ "lstrip": true,
102
+ "normalized": false,
103
+ "rstrip": true,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "57530": {
108
+ "content": "</s_total>",
109
+ "lstrip": true,
110
+ "normalized": false,
111
+ "rstrip": true,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "57531": {
116
+ "content": "<s_total amount>",
117
+ "lstrip": true,
118
+ "normalized": false,
119
+ "rstrip": true,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "57532": {
124
+ "content": "<s_item no>",
125
+ "lstrip": true,
126
+ "normalized": false,
127
+ "rstrip": true,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "57533": {
132
+ "content": "</s_company>",
133
+ "lstrip": true,
134
+ "normalized": false,
135
+ "rstrip": true,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "57534": {
140
+ "content": "<s_bill>",
141
+ "lstrip": true,
142
+ "normalized": false,
143
+ "rstrip": true,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "57535": {
148
+ "content": "<s_description>",
149
+ "lstrip": true,
150
+ "normalized": false,
151
+ "rstrip": true,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "57536": {
156
+ "content": "<s_address>",
157
+ "lstrip": true,
158
+ "normalized": false,
159
+ "rstrip": true,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "57537": {
164
+ "content": "</s_item no>",
165
+ "lstrip": true,
166
+ "normalized": false,
167
+ "rstrip": true,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "57538": {
172
+ "content": "<s_total>",
173
+ "lstrip": true,
174
+ "normalized": false,
175
+ "rstrip": true,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "57539": {
180
+ "content": "<s_company>",
181
+ "lstrip": true,
182
+ "normalized": false,
183
+ "rstrip": true,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "57540": {
188
+ "content": "</s_description>",
189
+ "lstrip": true,
190
+ "normalized": false,
191
+ "rstrip": true,
192
+ "single_word": false,
193
+ "special": true
194
  }
195
  },
196
  "additional_special_tokens": [
197
  "<s_iitcdip>",
198
  "<s_synthdog>",
199
+ "<s_date>",
200
+ "</s_bill>",
201
+ "</s_date>",
202
+ "</s_address>",
203
+ "</s_total amount>",
204
+ "</s_total>",
205
+ "<s_total amount>",
206
+ "<s_item no>",
207
+ "</s_company>",
208
+ "<s_bill>",
209
+ "<s_description>",
210
+ "<s_address>",
211
+ "</s_item no>",
212
+ "<s_total>",
213
+ "<s_company>",
214
+ "</s_description>"
215
  ],
216
  "bos_token": "<s>",
217
  "clean_up_tokenization_spaces": true,