JandC committed on
Commit
cdf3cc0
1 Parent(s): f1beebd

End of training

Browse files
README.md CHANGED
@@ -40,7 +40,7 @@ The following hyperparameters were used during training:
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
- - num_epochs: 3
44
 
45
  ### Training results
46
 
@@ -51,4 +51,4 @@ The following hyperparameters were used during training:
51
  - Transformers 4.34.0.dev0
52
  - Pytorch 2.0.1+cu118
53
  - Datasets 2.14.5
54
- - Tokenizers 0.13.3
 
40
  - seed: 42
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: linear
43
+ - num_epochs: 5
44
 
45
  ### Training results
46
 
 
51
  - Transformers 4.34.0.dev0
52
  - Pytorch 2.0.1+cu118
53
  - Datasets 2.14.5
54
+ - Tokenizers 0.14.0
added_tokens.json CHANGED
@@ -1,13 +1,5 @@
1
  {
2
- "</s_address>": 57532,
3
- "</s_company>": 57530,
4
- "</s_date>": 57528,
5
- "</s_total>": 57526,
6
- "<s_address>": 57531,
7
- "<s_company>": 57529,
8
- "<s_date>": 57527,
9
  "<s_iitcdip>": 57523,
10
  "<s_synthdog>": 57524,
11
- "<s_total>": 57525,
12
  "<sep/>": 57522
13
  }
 
1
  {
 
 
 
 
 
 
 
2
  "<s_iitcdip>": 57523,
3
  "<s_synthdog>": 57524,
 
4
  "<sep/>": 57522
5
  }
special_tokens_map.json CHANGED
@@ -1,26 +1,14 @@
1
  {
2
  "additional_special_tokens": [
3
- "<s_total>",
4
- "</s_total>",
5
- "<s_date>",
6
- "</s_date>",
7
- "<s_company>",
8
- "</s_company>",
9
- "<s_address>",
10
- "</s_address>",
11
- "<s>",
12
- "</s>"
13
  ],
14
  "bos_token": "<s>",
15
  "cls_token": "<s>",
16
  "eos_token": "</s>",
17
- "mask_token": {
18
- "content": "<mask>",
19
- "lstrip": true,
20
- "normalized": true,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
  "pad_token": "<pad>",
25
  "sep_token": "</s>",
26
  "unk_token": "<unk>"
 
1
  {
2
  "additional_special_tokens": [
3
+ "<s_iitcdip>",
4
+ "<s_synthdog>",
5
+ "</s>",
6
+ "<s>"
 
 
 
 
 
 
7
  ],
8
  "bos_token": "<s>",
9
  "cls_token": "<s>",
10
  "eos_token": "</s>",
11
+ "mask_token": "<mask>",
 
 
 
 
 
 
12
  "pad_token": "<pad>",
13
  "sep_token": "</s>",
14
  "unk_token": "<unk>"
tokenizer.json CHANGED
@@ -21,8 +21,8 @@
21
  "id": 0,
22
  "content": "<s>",
23
  "single_word": false,
24
- "lstrip": false,
25
- "rstrip": false,
26
  "normalized": false,
27
  "special": true
28
  },
@@ -39,8 +39,8 @@
39
  "id": 2,
40
  "content": "</s>",
41
  "single_word": false,
42
- "lstrip": false,
43
- "rstrip": false,
44
  "normalized": false,
45
  "special": true
46
  },
@@ -66,8 +66,8 @@
66
  "id": 57522,
67
  "content": "<sep/>",
68
  "single_word": false,
69
- "lstrip": false,
70
- "rstrip": false,
71
  "normalized": true,
72
  "special": false
73
  },
@@ -75,8 +75,8 @@
75
  "id": 57523,
76
  "content": "<s_iitcdip>",
77
  "single_word": false,
78
- "lstrip": false,
79
- "rstrip": false,
80
  "normalized": false,
81
  "special": true
82
  },
@@ -84,80 +84,8 @@
84
  "id": 57524,
85
  "content": "<s_synthdog>",
86
  "single_word": false,
87
- "lstrip": false,
88
- "rstrip": false,
89
- "normalized": false,
90
- "special": true
91
- },
92
- {
93
- "id": 57525,
94
- "content": "<s_total>",
95
- "single_word": false,
96
- "lstrip": false,
97
- "rstrip": false,
98
- "normalized": false,
99
- "special": true
100
- },
101
- {
102
- "id": 57526,
103
- "content": "</s_total>",
104
- "single_word": false,
105
- "lstrip": false,
106
- "rstrip": false,
107
- "normalized": false,
108
- "special": true
109
- },
110
- {
111
- "id": 57527,
112
- "content": "<s_date>",
113
- "single_word": false,
114
- "lstrip": false,
115
- "rstrip": false,
116
- "normalized": false,
117
- "special": true
118
- },
119
- {
120
- "id": 57528,
121
- "content": "</s_date>",
122
- "single_word": false,
123
- "lstrip": false,
124
- "rstrip": false,
125
- "normalized": false,
126
- "special": true
127
- },
128
- {
129
- "id": 57529,
130
- "content": "<s_company>",
131
- "single_word": false,
132
- "lstrip": false,
133
- "rstrip": false,
134
- "normalized": false,
135
- "special": true
136
- },
137
- {
138
- "id": 57530,
139
- "content": "</s_company>",
140
- "single_word": false,
141
- "lstrip": false,
142
- "rstrip": false,
143
- "normalized": false,
144
- "special": true
145
- },
146
- {
147
- "id": 57531,
148
- "content": "<s_address>",
149
- "single_word": false,
150
- "lstrip": false,
151
- "rstrip": false,
152
- "normalized": false,
153
- "special": true
154
- },
155
- {
156
- "id": 57532,
157
- "content": "</s_address>",
158
- "single_word": false,
159
- "lstrip": false,
160
- "rstrip": false,
161
  "normalized": false,
162
  "special": true
163
  }
@@ -230361,6 +230289,7 @@
230361
  "<mask>",
230362
  0.0
230363
  ]
230364
- ]
 
230365
  }
230366
  }
 
21
  "id": 0,
22
  "content": "<s>",
23
  "single_word": false,
24
+ "lstrip": true,
25
+ "rstrip": true,
26
  "normalized": false,
27
  "special": true
28
  },
 
39
  "id": 2,
40
  "content": "</s>",
41
  "single_word": false,
42
+ "lstrip": true,
43
+ "rstrip": true,
44
  "normalized": false,
45
  "special": true
46
  },
 
66
  "id": 57522,
67
  "content": "<sep/>",
68
  "single_word": false,
69
+ "lstrip": true,
70
+ "rstrip": true,
71
  "normalized": true,
72
  "special": false
73
  },
 
75
  "id": 57523,
76
  "content": "<s_iitcdip>",
77
  "single_word": false,
78
+ "lstrip": true,
79
+ "rstrip": true,
80
  "normalized": false,
81
  "special": true
82
  },
 
84
  "id": 57524,
85
  "content": "<s_synthdog>",
86
  "single_word": false,
87
+ "lstrip": true,
88
+ "rstrip": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  "normalized": false,
90
  "special": true
91
  }
 
230289
  "<mask>",
230290
  0.0
230291
  ]
230292
+ ],
230293
+ "byte_fallback": false
230294
  }
230295
  }
tokenizer_config.json CHANGED
@@ -1,16 +1,81 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<s>",
3
  "clean_up_tokenization_spaces": true,
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
- "mask_token": {
7
- "__type": "AddedToken",
8
- "content": "<mask>",
9
- "lstrip": true,
10
- "normalized": true,
11
- "rstrip": false,
12
- "single_word": false
13
- },
14
  "model_max_length": 1000000000000000019884624838656,
15
  "pad_token": "<pad>",
16
  "processor_class": "DonutProcessor",
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "57521": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "57522": {
44
+ "content": "<sep/>",
45
+ "lstrip": true,
46
+ "normalized": true,
47
+ "rstrip": true,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "57523": {
52
+ "content": "<s_iitcdip>",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": true,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "57524": {
60
+ "content": "<s_synthdog>",
61
+ "lstrip": true,
62
+ "normalized": false,
63
+ "rstrip": true,
64
+ "single_word": false,
65
+ "special": true
66
+ }
67
+ },
68
+ "additional_special_tokens": [
69
+ "<s_iitcdip>",
70
+ "<s_synthdog>",
71
+ "</s>",
72
+ "<s>"
73
+ ],
74
  "bos_token": "<s>",
75
  "clean_up_tokenization_spaces": true,
76
  "cls_token": "<s>",
77
  "eos_token": "</s>",
78
+ "mask_token": "<mask>",
 
 
 
 
 
 
 
79
  "model_max_length": 1000000000000000019884624838656,
80
  "pad_token": "<pad>",
81
  "processor_class": "DonutProcessor",