dima806 commited on
Commit
501530f
1 Parent(s): 25a381b

Upload folder using huggingface_hub

Browse files
checkpoint-12573/config.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "ace of clubs",
13
+ "1": "ace of diamonds",
14
+ "2": "ace of hearts",
15
+ "3": "ace of spades",
16
+ "4": "eight of clubs",
17
+ "5": "eight of diamonds",
18
+ "6": "eight of hearts",
19
+ "7": "eight of spades",
20
+ "8": "five of clubs",
21
+ "9": "five of diamonds",
22
+ "10": "five of hearts",
23
+ "11": "five of spades",
24
+ "12": "four of clubs",
25
+ "13": "four of diamonds",
26
+ "14": "four of hearts",
27
+ "15": "four of spades",
28
+ "16": "jack of clubs",
29
+ "17": "jack of diamonds",
30
+ "18": "jack of hearts",
31
+ "19": "jack of spades",
32
+ "20": "joker",
33
+ "21": "king of clubs",
34
+ "22": "king of diamonds",
35
+ "23": "king of hearts",
36
+ "24": "king of spades",
37
+ "25": "nine of clubs",
38
+ "26": "nine of diamonds",
39
+ "27": "nine of hearts",
40
+ "28": "nine of spades",
41
+ "29": "queen of clubs",
42
+ "30": "queen of diamonds",
43
+ "31": "queen of hearts",
44
+ "32": "queen of spades",
45
+ "33": "seven of clubs",
46
+ "34": "seven of diamonds",
47
+ "35": "seven of hearts",
48
+ "36": "seven of spades",
49
+ "37": "six of clubs",
50
+ "38": "six of diamonds",
51
+ "39": "six of hearts",
52
+ "40": "six of spades",
53
+ "41": "ten of clubs",
54
+ "42": "ten of diamonds",
55
+ "43": "ten of hearts",
56
+ "44": "ten of spades",
57
+ "45": "three of clubs",
58
+ "46": "three of diamonds",
59
+ "47": "three of hearts",
60
+ "48": "three of spades",
61
+ "49": "two of clubs",
62
+ "50": "two of diamonds",
63
+ "51": "two of hearts",
64
+ "52": "two of spades"
65
+ },
66
+ "image_size": 224,
67
+ "initializer_range": 0.02,
68
+ "intermediate_size": 3072,
69
+ "label2id": {
70
+ "ace of clubs": 0,
71
+ "ace of diamonds": 1,
72
+ "ace of hearts": 2,
73
+ "ace of spades": 3,
74
+ "eight of clubs": 4,
75
+ "eight of diamonds": 5,
76
+ "eight of hearts": 6,
77
+ "eight of spades": 7,
78
+ "five of clubs": 8,
79
+ "five of diamonds": 9,
80
+ "five of hearts": 10,
81
+ "five of spades": 11,
82
+ "four of clubs": 12,
83
+ "four of diamonds": 13,
84
+ "four of hearts": 14,
85
+ "four of spades": 15,
86
+ "jack of clubs": 16,
87
+ "jack of diamonds": 17,
88
+ "jack of hearts": 18,
89
+ "jack of spades": 19,
90
+ "joker": 20,
91
+ "king of clubs": 21,
92
+ "king of diamonds": 22,
93
+ "king of hearts": 23,
94
+ "king of spades": 24,
95
+ "nine of clubs": 25,
96
+ "nine of diamonds": 26,
97
+ "nine of hearts": 27,
98
+ "nine of spades": 28,
99
+ "queen of clubs": 29,
100
+ "queen of diamonds": 30,
101
+ "queen of hearts": 31,
102
+ "queen of spades": 32,
103
+ "seven of clubs": 33,
104
+ "seven of diamonds": 34,
105
+ "seven of hearts": 35,
106
+ "seven of spades": 36,
107
+ "six of clubs": 37,
108
+ "six of diamonds": 38,
109
+ "six of hearts": 39,
110
+ "six of spades": 40,
111
+ "ten of clubs": 41,
112
+ "ten of diamonds": 42,
113
+ "ten of hearts": 43,
114
+ "ten of spades": 44,
115
+ "three of clubs": 45,
116
+ "three of diamonds": 46,
117
+ "three of hearts": 47,
118
+ "three of spades": 48,
119
+ "two of clubs": 49,
120
+ "two of diamonds": 50,
121
+ "two of hearts": 51,
122
+ "two of spades": 52
123
+ },
124
+ "layer_norm_eps": 1e-12,
125
+ "model_type": "vit",
126
+ "num_attention_heads": 12,
127
+ "num_channels": 3,
128
+ "num_hidden_layers": 12,
129
+ "patch_size": 16,
130
+ "problem_type": "single_label_classification",
131
+ "qkv_bias": true,
132
+ "torch_dtype": "float32",
133
+ "transformers_version": "4.35.2"
134
+ }
checkpoint-12573/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67fe27f4a357a2b8bce1a9e565ac9aa096eb5fed005739ce9dd866069f41146
3
+ size 343380860
checkpoint-12573/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60df09a2bc821843db71c08038c9c7a7158ebc3ab78cdc726d3fdc81df26b80
3
+ size 686882181
checkpoint-12573/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-12573/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08651b8766f027755554427495c802175585671567ae2d8e447cc15e645bcf3
3
+ size 14575
checkpoint-12573/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c79712c2aeb667d92ddfe915471307631ff08b6064be3501a25c1c88d1734e98
3
+ size 627
checkpoint-12573/trainer_state.json ADDED
@@ -0,0 +1,1060 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.2068374156951904,
3
+ "best_model_checkpoint": "card_type_image_detection/checkpoint-12573",
4
+ "epoch": 99.0,
5
+ "eval_steps": 500,
6
+ "global_step": 12573,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.023209876543209877,
14
+ "eval_loss": 3.959347724914551,
15
+ "eval_runtime": 16.3056,
16
+ "eval_samples_per_second": 124.191,
17
+ "eval_steps_per_second": 3.925,
18
+ "step": 127
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_accuracy": 0.047407407407407405,
23
+ "eval_loss": 3.927157163619995,
24
+ "eval_runtime": 16.3398,
25
+ "eval_samples_per_second": 123.93,
26
+ "eval_steps_per_second": 3.917,
27
+ "step": 254
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_accuracy": 0.0819753086419753,
32
+ "eval_loss": 3.8877668380737305,
33
+ "eval_runtime": 16.2853,
34
+ "eval_samples_per_second": 124.345,
35
+ "eval_steps_per_second": 3.93,
36
+ "step": 381
37
+ },
38
+ {
39
+ "epoch": 3.94,
40
+ "learning_rate": 1.9288537549407114e-06,
41
+ "loss": 3.9022,
42
+ "step": 500
43
+ },
44
+ {
45
+ "epoch": 4.0,
46
+ "eval_accuracy": 0.09876543209876543,
47
+ "eval_loss": 3.8418056964874268,
48
+ "eval_runtime": 16.2864,
49
+ "eval_samples_per_second": 124.337,
50
+ "eval_steps_per_second": 3.93,
51
+ "step": 508
52
+ },
53
+ {
54
+ "epoch": 5.0,
55
+ "eval_accuracy": 0.12,
56
+ "eval_loss": 3.7911477088928223,
57
+ "eval_runtime": 16.2971,
58
+ "eval_samples_per_second": 124.255,
59
+ "eval_steps_per_second": 3.927,
60
+ "step": 635
61
+ },
62
+ {
63
+ "epoch": 6.0,
64
+ "eval_accuracy": 0.1382716049382716,
65
+ "eval_loss": 3.7379021644592285,
66
+ "eval_runtime": 16.4902,
67
+ "eval_samples_per_second": 122.8,
68
+ "eval_steps_per_second": 3.881,
69
+ "step": 762
70
+ },
71
+ {
72
+ "epoch": 7.0,
73
+ "eval_accuracy": 0.1580246913580247,
74
+ "eval_loss": 3.6840596199035645,
75
+ "eval_runtime": 16.2622,
76
+ "eval_samples_per_second": 124.522,
77
+ "eval_steps_per_second": 3.936,
78
+ "step": 889
79
+ },
80
+ {
81
+ "epoch": 7.87,
82
+ "learning_rate": 1.849802371541502e-06,
83
+ "loss": 3.6821,
84
+ "step": 1000
85
+ },
86
+ {
87
+ "epoch": 8.0,
88
+ "eval_accuracy": 0.17777777777777778,
89
+ "eval_loss": 3.6320221424102783,
90
+ "eval_runtime": 16.3988,
91
+ "eval_samples_per_second": 123.485,
92
+ "eval_steps_per_second": 3.903,
93
+ "step": 1016
94
+ },
95
+ {
96
+ "epoch": 9.0,
97
+ "eval_accuracy": 0.19654320987654322,
98
+ "eval_loss": 3.580777883529663,
99
+ "eval_runtime": 16.325,
100
+ "eval_samples_per_second": 124.043,
101
+ "eval_steps_per_second": 3.92,
102
+ "step": 1143
103
+ },
104
+ {
105
+ "epoch": 10.0,
106
+ "eval_accuracy": 0.21580246913580248,
107
+ "eval_loss": 3.5295674800872803,
108
+ "eval_runtime": 16.3625,
109
+ "eval_samples_per_second": 123.758,
110
+ "eval_steps_per_second": 3.911,
111
+ "step": 1270
112
+ },
113
+ {
114
+ "epoch": 11.0,
115
+ "eval_accuracy": 0.23703703703703705,
116
+ "eval_loss": 3.479665994644165,
117
+ "eval_runtime": 16.5252,
118
+ "eval_samples_per_second": 122.54,
119
+ "eval_steps_per_second": 3.873,
120
+ "step": 1397
121
+ },
122
+ {
123
+ "epoch": 11.81,
124
+ "learning_rate": 1.7707509881422924e-06,
125
+ "loss": 3.4599,
126
+ "step": 1500
127
+ },
128
+ {
129
+ "epoch": 12.0,
130
+ "eval_accuracy": 0.24592592592592594,
131
+ "eval_loss": 3.431598663330078,
132
+ "eval_runtime": 16.3048,
133
+ "eval_samples_per_second": 124.197,
134
+ "eval_steps_per_second": 3.925,
135
+ "step": 1524
136
+ },
137
+ {
138
+ "epoch": 13.0,
139
+ "eval_accuracy": 0.25728395061728393,
140
+ "eval_loss": 3.38520884513855,
141
+ "eval_runtime": 16.3716,
142
+ "eval_samples_per_second": 123.69,
143
+ "eval_steps_per_second": 3.909,
144
+ "step": 1651
145
+ },
146
+ {
147
+ "epoch": 14.0,
148
+ "eval_accuracy": 0.2730864197530864,
149
+ "eval_loss": 3.341092348098755,
150
+ "eval_runtime": 16.4208,
151
+ "eval_samples_per_second": 123.319,
152
+ "eval_steps_per_second": 3.897,
153
+ "step": 1778
154
+ },
155
+ {
156
+ "epoch": 15.0,
157
+ "eval_accuracy": 0.291358024691358,
158
+ "eval_loss": 3.299830198287964,
159
+ "eval_runtime": 16.349,
160
+ "eval_samples_per_second": 123.861,
161
+ "eval_steps_per_second": 3.915,
162
+ "step": 1905
163
+ },
164
+ {
165
+ "epoch": 15.75,
166
+ "learning_rate": 1.691699604743083e-06,
167
+ "loss": 3.2748,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 16.0,
172
+ "eval_accuracy": 0.305679012345679,
173
+ "eval_loss": 3.2592811584472656,
174
+ "eval_runtime": 16.2664,
175
+ "eval_samples_per_second": 124.49,
176
+ "eval_steps_per_second": 3.934,
177
+ "step": 2032
178
+ },
179
+ {
180
+ "epoch": 17.0,
181
+ "eval_accuracy": 0.32641975308641974,
182
+ "eval_loss": 3.2209341526031494,
183
+ "eval_runtime": 16.4355,
184
+ "eval_samples_per_second": 123.209,
185
+ "eval_steps_per_second": 3.894,
186
+ "step": 2159
187
+ },
188
+ {
189
+ "epoch": 18.0,
190
+ "eval_accuracy": 0.3437037037037037,
191
+ "eval_loss": 3.184448003768921,
192
+ "eval_runtime": 16.3387,
193
+ "eval_samples_per_second": 123.939,
194
+ "eval_steps_per_second": 3.917,
195
+ "step": 2286
196
+ },
197
+ {
198
+ "epoch": 19.0,
199
+ "eval_accuracy": 0.345679012345679,
200
+ "eval_loss": 3.1492130756378174,
201
+ "eval_runtime": 16.2457,
202
+ "eval_samples_per_second": 124.648,
203
+ "eval_steps_per_second": 3.939,
204
+ "step": 2413
205
+ },
206
+ {
207
+ "epoch": 19.69,
208
+ "learning_rate": 1.6126482213438735e-06,
209
+ "loss": 3.1183,
210
+ "step": 2500
211
+ },
212
+ {
213
+ "epoch": 20.0,
214
+ "eval_accuracy": 0.3634567901234568,
215
+ "eval_loss": 3.117238998413086,
216
+ "eval_runtime": 16.3142,
217
+ "eval_samples_per_second": 124.125,
218
+ "eval_steps_per_second": 3.923,
219
+ "step": 2540
220
+ },
221
+ {
222
+ "epoch": 21.0,
223
+ "eval_accuracy": 0.37382716049382714,
224
+ "eval_loss": 3.0856010913848877,
225
+ "eval_runtime": 16.4136,
226
+ "eval_samples_per_second": 123.374,
227
+ "eval_steps_per_second": 3.899,
228
+ "step": 2667
229
+ },
230
+ {
231
+ "epoch": 22.0,
232
+ "eval_accuracy": 0.38469135802469134,
233
+ "eval_loss": 3.0540201663970947,
234
+ "eval_runtime": 16.404,
235
+ "eval_samples_per_second": 123.446,
236
+ "eval_steps_per_second": 3.901,
237
+ "step": 2794
238
+ },
239
+ {
240
+ "epoch": 23.0,
241
+ "eval_accuracy": 0.3916049382716049,
242
+ "eval_loss": 3.025864362716675,
243
+ "eval_runtime": 16.3387,
244
+ "eval_samples_per_second": 123.939,
245
+ "eval_steps_per_second": 3.917,
246
+ "step": 2921
247
+ },
248
+ {
249
+ "epoch": 23.62,
250
+ "learning_rate": 1.533596837944664e-06,
251
+ "loss": 2.9883,
252
+ "step": 3000
253
+ },
254
+ {
255
+ "epoch": 24.0,
256
+ "eval_accuracy": 0.4014814814814815,
257
+ "eval_loss": 2.9977123737335205,
258
+ "eval_runtime": 16.5183,
259
+ "eval_samples_per_second": 122.592,
260
+ "eval_steps_per_second": 3.874,
261
+ "step": 3048
262
+ },
263
+ {
264
+ "epoch": 25.0,
265
+ "eval_accuracy": 0.4103703703703704,
266
+ "eval_loss": 2.970736503601074,
267
+ "eval_runtime": 16.4478,
268
+ "eval_samples_per_second": 123.117,
269
+ "eval_steps_per_second": 3.891,
270
+ "step": 3175
271
+ },
272
+ {
273
+ "epoch": 26.0,
274
+ "eval_accuracy": 0.42962962962962964,
275
+ "eval_loss": 2.9437484741210938,
276
+ "eval_runtime": 16.3805,
277
+ "eval_samples_per_second": 123.623,
278
+ "eval_steps_per_second": 3.907,
279
+ "step": 3302
280
+ },
281
+ {
282
+ "epoch": 27.0,
283
+ "eval_accuracy": 0.4365432098765432,
284
+ "eval_loss": 2.918938636779785,
285
+ "eval_runtime": 16.4584,
286
+ "eval_samples_per_second": 123.038,
287
+ "eval_steps_per_second": 3.889,
288
+ "step": 3429
289
+ },
290
+ {
291
+ "epoch": 27.56,
292
+ "learning_rate": 1.4545454545454544e-06,
293
+ "loss": 2.8743,
294
+ "step": 3500
295
+ },
296
+ {
297
+ "epoch": 28.0,
298
+ "eval_accuracy": 0.44592592592592595,
299
+ "eval_loss": 2.8959240913391113,
300
+ "eval_runtime": 16.8148,
301
+ "eval_samples_per_second": 120.43,
302
+ "eval_steps_per_second": 3.806,
303
+ "step": 3556
304
+ },
305
+ {
306
+ "epoch": 29.0,
307
+ "eval_accuracy": 0.4597530864197531,
308
+ "eval_loss": 2.8701860904693604,
309
+ "eval_runtime": 16.5161,
310
+ "eval_samples_per_second": 122.607,
311
+ "eval_steps_per_second": 3.875,
312
+ "step": 3683
313
+ },
314
+ {
315
+ "epoch": 30.0,
316
+ "eval_accuracy": 0.46469135802469136,
317
+ "eval_loss": 2.848033905029297,
318
+ "eval_runtime": 16.6005,
319
+ "eval_samples_per_second": 121.984,
320
+ "eval_steps_per_second": 3.855,
321
+ "step": 3810
322
+ },
323
+ {
324
+ "epoch": 31.0,
325
+ "eval_accuracy": 0.4730864197530864,
326
+ "eval_loss": 2.825526714324951,
327
+ "eval_runtime": 16.5596,
328
+ "eval_samples_per_second": 122.286,
329
+ "eval_steps_per_second": 3.865,
330
+ "step": 3937
331
+ },
332
+ {
333
+ "epoch": 31.5,
334
+ "learning_rate": 1.375494071146245e-06,
335
+ "loss": 2.773,
336
+ "step": 4000
337
+ },
338
+ {
339
+ "epoch": 32.0,
340
+ "eval_accuracy": 0.47802469135802467,
341
+ "eval_loss": 2.804414749145508,
342
+ "eval_runtime": 16.6816,
343
+ "eval_samples_per_second": 121.391,
344
+ "eval_steps_per_second": 3.837,
345
+ "step": 4064
346
+ },
347
+ {
348
+ "epoch": 33.0,
349
+ "eval_accuracy": 0.47555555555555556,
350
+ "eval_loss": 2.7853612899780273,
351
+ "eval_runtime": 16.7524,
352
+ "eval_samples_per_second": 120.878,
353
+ "eval_steps_per_second": 3.82,
354
+ "step": 4191
355
+ },
356
+ {
357
+ "epoch": 34.0,
358
+ "eval_accuracy": 0.4918518518518519,
359
+ "eval_loss": 2.7612335681915283,
360
+ "eval_runtime": 16.5289,
361
+ "eval_samples_per_second": 122.513,
362
+ "eval_steps_per_second": 3.872,
363
+ "step": 4318
364
+ },
365
+ {
366
+ "epoch": 35.0,
367
+ "eval_accuracy": 0.497283950617284,
368
+ "eval_loss": 2.741335391998291,
369
+ "eval_runtime": 16.6912,
370
+ "eval_samples_per_second": 121.321,
371
+ "eval_steps_per_second": 3.834,
372
+ "step": 4445
373
+ },
374
+ {
375
+ "epoch": 35.43,
376
+ "learning_rate": 1.2964426877470356e-06,
377
+ "loss": 2.6822,
378
+ "step": 4500
379
+ },
380
+ {
381
+ "epoch": 36.0,
382
+ "eval_accuracy": 0.49876543209876545,
383
+ "eval_loss": 2.724756956100464,
384
+ "eval_runtime": 16.517,
385
+ "eval_samples_per_second": 122.601,
386
+ "eval_steps_per_second": 3.875,
387
+ "step": 4572
388
+ },
389
+ {
390
+ "epoch": 37.0,
391
+ "eval_accuracy": 0.49777777777777776,
392
+ "eval_loss": 2.7084524631500244,
393
+ "eval_runtime": 16.6709,
394
+ "eval_samples_per_second": 121.469,
395
+ "eval_steps_per_second": 3.839,
396
+ "step": 4699
397
+ },
398
+ {
399
+ "epoch": 38.0,
400
+ "eval_accuracy": 0.5150617283950617,
401
+ "eval_loss": 2.6876704692840576,
402
+ "eval_runtime": 16.6152,
403
+ "eval_samples_per_second": 121.876,
404
+ "eval_steps_per_second": 3.852,
405
+ "step": 4826
406
+ },
407
+ {
408
+ "epoch": 39.0,
409
+ "eval_accuracy": 0.5106172839506172,
410
+ "eval_loss": 2.6705734729766846,
411
+ "eval_runtime": 16.5075,
412
+ "eval_samples_per_second": 122.671,
413
+ "eval_steps_per_second": 3.877,
414
+ "step": 4953
415
+ },
416
+ {
417
+ "epoch": 39.37,
418
+ "learning_rate": 1.217391304347826e-06,
419
+ "loss": 2.5988,
420
+ "step": 5000
421
+ },
422
+ {
423
+ "epoch": 40.0,
424
+ "eval_accuracy": 0.5116049382716049,
425
+ "eval_loss": 2.655836343765259,
426
+ "eval_runtime": 16.7707,
427
+ "eval_samples_per_second": 120.746,
428
+ "eval_steps_per_second": 3.816,
429
+ "step": 5080
430
+ },
431
+ {
432
+ "epoch": 41.0,
433
+ "eval_accuracy": 0.5125925925925926,
434
+ "eval_loss": 2.6394340991973877,
435
+ "eval_runtime": 16.7747,
436
+ "eval_samples_per_second": 120.718,
437
+ "eval_steps_per_second": 3.815,
438
+ "step": 5207
439
+ },
440
+ {
441
+ "epoch": 42.0,
442
+ "eval_accuracy": 0.5209876543209877,
443
+ "eval_loss": 2.623532295227051,
444
+ "eval_runtime": 16.7009,
445
+ "eval_samples_per_second": 121.251,
446
+ "eval_steps_per_second": 3.832,
447
+ "step": 5334
448
+ },
449
+ {
450
+ "epoch": 43.0,
451
+ "eval_accuracy": 0.5249382716049382,
452
+ "eval_loss": 2.6078622341156006,
453
+ "eval_runtime": 16.3629,
454
+ "eval_samples_per_second": 123.756,
455
+ "eval_steps_per_second": 3.911,
456
+ "step": 5461
457
+ },
458
+ {
459
+ "epoch": 43.31,
460
+ "learning_rate": 1.1383399209486167e-06,
461
+ "loss": 2.5306,
462
+ "step": 5500
463
+ },
464
+ {
465
+ "epoch": 44.0,
466
+ "eval_accuracy": 0.5293827160493827,
467
+ "eval_loss": 2.592928647994995,
468
+ "eval_runtime": 16.7766,
469
+ "eval_samples_per_second": 120.704,
470
+ "eval_steps_per_second": 3.815,
471
+ "step": 5588
472
+ },
473
+ {
474
+ "epoch": 45.0,
475
+ "eval_accuracy": 0.5338271604938272,
476
+ "eval_loss": 2.575329542160034,
477
+ "eval_runtime": 17.0334,
478
+ "eval_samples_per_second": 118.884,
479
+ "eval_steps_per_second": 3.757,
480
+ "step": 5715
481
+ },
482
+ {
483
+ "epoch": 46.0,
484
+ "eval_accuracy": 0.542716049382716,
485
+ "eval_loss": 2.5605058670043945,
486
+ "eval_runtime": 16.7153,
487
+ "eval_samples_per_second": 121.146,
488
+ "eval_steps_per_second": 3.829,
489
+ "step": 5842
490
+ },
491
+ {
492
+ "epoch": 47.0,
493
+ "eval_accuracy": 0.548641975308642,
494
+ "eval_loss": 2.548151969909668,
495
+ "eval_runtime": 16.3632,
496
+ "eval_samples_per_second": 123.754,
497
+ "eval_steps_per_second": 3.911,
498
+ "step": 5969
499
+ },
500
+ {
501
+ "epoch": 47.24,
502
+ "learning_rate": 1.0592885375494072e-06,
503
+ "loss": 2.4638,
504
+ "step": 6000
505
+ },
506
+ {
507
+ "epoch": 48.0,
508
+ "eval_accuracy": 0.5530864197530864,
509
+ "eval_loss": 2.5332412719726562,
510
+ "eval_runtime": 16.2257,
511
+ "eval_samples_per_second": 124.802,
512
+ "eval_steps_per_second": 3.944,
513
+ "step": 6096
514
+ },
515
+ {
516
+ "epoch": 49.0,
517
+ "eval_accuracy": 0.5535802469135802,
518
+ "eval_loss": 2.521514654159546,
519
+ "eval_runtime": 16.2855,
520
+ "eval_samples_per_second": 124.344,
521
+ "eval_steps_per_second": 3.93,
522
+ "step": 6223
523
+ },
524
+ {
525
+ "epoch": 50.0,
526
+ "eval_accuracy": 0.5619753086419753,
527
+ "eval_loss": 2.507723569869995,
528
+ "eval_runtime": 16.2945,
529
+ "eval_samples_per_second": 124.275,
530
+ "eval_steps_per_second": 3.928,
531
+ "step": 6350
532
+ },
533
+ {
534
+ "epoch": 51.0,
535
+ "eval_accuracy": 0.5595061728395062,
536
+ "eval_loss": 2.4946696758270264,
537
+ "eval_runtime": 16.2486,
538
+ "eval_samples_per_second": 124.626,
539
+ "eval_steps_per_second": 3.939,
540
+ "step": 6477
541
+ },
542
+ {
543
+ "epoch": 51.18,
544
+ "learning_rate": 9.802371541501976e-07,
545
+ "loss": 2.405,
546
+ "step": 6500
547
+ },
548
+ {
549
+ "epoch": 52.0,
550
+ "eval_accuracy": 0.5580246913580247,
551
+ "eval_loss": 2.4835174083709717,
552
+ "eval_runtime": 16.5375,
553
+ "eval_samples_per_second": 122.449,
554
+ "eval_steps_per_second": 3.87,
555
+ "step": 6604
556
+ },
557
+ {
558
+ "epoch": 53.0,
559
+ "eval_accuracy": 0.5585185185185185,
560
+ "eval_loss": 2.473555088043213,
561
+ "eval_runtime": 17.2437,
562
+ "eval_samples_per_second": 117.434,
563
+ "eval_steps_per_second": 3.711,
564
+ "step": 6731
565
+ },
566
+ {
567
+ "epoch": 54.0,
568
+ "eval_accuracy": 0.5644444444444444,
569
+ "eval_loss": 2.4599006175994873,
570
+ "eval_runtime": 17.2017,
571
+ "eval_samples_per_second": 117.721,
572
+ "eval_steps_per_second": 3.721,
573
+ "step": 6858
574
+ },
575
+ {
576
+ "epoch": 55.0,
577
+ "eval_accuracy": 0.5708641975308641,
578
+ "eval_loss": 2.444490432739258,
579
+ "eval_runtime": 16.4767,
580
+ "eval_samples_per_second": 122.901,
581
+ "eval_steps_per_second": 3.884,
582
+ "step": 6985
583
+ },
584
+ {
585
+ "epoch": 55.12,
586
+ "learning_rate": 9.011857707509881e-07,
587
+ "loss": 2.3499,
588
+ "step": 7000
589
+ },
590
+ {
591
+ "epoch": 56.0,
592
+ "eval_accuracy": 0.5718518518518518,
593
+ "eval_loss": 2.43546199798584,
594
+ "eval_runtime": 17.1098,
595
+ "eval_samples_per_second": 118.353,
596
+ "eval_steps_per_second": 3.741,
597
+ "step": 7112
598
+ },
599
+ {
600
+ "epoch": 57.0,
601
+ "eval_accuracy": 0.5738271604938272,
602
+ "eval_loss": 2.424128532409668,
603
+ "eval_runtime": 16.3356,
604
+ "eval_samples_per_second": 123.962,
605
+ "eval_steps_per_second": 3.918,
606
+ "step": 7239
607
+ },
608
+ {
609
+ "epoch": 58.0,
610
+ "eval_accuracy": 0.5679012345679012,
611
+ "eval_loss": 2.4162774085998535,
612
+ "eval_runtime": 16.3093,
613
+ "eval_samples_per_second": 124.162,
614
+ "eval_steps_per_second": 3.924,
615
+ "step": 7366
616
+ },
617
+ {
618
+ "epoch": 59.0,
619
+ "eval_accuracy": 0.5733333333333334,
620
+ "eval_loss": 2.404547929763794,
621
+ "eval_runtime": 16.3838,
622
+ "eval_samples_per_second": 123.598,
623
+ "eval_steps_per_second": 3.906,
624
+ "step": 7493
625
+ },
626
+ {
627
+ "epoch": 59.06,
628
+ "learning_rate": 8.221343873517787e-07,
629
+ "loss": 2.3047,
630
+ "step": 7500
631
+ },
632
+ {
633
+ "epoch": 60.0,
634
+ "eval_accuracy": 0.5767901234567901,
635
+ "eval_loss": 2.3955578804016113,
636
+ "eval_runtime": 16.4961,
637
+ "eval_samples_per_second": 122.756,
638
+ "eval_steps_per_second": 3.88,
639
+ "step": 7620
640
+ },
641
+ {
642
+ "epoch": 61.0,
643
+ "eval_accuracy": 0.5733333333333334,
644
+ "eval_loss": 2.3882274627685547,
645
+ "eval_runtime": 16.5941,
646
+ "eval_samples_per_second": 122.031,
647
+ "eval_steps_per_second": 3.857,
648
+ "step": 7747
649
+ },
650
+ {
651
+ "epoch": 62.0,
652
+ "eval_accuracy": 0.5758024691358025,
653
+ "eval_loss": 2.3768720626831055,
654
+ "eval_runtime": 16.3063,
655
+ "eval_samples_per_second": 124.185,
656
+ "eval_steps_per_second": 3.925,
657
+ "step": 7874
658
+ },
659
+ {
660
+ "epoch": 62.99,
661
+ "learning_rate": 7.430830039525692e-07,
662
+ "loss": 2.2616,
663
+ "step": 8000
664
+ },
665
+ {
666
+ "epoch": 63.0,
667
+ "eval_accuracy": 0.5802469135802469,
668
+ "eval_loss": 2.3649182319641113,
669
+ "eval_runtime": 16.4383,
670
+ "eval_samples_per_second": 123.188,
671
+ "eval_steps_per_second": 3.893,
672
+ "step": 8001
673
+ },
674
+ {
675
+ "epoch": 64.0,
676
+ "eval_accuracy": 0.5782716049382716,
677
+ "eval_loss": 2.3605239391326904,
678
+ "eval_runtime": 16.5048,
679
+ "eval_samples_per_second": 122.691,
680
+ "eval_steps_per_second": 3.878,
681
+ "step": 8128
682
+ },
683
+ {
684
+ "epoch": 65.0,
685
+ "eval_accuracy": 0.5881481481481482,
686
+ "eval_loss": 2.348599433898926,
687
+ "eval_runtime": 16.6986,
688
+ "eval_samples_per_second": 121.268,
689
+ "eval_steps_per_second": 3.833,
690
+ "step": 8255
691
+ },
692
+ {
693
+ "epoch": 66.0,
694
+ "eval_accuracy": 0.5891358024691358,
695
+ "eval_loss": 2.3403825759887695,
696
+ "eval_runtime": 16.6528,
697
+ "eval_samples_per_second": 121.601,
698
+ "eval_steps_per_second": 3.843,
699
+ "step": 8382
700
+ },
701
+ {
702
+ "epoch": 66.93,
703
+ "learning_rate": 6.640316205533597e-07,
704
+ "loss": 2.224,
705
+ "step": 8500
706
+ },
707
+ {
708
+ "epoch": 67.0,
709
+ "eval_accuracy": 0.5876543209876544,
710
+ "eval_loss": 2.333630323410034,
711
+ "eval_runtime": 16.323,
712
+ "eval_samples_per_second": 124.058,
713
+ "eval_steps_per_second": 3.921,
714
+ "step": 8509
715
+ },
716
+ {
717
+ "epoch": 68.0,
718
+ "eval_accuracy": 0.5881481481481482,
719
+ "eval_loss": 2.3267500400543213,
720
+ "eval_runtime": 16.3769,
721
+ "eval_samples_per_second": 123.649,
722
+ "eval_steps_per_second": 3.908,
723
+ "step": 8636
724
+ },
725
+ {
726
+ "epoch": 69.0,
727
+ "eval_accuracy": 0.5871604938271605,
728
+ "eval_loss": 2.3214409351348877,
729
+ "eval_runtime": 16.4152,
730
+ "eval_samples_per_second": 123.361,
731
+ "eval_steps_per_second": 3.899,
732
+ "step": 8763
733
+ },
734
+ {
735
+ "epoch": 70.0,
736
+ "eval_accuracy": 0.5920987654320987,
737
+ "eval_loss": 2.3111300468444824,
738
+ "eval_runtime": 16.7406,
739
+ "eval_samples_per_second": 120.963,
740
+ "eval_steps_per_second": 3.823,
741
+ "step": 8890
742
+ },
743
+ {
744
+ "epoch": 70.87,
745
+ "learning_rate": 5.849802371541502e-07,
746
+ "loss": 2.1885,
747
+ "step": 9000
748
+ },
749
+ {
750
+ "epoch": 71.0,
751
+ "eval_accuracy": 0.5906172839506173,
752
+ "eval_loss": 2.304325580596924,
753
+ "eval_runtime": 16.5534,
754
+ "eval_samples_per_second": 122.331,
755
+ "eval_steps_per_second": 3.866,
756
+ "step": 9017
757
+ },
758
+ {
759
+ "epoch": 72.0,
760
+ "eval_accuracy": 0.5960493827160493,
761
+ "eval_loss": 2.297461986541748,
762
+ "eval_runtime": 16.4435,
763
+ "eval_samples_per_second": 123.149,
764
+ "eval_steps_per_second": 3.892,
765
+ "step": 9144
766
+ },
767
+ {
768
+ "epoch": 73.0,
769
+ "eval_accuracy": 0.5871604938271605,
770
+ "eval_loss": 2.296058177947998,
771
+ "eval_runtime": 16.3403,
772
+ "eval_samples_per_second": 123.927,
773
+ "eval_steps_per_second": 3.917,
774
+ "step": 9271
775
+ },
776
+ {
777
+ "epoch": 74.0,
778
+ "eval_accuracy": 0.5960493827160493,
779
+ "eval_loss": 2.2856781482696533,
780
+ "eval_runtime": 16.6488,
781
+ "eval_samples_per_second": 121.63,
782
+ "eval_steps_per_second": 3.844,
783
+ "step": 9398
784
+ },
785
+ {
786
+ "epoch": 74.8,
787
+ "learning_rate": 5.059288537549406e-07,
788
+ "loss": 2.1603,
789
+ "step": 9500
790
+ },
791
+ {
792
+ "epoch": 75.0,
793
+ "eval_accuracy": 0.5965432098765432,
794
+ "eval_loss": 2.2791759967803955,
795
+ "eval_runtime": 17.0015,
796
+ "eval_samples_per_second": 119.107,
797
+ "eval_steps_per_second": 3.764,
798
+ "step": 9525
799
+ },
800
+ {
801
+ "epoch": 76.0,
802
+ "eval_accuracy": 0.5955555555555555,
803
+ "eval_loss": 2.2750935554504395,
804
+ "eval_runtime": 16.6586,
805
+ "eval_samples_per_second": 121.559,
806
+ "eval_steps_per_second": 3.842,
807
+ "step": 9652
808
+ },
809
+ {
810
+ "epoch": 77.0,
811
+ "eval_accuracy": 0.5965432098765432,
812
+ "eval_loss": 2.268305540084839,
813
+ "eval_runtime": 16.6007,
814
+ "eval_samples_per_second": 121.983,
815
+ "eval_steps_per_second": 3.855,
816
+ "step": 9779
817
+ },
818
+ {
819
+ "epoch": 78.0,
820
+ "eval_accuracy": 0.6049382716049383,
821
+ "eval_loss": 2.2604947090148926,
822
+ "eval_runtime": 16.3684,
823
+ "eval_samples_per_second": 123.714,
824
+ "eval_steps_per_second": 3.91,
825
+ "step": 9906
826
+ },
827
+ {
828
+ "epoch": 78.74,
829
+ "learning_rate": 4.268774703557312e-07,
830
+ "loss": 2.1357,
831
+ "step": 10000
832
+ },
833
+ {
834
+ "epoch": 79.0,
835
+ "eval_accuracy": 0.6064197530864197,
836
+ "eval_loss": 2.254981756210327,
837
+ "eval_runtime": 16.4537,
838
+ "eval_samples_per_second": 123.072,
839
+ "eval_steps_per_second": 3.89,
840
+ "step": 10033
841
+ },
842
+ {
843
+ "epoch": 80.0,
844
+ "eval_accuracy": 0.6034567901234568,
845
+ "eval_loss": 2.2524936199188232,
846
+ "eval_runtime": 16.3798,
847
+ "eval_samples_per_second": 123.628,
848
+ "eval_steps_per_second": 3.907,
849
+ "step": 10160
850
+ },
851
+ {
852
+ "epoch": 81.0,
853
+ "eval_accuracy": 0.6054320987654321,
854
+ "eval_loss": 2.2475554943084717,
855
+ "eval_runtime": 16.2339,
856
+ "eval_samples_per_second": 124.739,
857
+ "eval_steps_per_second": 3.942,
858
+ "step": 10287
859
+ },
860
+ {
861
+ "epoch": 82.0,
862
+ "eval_accuracy": 0.6004938271604938,
863
+ "eval_loss": 2.244779586791992,
864
+ "eval_runtime": 16.19,
865
+ "eval_samples_per_second": 125.077,
866
+ "eval_steps_per_second": 3.953,
867
+ "step": 10414
868
+ },
869
+ {
870
+ "epoch": 82.68,
871
+ "learning_rate": 3.478260869565217e-07,
872
+ "loss": 2.1153,
873
+ "step": 10500
874
+ },
875
+ {
876
+ "epoch": 83.0,
877
+ "eval_accuracy": 0.6009876543209877,
878
+ "eval_loss": 2.242079973220825,
879
+ "eval_runtime": 16.2447,
880
+ "eval_samples_per_second": 124.656,
881
+ "eval_steps_per_second": 3.94,
882
+ "step": 10541
883
+ },
884
+ {
885
+ "epoch": 84.0,
886
+ "eval_accuracy": 0.6009876543209877,
887
+ "eval_loss": 2.237128973007202,
888
+ "eval_runtime": 16.3552,
889
+ "eval_samples_per_second": 123.814,
890
+ "eval_steps_per_second": 3.913,
891
+ "step": 10668
892
+ },
893
+ {
894
+ "epoch": 85.0,
895
+ "eval_accuracy": 0.6034567901234568,
896
+ "eval_loss": 2.2331736087799072,
897
+ "eval_runtime": 16.4131,
898
+ "eval_samples_per_second": 123.377,
899
+ "eval_steps_per_second": 3.899,
900
+ "step": 10795
901
+ },
902
+ {
903
+ "epoch": 86.0,
904
+ "eval_accuracy": 0.6064197530864197,
905
+ "eval_loss": 2.230192184448242,
906
+ "eval_runtime": 16.263,
907
+ "eval_samples_per_second": 124.516,
908
+ "eval_steps_per_second": 3.935,
909
+ "step": 10922
910
+ },
911
+ {
912
+ "epoch": 86.61,
913
+ "learning_rate": 2.6877470355731227e-07,
914
+ "loss": 2.0969,
915
+ "step": 11000
916
+ },
917
+ {
918
+ "epoch": 87.0,
919
+ "eval_accuracy": 0.6083950617283951,
920
+ "eval_loss": 2.2269179821014404,
921
+ "eval_runtime": 16.4471,
922
+ "eval_samples_per_second": 123.122,
923
+ "eval_steps_per_second": 3.891,
924
+ "step": 11049
925
+ },
926
+ {
927
+ "epoch": 88.0,
928
+ "eval_accuracy": 0.6054320987654321,
929
+ "eval_loss": 2.224729537963867,
930
+ "eval_runtime": 16.5547,
931
+ "eval_samples_per_second": 122.322,
932
+ "eval_steps_per_second": 3.866,
933
+ "step": 11176
934
+ },
935
+ {
936
+ "epoch": 89.0,
937
+ "eval_accuracy": 0.6098765432098765,
938
+ "eval_loss": 2.2210581302642822,
939
+ "eval_runtime": 16.3636,
940
+ "eval_samples_per_second": 123.75,
941
+ "eval_steps_per_second": 3.911,
942
+ "step": 11303
943
+ },
944
+ {
945
+ "epoch": 90.0,
946
+ "eval_accuracy": 0.6049382716049383,
947
+ "eval_loss": 2.2199292182922363,
948
+ "eval_runtime": 16.5723,
949
+ "eval_samples_per_second": 122.192,
950
+ "eval_steps_per_second": 3.862,
951
+ "step": 11430
952
+ },
953
+ {
954
+ "epoch": 90.55,
955
+ "learning_rate": 1.8972332015810276e-07,
956
+ "loss": 2.0851,
957
+ "step": 11500
958
+ },
959
+ {
960
+ "epoch": 91.0,
961
+ "eval_accuracy": 0.6103703703703703,
962
+ "eval_loss": 2.2158267498016357,
963
+ "eval_runtime": 16.2141,
964
+ "eval_samples_per_second": 124.892,
965
+ "eval_steps_per_second": 3.947,
966
+ "step": 11557
967
+ },
968
+ {
969
+ "epoch": 92.0,
970
+ "eval_accuracy": 0.6079012345679012,
971
+ "eval_loss": 2.214700937271118,
972
+ "eval_runtime": 16.2859,
973
+ "eval_samples_per_second": 124.341,
974
+ "eval_steps_per_second": 3.93,
975
+ "step": 11684
976
+ },
977
+ {
978
+ "epoch": 93.0,
979
+ "eval_accuracy": 0.6064197530864197,
980
+ "eval_loss": 2.2131123542785645,
981
+ "eval_runtime": 16.337,
982
+ "eval_samples_per_second": 123.952,
983
+ "eval_steps_per_second": 3.917,
984
+ "step": 11811
985
+ },
986
+ {
987
+ "epoch": 94.0,
988
+ "eval_accuracy": 0.6098765432098765,
989
+ "eval_loss": 2.2114579677581787,
990
+ "eval_runtime": 16.393,
991
+ "eval_samples_per_second": 123.528,
992
+ "eval_steps_per_second": 3.904,
993
+ "step": 11938
994
+ },
995
+ {
996
+ "epoch": 94.49,
997
+ "learning_rate": 1.1067193675889327e-07,
998
+ "loss": 2.0754,
999
+ "step": 12000
1000
+ },
1001
+ {
1002
+ "epoch": 95.0,
1003
+ "eval_accuracy": 0.6138271604938271,
1004
+ "eval_loss": 2.209113597869873,
1005
+ "eval_runtime": 16.4652,
1006
+ "eval_samples_per_second": 122.987,
1007
+ "eval_steps_per_second": 3.887,
1008
+ "step": 12065
1009
+ },
1010
+ {
1011
+ "epoch": 96.0,
1012
+ "eval_accuracy": 0.6128395061728396,
1013
+ "eval_loss": 2.2085654735565186,
1014
+ "eval_runtime": 16.3862,
1015
+ "eval_samples_per_second": 123.579,
1016
+ "eval_steps_per_second": 3.906,
1017
+ "step": 12192
1018
+ },
1019
+ {
1020
+ "epoch": 97.0,
1021
+ "eval_accuracy": 0.6123456790123457,
1022
+ "eval_loss": 2.207934617996216,
1023
+ "eval_runtime": 16.3381,
1024
+ "eval_samples_per_second": 123.943,
1025
+ "eval_steps_per_second": 3.917,
1026
+ "step": 12319
1027
+ },
1028
+ {
1029
+ "epoch": 98.0,
1030
+ "eval_accuracy": 0.6128395061728396,
1031
+ "eval_loss": 2.206979513168335,
1032
+ "eval_runtime": 16.3485,
1033
+ "eval_samples_per_second": 123.865,
1034
+ "eval_steps_per_second": 3.915,
1035
+ "step": 12446
1036
+ },
1037
+ {
1038
+ "epoch": 98.43,
1039
+ "learning_rate": 3.162055335968379e-08,
1040
+ "loss": 2.0667,
1041
+ "step": 12500
1042
+ },
1043
+ {
1044
+ "epoch": 99.0,
1045
+ "eval_accuracy": 0.6133333333333333,
1046
+ "eval_loss": 2.2068374156951904,
1047
+ "eval_runtime": 16.2194,
1048
+ "eval_samples_per_second": 124.851,
1049
+ "eval_steps_per_second": 3.946,
1050
+ "step": 12573
1051
+ }
1052
+ ],
1053
+ "logging_steps": 500,
1054
+ "max_steps": 12700,
1055
+ "num_train_epochs": 100,
1056
+ "save_steps": 500,
1057
+ "total_flos": 6.215388042176161e+19,
1058
+ "trial_name": null,
1059
+ "trial_params": null
1060
+ }
checkpoint-12573/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228f29b6a2e9560079c65c3416fd8bcbe6376471ac08b36d341233494efae6e3
3
+ size 4091
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:add53bc2efceedf1c5511c06f55aab935a7e8227e7835acad7b9a9070f227347
3
  size 343380860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e67fe27f4a357a2b8bce1a9e565ac9aa096eb5fed005739ce9dd866069f41146
3
  size 343380860
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad7dc7cf87f80c4a27d4fcc41049b7b3f9902b042f1102733c65d2b55b912c14
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228f29b6a2e9560079c65c3416fd8bcbe6376471ac08b36d341233494efae6e3
3
  size 4091