rickysk commited on
Commit
f6a6d29
1 Parent(s): f1d3e64

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 14.05,
3
- "eval_accuracy": 0.25,
4
- "eval_loss": 1.7664662599563599,
5
  "eval_runtime": 44.2055,
6
  "eval_samples_per_second": 0.724,
7
  "eval_steps_per_second": 0.181
 
1
  {
2
  "epoch": 14.05,
3
+ "eval_accuracy": 0.75,
4
+ "eval_loss": 0.7047864198684692,
5
  "eval_runtime": 44.2055,
6
  "eval_samples_per_second": 0.724,
7
  "eval_steps_per_second": 0.181
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09dedd63ea516c5b5524e935236f24e27618861177becfc3ffe9732bdb52771
3
  size 344991221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e143d40e68c068ea767c1097593f1eb66eb40b2e22e879162519301dafab74
3
  size 344991221
test_results.json CHANGED
@@ -1,8 +1,4 @@
1
  {
2
- "epoch": 14.05,
3
- "eval_accuracy": 0.25,
4
- "eval_loss": 1.7664662599563599,
5
- "eval_runtime": 44.2055,
6
- "eval_samples_per_second": 0.724,
7
- "eval_steps_per_second": 0.181
8
  }
 
1
  {
2
+ "eval_accuracy": 0.75,
3
+ "eval_loss": 0.7047864198684692
 
 
 
 
4
  }
trainer_state.json CHANGED
@@ -1,202 +1,550 @@
1
  {
2
- "best_metric": 0.22857142857142856,
3
- "best_model_checkpoint": "videomae-base-ipm_first_videos/checkpoint-3",
4
- "epoch": 14.045454545454545,
5
- "global_step": 44,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.07,
12
  "eval_accuracy": 0.22857142857142856,
13
- "eval_loss": 1.769743800163269,
14
- "eval_runtime": 54.8821,
15
- "eval_samples_per_second": 0.638,
16
- "eval_steps_per_second": 0.164,
17
  "step": 3
18
  },
19
  {
20
- "epoch": 1.07,
21
- "eval_accuracy": 0.11428571428571428,
22
- "eval_loss": 1.8754996061325073,
23
- "eval_runtime": 57.2016,
24
- "eval_samples_per_second": 0.612,
25
- "eval_steps_per_second": 0.157,
26
  "step": 6
27
  },
28
  {
29
- "epoch": 2.07,
30
- "eval_accuracy": 0.11428571428571428,
31
- "eval_loss": 1.983254075050354,
32
- "eval_runtime": 55.4835,
33
- "eval_samples_per_second": 0.631,
34
- "eval_steps_per_second": 0.162,
35
  "step": 9
36
  },
37
  {
38
- "epoch": 3.02,
39
- "learning_rate": 4.358974358974359e-05,
40
- "loss": 1.7083,
41
  "step": 10
42
  },
43
  {
44
- "epoch": 3.07,
45
  "eval_accuracy": 0.11428571428571428,
46
- "eval_loss": 2.096163749694824,
47
- "eval_runtime": 54.5602,
48
- "eval_samples_per_second": 0.641,
49
- "eval_steps_per_second": 0.165,
50
  "step": 12
51
  },
52
  {
53
- "epoch": 4.07,
54
  "eval_accuracy": 0.11428571428571428,
55
- "eval_loss": 2.1202526092529297,
56
- "eval_runtime": 53.9604,
57
- "eval_samples_per_second": 0.649,
58
- "eval_steps_per_second": 0.167,
59
  "step": 15
60
  },
61
  {
62
- "epoch": 5.07,
63
  "eval_accuracy": 0.11428571428571428,
64
- "eval_loss": 2.1553356647491455,
65
- "eval_runtime": 55.7558,
66
- "eval_samples_per_second": 0.628,
67
- "eval_steps_per_second": 0.161,
68
  "step": 18
69
  },
70
  {
71
- "epoch": 6.05,
72
- "learning_rate": 3.0769230769230774e-05,
73
- "loss": 1.6139,
74
  "step": 20
75
  },
76
  {
77
- "epoch": 6.07,
78
  "eval_accuracy": 0.11428571428571428,
79
- "eval_loss": 2.156655788421631,
80
- "eval_runtime": 46.7059,
81
- "eval_samples_per_second": 0.749,
82
- "eval_steps_per_second": 0.193,
83
  "step": 21
84
  },
85
  {
86
- "epoch": 7.07,
87
  "eval_accuracy": 0.11428571428571428,
88
- "eval_loss": 2.1459949016571045,
89
- "eval_runtime": 46.9731,
90
- "eval_samples_per_second": 0.745,
91
- "eval_steps_per_second": 0.192,
92
  "step": 24
93
  },
94
  {
95
- "epoch": 8.07,
96
  "eval_accuracy": 0.11428571428571428,
97
- "eval_loss": 2.1409194469451904,
98
- "eval_runtime": 53.8833,
99
- "eval_samples_per_second": 0.65,
100
- "eval_steps_per_second": 0.167,
101
  "step": 27
102
  },
103
  {
104
- "epoch": 9.07,
105
- "learning_rate": 1.794871794871795e-05,
106
- "loss": 1.6933,
107
  "step": 30
108
  },
109
  {
110
- "epoch": 9.07,
111
  "eval_accuracy": 0.11428571428571428,
112
- "eval_loss": 2.100686550140381,
113
- "eval_runtime": 46.5756,
114
- "eval_samples_per_second": 0.751,
115
- "eval_steps_per_second": 0.193,
116
  "step": 30
117
  },
118
  {
119
- "epoch": 10.07,
120
- "eval_accuracy": 0.14285714285714285,
121
- "eval_loss": 2.0845894813537598,
122
- "eval_runtime": 48.1537,
123
- "eval_samples_per_second": 0.727,
124
- "eval_steps_per_second": 0.187,
125
  "step": 33
126
  },
127
  {
128
- "epoch": 11.07,
129
- "eval_accuracy": 0.14285714285714285,
130
- "eval_loss": 2.0486605167388916,
131
- "eval_runtime": 55.8454,
132
- "eval_samples_per_second": 0.627,
133
- "eval_steps_per_second": 0.161,
134
  "step": 36
135
  },
136
  {
137
- "epoch": 12.07,
138
- "eval_accuracy": 0.14285714285714285,
139
- "eval_loss": 2.039897918701172,
140
- "eval_runtime": 55.0769,
141
- "eval_samples_per_second": 0.635,
142
- "eval_steps_per_second": 0.163,
143
  "step": 39
144
  },
145
  {
146
- "epoch": 13.02,
147
- "learning_rate": 5.128205128205128e-06,
148
- "loss": 1.6804,
149
  "step": 40
150
  },
151
  {
152
- "epoch": 13.07,
153
- "eval_accuracy": 0.17142857142857143,
154
- "eval_loss": 2.0327117443084717,
155
- "eval_runtime": 55.472,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  "eval_samples_per_second": 0.631,
157
  "eval_steps_per_second": 0.162,
158
- "step": 42
159
  },
160
  {
161
- "epoch": 14.05,
162
- "eval_accuracy": 0.17142857142857143,
163
- "eval_loss": 2.033078908920288,
164
- "eval_runtime": 51.6178,
165
- "eval_samples_per_second": 0.678,
166
- "eval_steps_per_second": 0.174,
167
- "step": 44
 
 
 
 
 
 
 
 
 
168
  },
169
  {
170
- "epoch": 14.05,
171
- "step": 44,
172
- "total_flos": 2.018696644456612e+17,
173
- "train_loss": 1.6448797962882302,
174
- "train_runtime": 1281.9744,
175
- "train_samples_per_second": 0.137,
176
- "train_steps_per_second": 0.034
177
  },
178
  {
179
- "epoch": 14.05,
180
- "eval_accuracy": 0.25,
181
- "eval_loss": 1.7664662599563599,
182
- "eval_runtime": 43.7527,
183
- "eval_samples_per_second": 0.731,
184
- "eval_steps_per_second": 0.183,
185
- "step": 44
186
  },
187
  {
188
- "epoch": 14.05,
189
- "eval_accuracy": 0.25,
190
- "eval_loss": 1.7664662599563599,
191
- "eval_runtime": 44.2055,
192
- "eval_samples_per_second": 0.724,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  "eval_steps_per_second": 0.181,
194
- "step": 44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
  ],
197
- "max_steps": 44,
198
  "num_train_epochs": 9223372036854775807,
199
- "total_flos": 2.018696644456612e+17,
200
  "trial_name": null,
201
  "trial_params": null
202
  }
 
1
  {
2
+ "best_metric": 0.8,
3
+ "best_model_checkpoint": "videomae-base-ipm_first_videos/checkpoint-123",
4
+ "epoch": 48.00606060606061,
5
+ "global_step": 146,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
  "eval_accuracy": 0.22857142857142856,
13
+ "eval_loss": 1.7709606885910034,
14
+ "eval_runtime": 56.0244,
15
+ "eval_samples_per_second": 0.625,
16
+ "eval_steps_per_second": 0.161,
17
  "step": 3
18
  },
19
  {
20
+ "epoch": 1.01,
21
+ "eval_accuracy": 0.14285714285714285,
22
+ "eval_loss": 1.7788584232330322,
23
+ "eval_runtime": 54.8936,
24
+ "eval_samples_per_second": 0.638,
25
+ "eval_steps_per_second": 0.164,
26
  "step": 6
27
  },
28
  {
29
+ "epoch": 2.01,
30
+ "eval_accuracy": 0.08571428571428572,
31
+ "eval_loss": 1.7939355373382568,
32
+ "eval_runtime": 57.093,
33
+ "eval_samples_per_second": 0.613,
34
+ "eval_steps_per_second": 0.158,
35
  "step": 9
36
  },
37
  {
38
+ "epoch": 3.0,
39
+ "learning_rate": 1.5151515151515153e-05,
40
+ "loss": 1.7369,
41
  "step": 10
42
  },
43
  {
44
+ "epoch": 3.01,
45
  "eval_accuracy": 0.11428571428571428,
46
+ "eval_loss": 1.8259599208831787,
47
+ "eval_runtime": 49.6198,
48
+ "eval_samples_per_second": 0.705,
49
+ "eval_steps_per_second": 0.181,
50
  "step": 12
51
  },
52
  {
53
+ "epoch": 4.01,
54
  "eval_accuracy": 0.11428571428571428,
55
+ "eval_loss": 1.8636661767959595,
56
+ "eval_runtime": 47.6299,
57
+ "eval_samples_per_second": 0.735,
58
+ "eval_steps_per_second": 0.189,
59
  "step": 15
60
  },
61
  {
62
+ "epoch": 5.01,
63
  "eval_accuracy": 0.11428571428571428,
64
+ "eval_loss": 1.916202187538147,
65
+ "eval_runtime": 54.478,
66
+ "eval_samples_per_second": 0.642,
67
+ "eval_steps_per_second": 0.165,
68
  "step": 18
69
  },
70
  {
71
+ "epoch": 6.01,
72
+ "learning_rate": 3.0303030303030306e-05,
73
+ "loss": 1.6934,
74
  "step": 20
75
  },
76
  {
77
+ "epoch": 6.01,
78
  "eval_accuracy": 0.11428571428571428,
79
+ "eval_loss": 1.979854702949524,
80
+ "eval_runtime": 47.3817,
81
+ "eval_samples_per_second": 0.739,
82
+ "eval_steps_per_second": 0.19,
83
  "step": 21
84
  },
85
  {
86
+ "epoch": 7.01,
87
  "eval_accuracy": 0.11428571428571428,
88
+ "eval_loss": 2.032222270965576,
89
+ "eval_runtime": 47.048,
90
+ "eval_samples_per_second": 0.744,
91
+ "eval_steps_per_second": 0.191,
92
  "step": 24
93
  },
94
  {
95
+ "epoch": 8.01,
96
  "eval_accuracy": 0.11428571428571428,
97
+ "eval_loss": 2.147186756134033,
98
+ "eval_runtime": 56.4896,
99
+ "eval_samples_per_second": 0.62,
100
+ "eval_steps_per_second": 0.159,
101
  "step": 27
102
  },
103
  {
104
+ "epoch": 9.01,
105
+ "learning_rate": 4.545454545454546e-05,
106
+ "loss": 1.636,
107
  "step": 30
108
  },
109
  {
110
+ "epoch": 9.01,
111
  "eval_accuracy": 0.11428571428571428,
112
+ "eval_loss": 2.201059103012085,
113
+ "eval_runtime": 47.193,
114
+ "eval_samples_per_second": 0.742,
115
+ "eval_steps_per_second": 0.191,
116
  "step": 30
117
  },
118
  {
119
+ "epoch": 10.01,
120
+ "eval_accuracy": 0.11428571428571428,
121
+ "eval_loss": 2.3041186332702637,
122
+ "eval_runtime": 49.6007,
123
+ "eval_samples_per_second": 0.706,
124
+ "eval_steps_per_second": 0.181,
125
  "step": 33
126
  },
127
  {
128
+ "epoch": 11.01,
129
+ "eval_accuracy": 0.11428571428571428,
130
+ "eval_loss": 2.202650785446167,
131
+ "eval_runtime": 51.3674,
132
+ "eval_samples_per_second": 0.681,
133
+ "eval_steps_per_second": 0.175,
134
  "step": 36
135
  },
136
  {
137
+ "epoch": 12.01,
138
+ "eval_accuracy": 0.11428571428571428,
139
+ "eval_loss": 2.1011433601379395,
140
+ "eval_runtime": 47.3736,
141
+ "eval_samples_per_second": 0.739,
142
+ "eval_steps_per_second": 0.19,
143
  "step": 39
144
  },
145
  {
146
+ "epoch": 13.0,
147
+ "learning_rate": 4.882154882154882e-05,
148
+ "loss": 1.6069,
149
  "step": 40
150
  },
151
  {
152
+ "epoch": 13.01,
153
+ "eval_accuracy": 0.11428571428571428,
154
+ "eval_loss": 2.0470354557037354,
155
+ "eval_runtime": 52.7055,
156
+ "eval_samples_per_second": 0.664,
157
+ "eval_steps_per_second": 0.171,
158
+ "step": 42
159
+ },
160
+ {
161
+ "epoch": 14.01,
162
+ "eval_accuracy": 0.11428571428571428,
163
+ "eval_loss": 1.9688891172409058,
164
+ "eval_runtime": 46.2559,
165
+ "eval_samples_per_second": 0.757,
166
+ "eval_steps_per_second": 0.195,
167
+ "step": 45
168
+ },
169
+ {
170
+ "epoch": 15.01,
171
+ "eval_accuracy": 0.11428571428571428,
172
+ "eval_loss": 1.911560297012329,
173
+ "eval_runtime": 55.4227,
174
+ "eval_samples_per_second": 0.632,
175
+ "eval_steps_per_second": 0.162,
176
+ "step": 48
177
+ },
178
+ {
179
+ "epoch": 16.01,
180
+ "learning_rate": 4.713804713804714e-05,
181
+ "loss": 1.6509,
182
+ "step": 50
183
+ },
184
+ {
185
+ "epoch": 16.01,
186
+ "eval_accuracy": 0.11428571428571428,
187
+ "eval_loss": 1.9125531911849976,
188
+ "eval_runtime": 55.4531,
189
  "eval_samples_per_second": 0.631,
190
  "eval_steps_per_second": 0.162,
191
+ "step": 51
192
  },
193
  {
194
+ "epoch": 17.01,
195
+ "eval_accuracy": 0.11428571428571428,
196
+ "eval_loss": 2.038973808288574,
197
+ "eval_runtime": 47.6175,
198
+ "eval_samples_per_second": 0.735,
199
+ "eval_steps_per_second": 0.189,
200
+ "step": 54
201
+ },
202
+ {
203
+ "epoch": 18.01,
204
+ "eval_accuracy": 0.11428571428571428,
205
+ "eval_loss": 1.904532551765442,
206
+ "eval_runtime": 47.9854,
207
+ "eval_samples_per_second": 0.729,
208
+ "eval_steps_per_second": 0.188,
209
+ "step": 57
210
  },
211
  {
212
+ "epoch": 19.01,
213
+ "learning_rate": 4.545454545454546e-05,
214
+ "loss": 1.5023,
215
+ "step": 60
 
 
 
216
  },
217
  {
218
+ "epoch": 19.01,
219
+ "eval_accuracy": 0.11428571428571428,
220
+ "eval_loss": 1.796205759048462,
221
+ "eval_runtime": 51.1635,
222
+ "eval_samples_per_second": 0.684,
223
+ "eval_steps_per_second": 0.176,
224
+ "step": 60
225
  },
226
  {
227
+ "epoch": 20.01,
228
+ "eval_accuracy": 0.34285714285714286,
229
+ "eval_loss": 1.618465542793274,
230
+ "eval_runtime": 54.8541,
231
+ "eval_samples_per_second": 0.638,
232
+ "eval_steps_per_second": 0.164,
233
+ "step": 63
234
+ },
235
+ {
236
+ "epoch": 21.01,
237
+ "eval_accuracy": 0.34285714285714286,
238
+ "eval_loss": 1.4067193269729614,
239
+ "eval_runtime": 56.983,
240
+ "eval_samples_per_second": 0.614,
241
+ "eval_steps_per_second": 0.158,
242
+ "step": 66
243
+ },
244
+ {
245
+ "epoch": 22.01,
246
+ "eval_accuracy": 0.5142857142857142,
247
+ "eval_loss": 1.342714548110962,
248
+ "eval_runtime": 49.6455,
249
+ "eval_samples_per_second": 0.705,
250
  "eval_steps_per_second": 0.181,
251
+ "step": 69
252
+ },
253
+ {
254
+ "epoch": 23.0,
255
+ "learning_rate": 4.3771043771043774e-05,
256
+ "loss": 1.2231,
257
+ "step": 70
258
+ },
259
+ {
260
+ "epoch": 23.01,
261
+ "eval_accuracy": 0.37142857142857144,
262
+ "eval_loss": 1.199639081954956,
263
+ "eval_runtime": 46.5204,
264
+ "eval_samples_per_second": 0.752,
265
+ "eval_steps_per_second": 0.193,
266
+ "step": 72
267
+ },
268
+ {
269
+ "epoch": 24.01,
270
+ "eval_accuracy": 0.5428571428571428,
271
+ "eval_loss": 1.067163348197937,
272
+ "eval_runtime": 47.5503,
273
+ "eval_samples_per_second": 0.736,
274
+ "eval_steps_per_second": 0.189,
275
+ "step": 75
276
+ },
277
+ {
278
+ "epoch": 25.01,
279
+ "eval_accuracy": 0.5142857142857142,
280
+ "eval_loss": 1.1605939865112305,
281
+ "eval_runtime": 46.8125,
282
+ "eval_samples_per_second": 0.748,
283
+ "eval_steps_per_second": 0.192,
284
+ "step": 78
285
+ },
286
+ {
287
+ "epoch": 26.01,
288
+ "learning_rate": 4.208754208754209e-05,
289
+ "loss": 1.086,
290
+ "step": 80
291
+ },
292
+ {
293
+ "epoch": 26.01,
294
+ "eval_accuracy": 0.5142857142857142,
295
+ "eval_loss": 1.0507373809814453,
296
+ "eval_runtime": 47.2904,
297
+ "eval_samples_per_second": 0.74,
298
+ "eval_steps_per_second": 0.19,
299
+ "step": 81
300
+ },
301
+ {
302
+ "epoch": 27.01,
303
+ "eval_accuracy": 0.6571428571428571,
304
+ "eval_loss": 0.8726997971534729,
305
+ "eval_runtime": 47.448,
306
+ "eval_samples_per_second": 0.738,
307
+ "eval_steps_per_second": 0.19,
308
+ "step": 84
309
+ },
310
+ {
311
+ "epoch": 28.01,
312
+ "eval_accuracy": 0.5714285714285714,
313
+ "eval_loss": 1.1010727882385254,
314
+ "eval_runtime": 46.68,
315
+ "eval_samples_per_second": 0.75,
316
+ "eval_steps_per_second": 0.193,
317
+ "step": 87
318
+ },
319
+ {
320
+ "epoch": 29.01,
321
+ "learning_rate": 4.0404040404040405e-05,
322
+ "loss": 1.1221,
323
+ "step": 90
324
+ },
325
+ {
326
+ "epoch": 29.01,
327
+ "eval_accuracy": 0.6285714285714286,
328
+ "eval_loss": 1.070949673652649,
329
+ "eval_runtime": 47.7392,
330
+ "eval_samples_per_second": 0.733,
331
+ "eval_steps_per_second": 0.189,
332
+ "step": 90
333
+ },
334
+ {
335
+ "epoch": 30.01,
336
+ "eval_accuracy": 0.34285714285714286,
337
+ "eval_loss": 1.2051596641540527,
338
+ "eval_runtime": 52.6759,
339
+ "eval_samples_per_second": 0.664,
340
+ "eval_steps_per_second": 0.171,
341
+ "step": 93
342
+ },
343
+ {
344
+ "epoch": 31.01,
345
+ "eval_accuracy": 0.3142857142857143,
346
+ "eval_loss": 1.1637940406799316,
347
+ "eval_runtime": 47.6299,
348
+ "eval_samples_per_second": 0.735,
349
+ "eval_steps_per_second": 0.189,
350
+ "step": 96
351
+ },
352
+ {
353
+ "epoch": 32.01,
354
+ "eval_accuracy": 0.5714285714285714,
355
+ "eval_loss": 1.0540904998779297,
356
+ "eval_runtime": 47.2579,
357
+ "eval_samples_per_second": 0.741,
358
+ "eval_steps_per_second": 0.19,
359
+ "step": 99
360
+ },
361
+ {
362
+ "epoch": 33.0,
363
+ "learning_rate": 3.872053872053872e-05,
364
+ "loss": 1.0483,
365
+ "step": 100
366
+ },
367
+ {
368
+ "epoch": 33.01,
369
+ "eval_accuracy": 0.5428571428571428,
370
+ "eval_loss": 0.9793229103088379,
371
+ "eval_runtime": 48.2595,
372
+ "eval_samples_per_second": 0.725,
373
+ "eval_steps_per_second": 0.186,
374
+ "step": 102
375
+ },
376
+ {
377
+ "epoch": 34.01,
378
+ "eval_accuracy": 0.7142857142857143,
379
+ "eval_loss": 0.9788510203361511,
380
+ "eval_runtime": 46.729,
381
+ "eval_samples_per_second": 0.749,
382
+ "eval_steps_per_second": 0.193,
383
+ "step": 105
384
+ },
385
+ {
386
+ "epoch": 35.01,
387
+ "eval_accuracy": 0.6285714285714286,
388
+ "eval_loss": 1.0231552124023438,
389
+ "eval_runtime": 47.7551,
390
+ "eval_samples_per_second": 0.733,
391
+ "eval_steps_per_second": 0.188,
392
+ "step": 108
393
+ },
394
+ {
395
+ "epoch": 36.01,
396
+ "learning_rate": 3.7037037037037037e-05,
397
+ "loss": 0.8313,
398
+ "step": 110
399
+ },
400
+ {
401
+ "epoch": 36.01,
402
+ "eval_accuracy": 0.6857142857142857,
403
+ "eval_loss": 1.0134854316711426,
404
+ "eval_runtime": 50.8022,
405
+ "eval_samples_per_second": 0.689,
406
+ "eval_steps_per_second": 0.177,
407
+ "step": 111
408
+ },
409
+ {
410
+ "epoch": 37.01,
411
+ "eval_accuracy": 0.7142857142857143,
412
+ "eval_loss": 0.9512158036231995,
413
+ "eval_runtime": 47.6931,
414
+ "eval_samples_per_second": 0.734,
415
+ "eval_steps_per_second": 0.189,
416
+ "step": 114
417
+ },
418
+ {
419
+ "epoch": 38.01,
420
+ "eval_accuracy": 0.7428571428571429,
421
+ "eval_loss": 0.6964335441589355,
422
+ "eval_runtime": 48.6009,
423
+ "eval_samples_per_second": 0.72,
424
+ "eval_steps_per_second": 0.185,
425
+ "step": 117
426
+ },
427
+ {
428
+ "epoch": 39.01,
429
+ "learning_rate": 3.535353535353535e-05,
430
+ "loss": 0.9314,
431
+ "step": 120
432
+ },
433
+ {
434
+ "epoch": 39.01,
435
+ "eval_accuracy": 0.6,
436
+ "eval_loss": 0.7565953135490417,
437
+ "eval_runtime": 48.8407,
438
+ "eval_samples_per_second": 0.717,
439
+ "eval_steps_per_second": 0.184,
440
+ "step": 120
441
+ },
442
+ {
443
+ "epoch": 40.01,
444
+ "eval_accuracy": 0.8,
445
+ "eval_loss": 0.6933528184890747,
446
+ "eval_runtime": 46.6051,
447
+ "eval_samples_per_second": 0.751,
448
+ "eval_steps_per_second": 0.193,
449
+ "step": 123
450
+ },
451
+ {
452
+ "epoch": 41.01,
453
+ "eval_accuracy": 0.7714285714285715,
454
+ "eval_loss": 0.7758246064186096,
455
+ "eval_runtime": 46.5497,
456
+ "eval_samples_per_second": 0.752,
457
+ "eval_steps_per_second": 0.193,
458
+ "step": 126
459
+ },
460
+ {
461
+ "epoch": 42.01,
462
+ "eval_accuracy": 0.5714285714285714,
463
+ "eval_loss": 0.8644936084747314,
464
+ "eval_runtime": 47.6569,
465
+ "eval_samples_per_second": 0.734,
466
+ "eval_steps_per_second": 0.189,
467
+ "step": 129
468
+ },
469
+ {
470
+ "epoch": 43.0,
471
+ "learning_rate": 3.3670033670033675e-05,
472
+ "loss": 0.9863,
473
+ "step": 130
474
+ },
475
+ {
476
+ "epoch": 43.01,
477
+ "eval_accuracy": 0.7714285714285715,
478
+ "eval_loss": 0.8204832673072815,
479
+ "eval_runtime": 53.1222,
480
+ "eval_samples_per_second": 0.659,
481
+ "eval_steps_per_second": 0.169,
482
+ "step": 132
483
+ },
484
+ {
485
+ "epoch": 44.01,
486
+ "eval_accuracy": 0.7714285714285715,
487
+ "eval_loss": 0.7429643273353577,
488
+ "eval_runtime": 47.3081,
489
+ "eval_samples_per_second": 0.74,
490
+ "eval_steps_per_second": 0.19,
491
+ "step": 135
492
+ },
493
+ {
494
+ "epoch": 45.01,
495
+ "eval_accuracy": 0.6857142857142857,
496
+ "eval_loss": 0.7228943705558777,
497
+ "eval_runtime": 45.8699,
498
+ "eval_samples_per_second": 0.763,
499
+ "eval_steps_per_second": 0.196,
500
+ "step": 138
501
+ },
502
+ {
503
+ "epoch": 46.01,
504
+ "learning_rate": 3.198653198653199e-05,
505
+ "loss": 0.6828,
506
+ "step": 140
507
+ },
508
+ {
509
+ "epoch": 46.01,
510
+ "eval_accuracy": 0.6571428571428571,
511
+ "eval_loss": 0.684984028339386,
512
+ "eval_runtime": 52.7837,
513
+ "eval_samples_per_second": 0.663,
514
+ "eval_steps_per_second": 0.171,
515
+ "step": 141
516
+ },
517
+ {
518
+ "epoch": 47.01,
519
+ "eval_accuracy": 0.7714285714285715,
520
+ "eval_loss": 0.6505874395370483,
521
+ "eval_runtime": 47.1631,
522
+ "eval_samples_per_second": 0.742,
523
+ "eval_steps_per_second": 0.191,
524
+ "step": 144
525
+ },
526
+ {
527
+ "epoch": 48.01,
528
+ "eval_accuracy": 0.7894736842105263,
529
+ "eval_loss": 0.6245588660240173,
530
+ "eval_runtime": 50.9021,
531
+ "eval_samples_per_second": 0.747,
532
+ "eval_steps_per_second": 0.196,
533
+ "step": 146
534
+ },
535
+ {
536
+ "epoch": 48.01,
537
+ "eval_accuracy": 0.75,
538
+ "eval_loss": 0.7047864198684692,
539
+ "eval_runtime": 43.3232,
540
+ "eval_samples_per_second": 0.739,
541
+ "eval_steps_per_second": 0.185,
542
+ "step": 146
543
  }
544
  ],
545
+ "max_steps": 330,
546
  "num_train_epochs": 9223372036854775807,
547
+ "total_flos": 6.579455730080809e+17,
548
  "trial_name": null,
549
  "trial_params": null
550
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbaadefe4ce9a64a0f3c278eb504c2cda7bad8204953e159c924c841388d8237
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc0c9f2d3be88a3b16f882a91d47c15fad36dcba3504236baf70d3bf9a0d5f6e
3
  size 3963