nicolasdupuisroy commited on
Commit
77f9210
1 Parent(s): b4db065

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - imagefolder
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
all_results.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_accuracy": 0.15306122448979592,
4
+ "eval_loss": 3.643535614013672,
5
+ "eval_runtime": 1.6218,
6
+ "eval_samples_per_second": 60.427,
7
+ "eval_steps_per_second": 1.233,
8
+ "train_loss": 3.289549721309117,
9
+ "train_runtime": 4059.8356,
10
+ "train_samples_per_second": 13.597,
11
+ "train_steps_per_second": 0.172
12
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "eval_accuracy": 0.15306122448979592,
4
+ "eval_loss": 3.643535614013672,
5
+ "eval_runtime": 1.6218,
6
+ "eval_samples_per_second": 60.427,
7
+ "eval_steps_per_second": 1.233
8
+ }
runs/Jan16_21-09-40_d19f5dc858c0/events.out.tfevents.1705443471.d19f5dc858c0.15490.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5cd2bbd5677b8880e42d00bd24b2a403bb291ee85d5cf438a1e29f109fa4a8e
3
+ size 411
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.0,
3
+ "train_loss": 3.289549721309117,
4
+ "train_runtime": 4059.8356,
5
+ "train_samples_per_second": 13.597,
6
+ "train_steps_per_second": 0.172
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,1350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 3.643535614013672,
3
+ "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs_letter3/checkpoint-700",
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 700,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.030612244897959183,
14
+ "eval_loss": 3.945223808288574,
15
+ "eval_runtime": 35.9416,
16
+ "eval_samples_per_second": 2.727,
17
+ "eval_steps_per_second": 0.056,
18
+ "step": 7
19
+ },
20
+ {
21
+ "epoch": 1.43,
22
+ "learning_rate": 1.9714285714285718e-05,
23
+ "loss": 3.9498,
24
+ "step": 10
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "eval_accuracy": 0.05102040816326531,
29
+ "eval_loss": 3.943814277648926,
30
+ "eval_runtime": 1.6003,
31
+ "eval_samples_per_second": 61.237,
32
+ "eval_steps_per_second": 1.25,
33
+ "step": 14
34
+ },
35
+ {
36
+ "epoch": 2.86,
37
+ "learning_rate": 1.942857142857143e-05,
38
+ "loss": 3.9413,
39
+ "step": 20
40
+ },
41
+ {
42
+ "epoch": 3.0,
43
+ "eval_accuracy": 0.04081632653061224,
44
+ "eval_loss": 3.9436657428741455,
45
+ "eval_runtime": 1.5584,
46
+ "eval_samples_per_second": 62.886,
47
+ "eval_steps_per_second": 1.283,
48
+ "step": 21
49
+ },
50
+ {
51
+ "epoch": 4.0,
52
+ "eval_accuracy": 0.04081632653061224,
53
+ "eval_loss": 3.9431352615356445,
54
+ "eval_runtime": 1.6475,
55
+ "eval_samples_per_second": 59.483,
56
+ "eval_steps_per_second": 1.214,
57
+ "step": 28
58
+ },
59
+ {
60
+ "epoch": 4.29,
61
+ "learning_rate": 1.9142857142857146e-05,
62
+ "loss": 3.9255,
63
+ "step": 30
64
+ },
65
+ {
66
+ "epoch": 5.0,
67
+ "eval_accuracy": 0.04081632653061224,
68
+ "eval_loss": 3.9424338340759277,
69
+ "eval_runtime": 1.5527,
70
+ "eval_samples_per_second": 63.114,
71
+ "eval_steps_per_second": 1.288,
72
+ "step": 35
73
+ },
74
+ {
75
+ "epoch": 5.71,
76
+ "learning_rate": 1.885714285714286e-05,
77
+ "loss": 3.9132,
78
+ "step": 40
79
+ },
80
+ {
81
+ "epoch": 6.0,
82
+ "eval_accuracy": 0.030612244897959183,
83
+ "eval_loss": 3.9400839805603027,
84
+ "eval_runtime": 1.6728,
85
+ "eval_samples_per_second": 58.585,
86
+ "eval_steps_per_second": 1.196,
87
+ "step": 42
88
+ },
89
+ {
90
+ "epoch": 7.0,
91
+ "eval_accuracy": 0.030612244897959183,
92
+ "eval_loss": 3.937299966812134,
93
+ "eval_runtime": 1.6864,
94
+ "eval_samples_per_second": 58.111,
95
+ "eval_steps_per_second": 1.186,
96
+ "step": 49
97
+ },
98
+ {
99
+ "epoch": 7.14,
100
+ "learning_rate": 1.8571428571428575e-05,
101
+ "loss": 3.8913,
102
+ "step": 50
103
+ },
104
+ {
105
+ "epoch": 8.0,
106
+ "eval_accuracy": 0.02040816326530612,
107
+ "eval_loss": 3.9351377487182617,
108
+ "eval_runtime": 1.7475,
109
+ "eval_samples_per_second": 56.08,
110
+ "eval_steps_per_second": 1.144,
111
+ "step": 56
112
+ },
113
+ {
114
+ "epoch": 8.57,
115
+ "learning_rate": 1.8285714285714288e-05,
116
+ "loss": 3.8685,
117
+ "step": 60
118
+ },
119
+ {
120
+ "epoch": 9.0,
121
+ "eval_accuracy": 0.02040816326530612,
122
+ "eval_loss": 3.931184768676758,
123
+ "eval_runtime": 1.7165,
124
+ "eval_samples_per_second": 57.092,
125
+ "eval_steps_per_second": 1.165,
126
+ "step": 63
127
+ },
128
+ {
129
+ "epoch": 10.0,
130
+ "learning_rate": 1.8e-05,
131
+ "loss": 3.8413,
132
+ "step": 70
133
+ },
134
+ {
135
+ "epoch": 10.0,
136
+ "eval_accuracy": 0.030612244897959183,
137
+ "eval_loss": 3.9259071350097656,
138
+ "eval_runtime": 1.7086,
139
+ "eval_samples_per_second": 57.357,
140
+ "eval_steps_per_second": 1.171,
141
+ "step": 70
142
+ },
143
+ {
144
+ "epoch": 11.0,
145
+ "eval_accuracy": 0.030612244897959183,
146
+ "eval_loss": 3.9218833446502686,
147
+ "eval_runtime": 1.6006,
148
+ "eval_samples_per_second": 61.228,
149
+ "eval_steps_per_second": 1.25,
150
+ "step": 77
151
+ },
152
+ {
153
+ "epoch": 11.43,
154
+ "learning_rate": 1.7714285714285717e-05,
155
+ "loss": 3.8163,
156
+ "step": 80
157
+ },
158
+ {
159
+ "epoch": 12.0,
160
+ "eval_accuracy": 0.02040816326530612,
161
+ "eval_loss": 3.9182682037353516,
162
+ "eval_runtime": 1.8155,
163
+ "eval_samples_per_second": 53.98,
164
+ "eval_steps_per_second": 1.102,
165
+ "step": 84
166
+ },
167
+ {
168
+ "epoch": 12.86,
169
+ "learning_rate": 1.742857142857143e-05,
170
+ "loss": 3.7912,
171
+ "step": 90
172
+ },
173
+ {
174
+ "epoch": 13.0,
175
+ "eval_accuracy": 0.04081632653061224,
176
+ "eval_loss": 3.9151253700256348,
177
+ "eval_runtime": 1.704,
178
+ "eval_samples_per_second": 57.511,
179
+ "eval_steps_per_second": 1.174,
180
+ "step": 91
181
+ },
182
+ {
183
+ "epoch": 14.0,
184
+ "eval_accuracy": 0.030612244897959183,
185
+ "eval_loss": 3.911611795425415,
186
+ "eval_runtime": 1.7037,
187
+ "eval_samples_per_second": 57.521,
188
+ "eval_steps_per_second": 1.174,
189
+ "step": 98
190
+ },
191
+ {
192
+ "epoch": 14.29,
193
+ "learning_rate": 1.7142857142857142e-05,
194
+ "loss": 3.7616,
195
+ "step": 100
196
+ },
197
+ {
198
+ "epoch": 15.0,
199
+ "eval_accuracy": 0.04081632653061224,
200
+ "eval_loss": 3.9074392318725586,
201
+ "eval_runtime": 1.6801,
202
+ "eval_samples_per_second": 58.331,
203
+ "eval_steps_per_second": 1.19,
204
+ "step": 105
205
+ },
206
+ {
207
+ "epoch": 15.71,
208
+ "learning_rate": 1.6857142857142858e-05,
209
+ "loss": 3.734,
210
+ "step": 110
211
+ },
212
+ {
213
+ "epoch": 16.0,
214
+ "eval_accuracy": 0.04081632653061224,
215
+ "eval_loss": 3.9029488563537598,
216
+ "eval_runtime": 1.7659,
217
+ "eval_samples_per_second": 55.496,
218
+ "eval_steps_per_second": 1.133,
219
+ "step": 112
220
+ },
221
+ {
222
+ "epoch": 17.0,
223
+ "eval_accuracy": 0.061224489795918366,
224
+ "eval_loss": 3.8968818187713623,
225
+ "eval_runtime": 1.6451,
226
+ "eval_samples_per_second": 59.57,
227
+ "eval_steps_per_second": 1.216,
228
+ "step": 119
229
+ },
230
+ {
231
+ "epoch": 17.14,
232
+ "learning_rate": 1.6571428571428574e-05,
233
+ "loss": 3.7014,
234
+ "step": 120
235
+ },
236
+ {
237
+ "epoch": 18.0,
238
+ "eval_accuracy": 0.07142857142857142,
239
+ "eval_loss": 3.890695095062256,
240
+ "eval_runtime": 1.6901,
241
+ "eval_samples_per_second": 57.983,
242
+ "eval_steps_per_second": 1.183,
243
+ "step": 126
244
+ },
245
+ {
246
+ "epoch": 18.57,
247
+ "learning_rate": 1.6285714285714287e-05,
248
+ "loss": 3.6707,
249
+ "step": 130
250
+ },
251
+ {
252
+ "epoch": 19.0,
253
+ "eval_accuracy": 0.07142857142857142,
254
+ "eval_loss": 3.8844714164733887,
255
+ "eval_runtime": 1.6505,
256
+ "eval_samples_per_second": 59.374,
257
+ "eval_steps_per_second": 1.212,
258
+ "step": 133
259
+ },
260
+ {
261
+ "epoch": 20.0,
262
+ "learning_rate": 1.6000000000000003e-05,
263
+ "loss": 3.6307,
264
+ "step": 140
265
+ },
266
+ {
267
+ "epoch": 20.0,
268
+ "eval_accuracy": 0.08163265306122448,
269
+ "eval_loss": 3.877913475036621,
270
+ "eval_runtime": 1.8041,
271
+ "eval_samples_per_second": 54.322,
272
+ "eval_steps_per_second": 1.109,
273
+ "step": 140
274
+ },
275
+ {
276
+ "epoch": 21.0,
277
+ "eval_accuracy": 0.08163265306122448,
278
+ "eval_loss": 3.8703930377960205,
279
+ "eval_runtime": 1.7575,
280
+ "eval_samples_per_second": 55.762,
281
+ "eval_steps_per_second": 1.138,
282
+ "step": 147
283
+ },
284
+ {
285
+ "epoch": 21.43,
286
+ "learning_rate": 1.5714285714285715e-05,
287
+ "loss": 3.596,
288
+ "step": 150
289
+ },
290
+ {
291
+ "epoch": 22.0,
292
+ "eval_accuracy": 0.09183673469387756,
293
+ "eval_loss": 3.8646252155303955,
294
+ "eval_runtime": 1.6598,
295
+ "eval_samples_per_second": 59.042,
296
+ "eval_steps_per_second": 1.205,
297
+ "step": 154
298
+ },
299
+ {
300
+ "epoch": 22.86,
301
+ "learning_rate": 1.542857142857143e-05,
302
+ "loss": 3.5875,
303
+ "step": 160
304
+ },
305
+ {
306
+ "epoch": 23.0,
307
+ "eval_accuracy": 0.09183673469387756,
308
+ "eval_loss": 3.8603618144989014,
309
+ "eval_runtime": 1.6878,
310
+ "eval_samples_per_second": 58.065,
311
+ "eval_steps_per_second": 1.185,
312
+ "step": 161
313
+ },
314
+ {
315
+ "epoch": 24.0,
316
+ "eval_accuracy": 0.09183673469387756,
317
+ "eval_loss": 3.8561482429504395,
318
+ "eval_runtime": 1.6677,
319
+ "eval_samples_per_second": 58.764,
320
+ "eval_steps_per_second": 1.199,
321
+ "step": 168
322
+ },
323
+ {
324
+ "epoch": 24.29,
325
+ "learning_rate": 1.5142857142857144e-05,
326
+ "loss": 3.5532,
327
+ "step": 170
328
+ },
329
+ {
330
+ "epoch": 25.0,
331
+ "eval_accuracy": 0.09183673469387756,
332
+ "eval_loss": 3.8509910106658936,
333
+ "eval_runtime": 1.7363,
334
+ "eval_samples_per_second": 56.441,
335
+ "eval_steps_per_second": 1.152,
336
+ "step": 175
337
+ },
338
+ {
339
+ "epoch": 25.71,
340
+ "learning_rate": 1.4857142857142858e-05,
341
+ "loss": 3.5374,
342
+ "step": 180
343
+ },
344
+ {
345
+ "epoch": 26.0,
346
+ "eval_accuracy": 0.09183673469387756,
347
+ "eval_loss": 3.844221353530884,
348
+ "eval_runtime": 1.7776,
349
+ "eval_samples_per_second": 55.131,
350
+ "eval_steps_per_second": 1.125,
351
+ "step": 182
352
+ },
353
+ {
354
+ "epoch": 27.0,
355
+ "eval_accuracy": 0.10204081632653061,
356
+ "eval_loss": 3.83986496925354,
357
+ "eval_runtime": 1.6514,
358
+ "eval_samples_per_second": 59.342,
359
+ "eval_steps_per_second": 1.211,
360
+ "step": 189
361
+ },
362
+ {
363
+ "epoch": 27.14,
364
+ "learning_rate": 1.4571428571428573e-05,
365
+ "loss": 3.51,
366
+ "step": 190
367
+ },
368
+ {
369
+ "epoch": 28.0,
370
+ "eval_accuracy": 0.11224489795918367,
371
+ "eval_loss": 3.8350086212158203,
372
+ "eval_runtime": 1.6793,
373
+ "eval_samples_per_second": 58.357,
374
+ "eval_steps_per_second": 1.191,
375
+ "step": 196
376
+ },
377
+ {
378
+ "epoch": 28.57,
379
+ "learning_rate": 1.4285714285714287e-05,
380
+ "loss": 3.4842,
381
+ "step": 200
382
+ },
383
+ {
384
+ "epoch": 29.0,
385
+ "eval_accuracy": 0.12244897959183673,
386
+ "eval_loss": 3.8296377658843994,
387
+ "eval_runtime": 1.7517,
388
+ "eval_samples_per_second": 55.946,
389
+ "eval_steps_per_second": 1.142,
390
+ "step": 203
391
+ },
392
+ {
393
+ "epoch": 30.0,
394
+ "learning_rate": 1.4e-05,
395
+ "loss": 3.4495,
396
+ "step": 210
397
+ },
398
+ {
399
+ "epoch": 30.0,
400
+ "eval_accuracy": 0.12244897959183673,
401
+ "eval_loss": 3.8243255615234375,
402
+ "eval_runtime": 1.7837,
403
+ "eval_samples_per_second": 54.942,
404
+ "eval_steps_per_second": 1.121,
405
+ "step": 210
406
+ },
407
+ {
408
+ "epoch": 31.0,
409
+ "eval_accuracy": 0.12244897959183673,
410
+ "eval_loss": 3.8213043212890625,
411
+ "eval_runtime": 1.6545,
412
+ "eval_samples_per_second": 59.233,
413
+ "eval_steps_per_second": 1.209,
414
+ "step": 217
415
+ },
416
+ {
417
+ "epoch": 31.43,
418
+ "learning_rate": 1.3714285714285716e-05,
419
+ "loss": 3.4155,
420
+ "step": 220
421
+ },
422
+ {
423
+ "epoch": 32.0,
424
+ "eval_accuracy": 0.12244897959183673,
425
+ "eval_loss": 3.815812110900879,
426
+ "eval_runtime": 1.6721,
427
+ "eval_samples_per_second": 58.61,
428
+ "eval_steps_per_second": 1.196,
429
+ "step": 224
430
+ },
431
+ {
432
+ "epoch": 32.86,
433
+ "learning_rate": 1.3428571428571429e-05,
434
+ "loss": 3.4257,
435
+ "step": 230
436
+ },
437
+ {
438
+ "epoch": 33.0,
439
+ "eval_accuracy": 0.12244897959183673,
440
+ "eval_loss": 3.8117594718933105,
441
+ "eval_runtime": 1.6581,
442
+ "eval_samples_per_second": 59.102,
443
+ "eval_steps_per_second": 1.206,
444
+ "step": 231
445
+ },
446
+ {
447
+ "epoch": 34.0,
448
+ "eval_accuracy": 0.1326530612244898,
449
+ "eval_loss": 3.8061439990997314,
450
+ "eval_runtime": 1.6272,
451
+ "eval_samples_per_second": 60.227,
452
+ "eval_steps_per_second": 1.229,
453
+ "step": 238
454
+ },
455
+ {
456
+ "epoch": 34.29,
457
+ "learning_rate": 1.3142857142857145e-05,
458
+ "loss": 3.395,
459
+ "step": 240
460
+ },
461
+ {
462
+ "epoch": 35.0,
463
+ "eval_accuracy": 0.1326530612244898,
464
+ "eval_loss": 3.8029837608337402,
465
+ "eval_runtime": 1.6449,
466
+ "eval_samples_per_second": 59.577,
467
+ "eval_steps_per_second": 1.216,
468
+ "step": 245
469
+ },
470
+ {
471
+ "epoch": 35.71,
472
+ "learning_rate": 1.2857142857142859e-05,
473
+ "loss": 3.3693,
474
+ "step": 250
475
+ },
476
+ {
477
+ "epoch": 36.0,
478
+ "eval_accuracy": 0.14285714285714285,
479
+ "eval_loss": 3.795672655105591,
480
+ "eval_runtime": 1.6624,
481
+ "eval_samples_per_second": 58.953,
482
+ "eval_steps_per_second": 1.203,
483
+ "step": 252
484
+ },
485
+ {
486
+ "epoch": 37.0,
487
+ "eval_accuracy": 0.12244897959183673,
488
+ "eval_loss": 3.790419578552246,
489
+ "eval_runtime": 1.6455,
490
+ "eval_samples_per_second": 59.556,
491
+ "eval_steps_per_second": 1.215,
492
+ "step": 259
493
+ },
494
+ {
495
+ "epoch": 37.14,
496
+ "learning_rate": 1.2571428571428572e-05,
497
+ "loss": 3.35,
498
+ "step": 260
499
+ },
500
+ {
501
+ "epoch": 38.0,
502
+ "eval_accuracy": 0.12244897959183673,
503
+ "eval_loss": 3.783411741256714,
504
+ "eval_runtime": 1.6562,
505
+ "eval_samples_per_second": 59.172,
506
+ "eval_steps_per_second": 1.208,
507
+ "step": 266
508
+ },
509
+ {
510
+ "epoch": 38.57,
511
+ "learning_rate": 1.2285714285714288e-05,
512
+ "loss": 3.3453,
513
+ "step": 270
514
+ },
515
+ {
516
+ "epoch": 39.0,
517
+ "eval_accuracy": 0.12244897959183673,
518
+ "eval_loss": 3.778721332550049,
519
+ "eval_runtime": 1.7932,
520
+ "eval_samples_per_second": 54.651,
521
+ "eval_steps_per_second": 1.115,
522
+ "step": 273
523
+ },
524
+ {
525
+ "epoch": 40.0,
526
+ "learning_rate": 1.2e-05,
527
+ "loss": 3.2977,
528
+ "step": 280
529
+ },
530
+ {
531
+ "epoch": 40.0,
532
+ "eval_accuracy": 0.12244897959183673,
533
+ "eval_loss": 3.772735357284546,
534
+ "eval_runtime": 1.6336,
535
+ "eval_samples_per_second": 59.991,
536
+ "eval_steps_per_second": 1.224,
537
+ "step": 280
538
+ },
539
+ {
540
+ "epoch": 41.0,
541
+ "eval_accuracy": 0.12244897959183673,
542
+ "eval_loss": 3.7680680751800537,
543
+ "eval_runtime": 1.7171,
544
+ "eval_samples_per_second": 57.072,
545
+ "eval_steps_per_second": 1.165,
546
+ "step": 287
547
+ },
548
+ {
549
+ "epoch": 41.43,
550
+ "learning_rate": 1.1714285714285716e-05,
551
+ "loss": 3.2875,
552
+ "step": 290
553
+ },
554
+ {
555
+ "epoch": 42.0,
556
+ "eval_accuracy": 0.12244897959183673,
557
+ "eval_loss": 3.762765407562256,
558
+ "eval_runtime": 1.6284,
559
+ "eval_samples_per_second": 60.181,
560
+ "eval_steps_per_second": 1.228,
561
+ "step": 294
562
+ },
563
+ {
564
+ "epoch": 42.86,
565
+ "learning_rate": 1.1428571428571429e-05,
566
+ "loss": 3.2504,
567
+ "step": 300
568
+ },
569
+ {
570
+ "epoch": 43.0,
571
+ "eval_accuracy": 0.12244897959183673,
572
+ "eval_loss": 3.758150100708008,
573
+ "eval_runtime": 1.6731,
574
+ "eval_samples_per_second": 58.575,
575
+ "eval_steps_per_second": 1.195,
576
+ "step": 301
577
+ },
578
+ {
579
+ "epoch": 44.0,
580
+ "eval_accuracy": 0.12244897959183673,
581
+ "eval_loss": 3.7527263164520264,
582
+ "eval_runtime": 1.7657,
583
+ "eval_samples_per_second": 55.502,
584
+ "eval_steps_per_second": 1.133,
585
+ "step": 308
586
+ },
587
+ {
588
+ "epoch": 44.29,
589
+ "learning_rate": 1.1142857142857143e-05,
590
+ "loss": 3.2772,
591
+ "step": 310
592
+ },
593
+ {
594
+ "epoch": 45.0,
595
+ "eval_accuracy": 0.12244897959183673,
596
+ "eval_loss": 3.749258041381836,
597
+ "eval_runtime": 1.6842,
598
+ "eval_samples_per_second": 58.188,
599
+ "eval_steps_per_second": 1.188,
600
+ "step": 315
601
+ },
602
+ {
603
+ "epoch": 45.71,
604
+ "learning_rate": 1.0857142857142858e-05,
605
+ "loss": 3.2353,
606
+ "step": 320
607
+ },
608
+ {
609
+ "epoch": 46.0,
610
+ "eval_accuracy": 0.11224489795918367,
611
+ "eval_loss": 3.7462167739868164,
612
+ "eval_runtime": 1.6536,
613
+ "eval_samples_per_second": 59.264,
614
+ "eval_steps_per_second": 1.209,
615
+ "step": 322
616
+ },
617
+ {
618
+ "epoch": 47.0,
619
+ "eval_accuracy": 0.1326530612244898,
620
+ "eval_loss": 3.743089437484741,
621
+ "eval_runtime": 1.639,
622
+ "eval_samples_per_second": 59.793,
623
+ "eval_steps_per_second": 1.22,
624
+ "step": 329
625
+ },
626
+ {
627
+ "epoch": 47.14,
628
+ "learning_rate": 1.0571428571428572e-05,
629
+ "loss": 3.2198,
630
+ "step": 330
631
+ },
632
+ {
633
+ "epoch": 48.0,
634
+ "eval_accuracy": 0.1326530612244898,
635
+ "eval_loss": 3.7392406463623047,
636
+ "eval_runtime": 1.6659,
637
+ "eval_samples_per_second": 58.827,
638
+ "eval_steps_per_second": 1.201,
639
+ "step": 336
640
+ },
641
+ {
642
+ "epoch": 48.57,
643
+ "learning_rate": 1.0285714285714285e-05,
644
+ "loss": 3.204,
645
+ "step": 340
646
+ },
647
+ {
648
+ "epoch": 49.0,
649
+ "eval_accuracy": 0.14285714285714285,
650
+ "eval_loss": 3.73702073097229,
651
+ "eval_runtime": 1.7576,
652
+ "eval_samples_per_second": 55.758,
653
+ "eval_steps_per_second": 1.138,
654
+ "step": 343
655
+ },
656
+ {
657
+ "epoch": 50.0,
658
+ "learning_rate": 1e-05,
659
+ "loss": 3.1762,
660
+ "step": 350
661
+ },
662
+ {
663
+ "epoch": 50.0,
664
+ "eval_accuracy": 0.14285714285714285,
665
+ "eval_loss": 3.733855962753296,
666
+ "eval_runtime": 1.6472,
667
+ "eval_samples_per_second": 59.494,
668
+ "eval_steps_per_second": 1.214,
669
+ "step": 350
670
+ },
671
+ {
672
+ "epoch": 51.0,
673
+ "eval_accuracy": 0.14285714285714285,
674
+ "eval_loss": 3.730581283569336,
675
+ "eval_runtime": 1.6737,
676
+ "eval_samples_per_second": 58.554,
677
+ "eval_steps_per_second": 1.195,
678
+ "step": 357
679
+ },
680
+ {
681
+ "epoch": 51.43,
682
+ "learning_rate": 9.714285714285715e-06,
683
+ "loss": 3.1741,
684
+ "step": 360
685
+ },
686
+ {
687
+ "epoch": 52.0,
688
+ "eval_accuracy": 0.16326530612244897,
689
+ "eval_loss": 3.7267162799835205,
690
+ "eval_runtime": 1.669,
691
+ "eval_samples_per_second": 58.718,
692
+ "eval_steps_per_second": 1.198,
693
+ "step": 364
694
+ },
695
+ {
696
+ "epoch": 52.86,
697
+ "learning_rate": 9.42857142857143e-06,
698
+ "loss": 3.1757,
699
+ "step": 370
700
+ },
701
+ {
702
+ "epoch": 53.0,
703
+ "eval_accuracy": 0.16326530612244897,
704
+ "eval_loss": 3.7221927642822266,
705
+ "eval_runtime": 1.6595,
706
+ "eval_samples_per_second": 59.054,
707
+ "eval_steps_per_second": 1.205,
708
+ "step": 371
709
+ },
710
+ {
711
+ "epoch": 54.0,
712
+ "eval_accuracy": 0.15306122448979592,
713
+ "eval_loss": 3.717994451522827,
714
+ "eval_runtime": 1.6603,
715
+ "eval_samples_per_second": 59.027,
716
+ "eval_steps_per_second": 1.205,
717
+ "step": 378
718
+ },
719
+ {
720
+ "epoch": 54.29,
721
+ "learning_rate": 9.142857142857144e-06,
722
+ "loss": 3.1492,
723
+ "step": 380
724
+ },
725
+ {
726
+ "epoch": 55.0,
727
+ "eval_accuracy": 0.15306122448979592,
728
+ "eval_loss": 3.7148733139038086,
729
+ "eval_runtime": 1.6452,
730
+ "eval_samples_per_second": 59.566,
731
+ "eval_steps_per_second": 1.216,
732
+ "step": 385
733
+ },
734
+ {
735
+ "epoch": 55.71,
736
+ "learning_rate": 8.857142857142858e-06,
737
+ "loss": 3.1442,
738
+ "step": 390
739
+ },
740
+ {
741
+ "epoch": 56.0,
742
+ "eval_accuracy": 0.15306122448979592,
743
+ "eval_loss": 3.7107248306274414,
744
+ "eval_runtime": 1.6947,
745
+ "eval_samples_per_second": 57.827,
746
+ "eval_steps_per_second": 1.18,
747
+ "step": 392
748
+ },
749
+ {
750
+ "epoch": 57.0,
751
+ "eval_accuracy": 0.15306122448979592,
752
+ "eval_loss": 3.7084951400756836,
753
+ "eval_runtime": 1.6087,
754
+ "eval_samples_per_second": 60.919,
755
+ "eval_steps_per_second": 1.243,
756
+ "step": 399
757
+ },
758
+ {
759
+ "epoch": 57.14,
760
+ "learning_rate": 8.571428571428571e-06,
761
+ "loss": 3.1174,
762
+ "step": 400
763
+ },
764
+ {
765
+ "epoch": 58.0,
766
+ "eval_accuracy": 0.15306122448979592,
767
+ "eval_loss": 3.705909013748169,
768
+ "eval_runtime": 1.7081,
769
+ "eval_samples_per_second": 57.372,
770
+ "eval_steps_per_second": 1.171,
771
+ "step": 406
772
+ },
773
+ {
774
+ "epoch": 58.57,
775
+ "learning_rate": 8.285714285714287e-06,
776
+ "loss": 3.0962,
777
+ "step": 410
778
+ },
779
+ {
780
+ "epoch": 59.0,
781
+ "eval_accuracy": 0.15306122448979592,
782
+ "eval_loss": 3.7031126022338867,
783
+ "eval_runtime": 1.5815,
784
+ "eval_samples_per_second": 61.967,
785
+ "eval_steps_per_second": 1.265,
786
+ "step": 413
787
+ },
788
+ {
789
+ "epoch": 60.0,
790
+ "learning_rate": 8.000000000000001e-06,
791
+ "loss": 3.1237,
792
+ "step": 420
793
+ },
794
+ {
795
+ "epoch": 60.0,
796
+ "eval_accuracy": 0.15306122448979592,
797
+ "eval_loss": 3.701946258544922,
798
+ "eval_runtime": 1.7026,
799
+ "eval_samples_per_second": 57.559,
800
+ "eval_steps_per_second": 1.175,
801
+ "step": 420
802
+ },
803
+ {
804
+ "epoch": 61.0,
805
+ "eval_accuracy": 0.15306122448979592,
806
+ "eval_loss": 3.6996471881866455,
807
+ "eval_runtime": 1.6975,
808
+ "eval_samples_per_second": 57.732,
809
+ "eval_steps_per_second": 1.178,
810
+ "step": 427
811
+ },
812
+ {
813
+ "epoch": 61.43,
814
+ "learning_rate": 7.714285714285716e-06,
815
+ "loss": 3.1229,
816
+ "step": 430
817
+ },
818
+ {
819
+ "epoch": 62.0,
820
+ "eval_accuracy": 0.15306122448979592,
821
+ "eval_loss": 3.6955974102020264,
822
+ "eval_runtime": 1.6826,
823
+ "eval_samples_per_second": 58.242,
824
+ "eval_steps_per_second": 1.189,
825
+ "step": 434
826
+ },
827
+ {
828
+ "epoch": 62.86,
829
+ "learning_rate": 7.428571428571429e-06,
830
+ "loss": 3.0946,
831
+ "step": 440
832
+ },
833
+ {
834
+ "epoch": 63.0,
835
+ "eval_accuracy": 0.15306122448979592,
836
+ "eval_loss": 3.692981719970703,
837
+ "eval_runtime": 1.7683,
838
+ "eval_samples_per_second": 55.42,
839
+ "eval_steps_per_second": 1.131,
840
+ "step": 441
841
+ },
842
+ {
843
+ "epoch": 64.0,
844
+ "eval_accuracy": 0.15306122448979592,
845
+ "eval_loss": 3.6915957927703857,
846
+ "eval_runtime": 1.6752,
847
+ "eval_samples_per_second": 58.501,
848
+ "eval_steps_per_second": 1.194,
849
+ "step": 448
850
+ },
851
+ {
852
+ "epoch": 64.29,
853
+ "learning_rate": 7.1428571428571436e-06,
854
+ "loss": 3.0861,
855
+ "step": 450
856
+ },
857
+ {
858
+ "epoch": 65.0,
859
+ "eval_accuracy": 0.15306122448979592,
860
+ "eval_loss": 3.6893138885498047,
861
+ "eval_runtime": 1.6711,
862
+ "eval_samples_per_second": 58.644,
863
+ "eval_steps_per_second": 1.197,
864
+ "step": 455
865
+ },
866
+ {
867
+ "epoch": 65.71,
868
+ "learning_rate": 6.857142857142858e-06,
869
+ "loss": 3.0406,
870
+ "step": 460
871
+ },
872
+ {
873
+ "epoch": 66.0,
874
+ "eval_accuracy": 0.15306122448979592,
875
+ "eval_loss": 3.6859352588653564,
876
+ "eval_runtime": 1.7238,
877
+ "eval_samples_per_second": 56.85,
878
+ "eval_steps_per_second": 1.16,
879
+ "step": 462
880
+ },
881
+ {
882
+ "epoch": 67.0,
883
+ "eval_accuracy": 0.15306122448979592,
884
+ "eval_loss": 3.6839077472686768,
885
+ "eval_runtime": 1.6549,
886
+ "eval_samples_per_second": 59.218,
887
+ "eval_steps_per_second": 1.209,
888
+ "step": 469
889
+ },
890
+ {
891
+ "epoch": 67.14,
892
+ "learning_rate": 6.571428571428572e-06,
893
+ "loss": 3.077,
894
+ "step": 470
895
+ },
896
+ {
897
+ "epoch": 68.0,
898
+ "eval_accuracy": 0.15306122448979592,
899
+ "eval_loss": 3.6815552711486816,
900
+ "eval_runtime": 1.673,
901
+ "eval_samples_per_second": 58.578,
902
+ "eval_steps_per_second": 1.195,
903
+ "step": 476
904
+ },
905
+ {
906
+ "epoch": 68.57,
907
+ "learning_rate": 6.285714285714286e-06,
908
+ "loss": 3.0555,
909
+ "step": 480
910
+ },
911
+ {
912
+ "epoch": 69.0,
913
+ "eval_accuracy": 0.15306122448979592,
914
+ "eval_loss": 3.678163766860962,
915
+ "eval_runtime": 1.6518,
916
+ "eval_samples_per_second": 59.329,
917
+ "eval_steps_per_second": 1.211,
918
+ "step": 483
919
+ },
920
+ {
921
+ "epoch": 70.0,
922
+ "learning_rate": 6e-06,
923
+ "loss": 3.035,
924
+ "step": 490
925
+ },
926
+ {
927
+ "epoch": 70.0,
928
+ "eval_accuracy": 0.15306122448979592,
929
+ "eval_loss": 3.6762583255767822,
930
+ "eval_runtime": 1.6591,
931
+ "eval_samples_per_second": 59.069,
932
+ "eval_steps_per_second": 1.205,
933
+ "step": 490
934
+ },
935
+ {
936
+ "epoch": 71.0,
937
+ "eval_accuracy": 0.15306122448979592,
938
+ "eval_loss": 3.672853469848633,
939
+ "eval_runtime": 1.6495,
940
+ "eval_samples_per_second": 59.412,
941
+ "eval_steps_per_second": 1.212,
942
+ "step": 497
943
+ },
944
+ {
945
+ "epoch": 71.43,
946
+ "learning_rate": 5.7142857142857145e-06,
947
+ "loss": 3.0246,
948
+ "step": 500
949
+ },
950
+ {
951
+ "epoch": 72.0,
952
+ "eval_accuracy": 0.15306122448979592,
953
+ "eval_loss": 3.6719117164611816,
954
+ "eval_runtime": 1.6901,
955
+ "eval_samples_per_second": 57.986,
956
+ "eval_steps_per_second": 1.183,
957
+ "step": 504
958
+ },
959
+ {
960
+ "epoch": 72.86,
961
+ "learning_rate": 5.428571428571429e-06,
962
+ "loss": 3.0282,
963
+ "step": 510
964
+ },
965
+ {
966
+ "epoch": 73.0,
967
+ "eval_accuracy": 0.15306122448979592,
968
+ "eval_loss": 3.670848846435547,
969
+ "eval_runtime": 1.7293,
970
+ "eval_samples_per_second": 56.669,
971
+ "eval_steps_per_second": 1.157,
972
+ "step": 511
973
+ },
974
+ {
975
+ "epoch": 74.0,
976
+ "eval_accuracy": 0.14285714285714285,
977
+ "eval_loss": 3.6683461666107178,
978
+ "eval_runtime": 1.6959,
979
+ "eval_samples_per_second": 57.785,
980
+ "eval_steps_per_second": 1.179,
981
+ "step": 518
982
+ },
983
+ {
984
+ "epoch": 74.29,
985
+ "learning_rate": 5.142857142857142e-06,
986
+ "loss": 3.0293,
987
+ "step": 520
988
+ },
989
+ {
990
+ "epoch": 75.0,
991
+ "eval_accuracy": 0.14285714285714285,
992
+ "eval_loss": 3.665170669555664,
993
+ "eval_runtime": 1.6438,
994
+ "eval_samples_per_second": 59.617,
995
+ "eval_steps_per_second": 1.217,
996
+ "step": 525
997
+ },
998
+ {
999
+ "epoch": 75.71,
1000
+ "learning_rate": 4.857142857142858e-06,
1001
+ "loss": 2.9893,
1002
+ "step": 530
1003
+ },
1004
+ {
1005
+ "epoch": 76.0,
1006
+ "eval_accuracy": 0.14285714285714285,
1007
+ "eval_loss": 3.66402268409729,
1008
+ "eval_runtime": 1.6852,
1009
+ "eval_samples_per_second": 58.155,
1010
+ "eval_steps_per_second": 1.187,
1011
+ "step": 532
1012
+ },
1013
+ {
1014
+ "epoch": 77.0,
1015
+ "eval_accuracy": 0.14285714285714285,
1016
+ "eval_loss": 3.6634998321533203,
1017
+ "eval_runtime": 1.7537,
1018
+ "eval_samples_per_second": 55.882,
1019
+ "eval_steps_per_second": 1.14,
1020
+ "step": 539
1021
+ },
1022
+ {
1023
+ "epoch": 77.14,
1024
+ "learning_rate": 4.571428571428572e-06,
1025
+ "loss": 2.9888,
1026
+ "step": 540
1027
+ },
1028
+ {
1029
+ "epoch": 78.0,
1030
+ "eval_accuracy": 0.14285714285714285,
1031
+ "eval_loss": 3.6618170738220215,
1032
+ "eval_runtime": 1.7774,
1033
+ "eval_samples_per_second": 55.135,
1034
+ "eval_steps_per_second": 1.125,
1035
+ "step": 546
1036
+ },
1037
+ {
1038
+ "epoch": 78.57,
1039
+ "learning_rate": 4.2857142857142855e-06,
1040
+ "loss": 2.9833,
1041
+ "step": 550
1042
+ },
1043
+ {
1044
+ "epoch": 79.0,
1045
+ "eval_accuracy": 0.14285714285714285,
1046
+ "eval_loss": 3.659451484680176,
1047
+ "eval_runtime": 1.6474,
1048
+ "eval_samples_per_second": 59.487,
1049
+ "eval_steps_per_second": 1.214,
1050
+ "step": 553
1051
+ },
1052
+ {
1053
+ "epoch": 80.0,
1054
+ "learning_rate": 4.000000000000001e-06,
1055
+ "loss": 2.9739,
1056
+ "step": 560
1057
+ },
1058
+ {
1059
+ "epoch": 80.0,
1060
+ "eval_accuracy": 0.14285714285714285,
1061
+ "eval_loss": 3.6578376293182373,
1062
+ "eval_runtime": 1.6531,
1063
+ "eval_samples_per_second": 59.283,
1064
+ "eval_steps_per_second": 1.21,
1065
+ "step": 560
1066
+ },
1067
+ {
1068
+ "epoch": 81.0,
1069
+ "eval_accuracy": 0.14285714285714285,
1070
+ "eval_loss": 3.656160593032837,
1071
+ "eval_runtime": 1.7409,
1072
+ "eval_samples_per_second": 56.294,
1073
+ "eval_steps_per_second": 1.149,
1074
+ "step": 567
1075
+ },
1076
+ {
1077
+ "epoch": 81.43,
1078
+ "learning_rate": 3.7142857142857146e-06,
1079
+ "loss": 2.9513,
1080
+ "step": 570
1081
+ },
1082
+ {
1083
+ "epoch": 82.0,
1084
+ "eval_accuracy": 0.14285714285714285,
1085
+ "eval_loss": 3.655242681503296,
1086
+ "eval_runtime": 1.6811,
1087
+ "eval_samples_per_second": 58.294,
1088
+ "eval_steps_per_second": 1.19,
1089
+ "step": 574
1090
+ },
1091
+ {
1092
+ "epoch": 82.86,
1093
+ "learning_rate": 3.428571428571429e-06,
1094
+ "loss": 2.9503,
1095
+ "step": 580
1096
+ },
1097
+ {
1098
+ "epoch": 83.0,
1099
+ "eval_accuracy": 0.14285714285714285,
1100
+ "eval_loss": 3.6539218425750732,
1101
+ "eval_runtime": 1.7574,
1102
+ "eval_samples_per_second": 55.764,
1103
+ "eval_steps_per_second": 1.138,
1104
+ "step": 581
1105
+ },
1106
+ {
1107
+ "epoch": 84.0,
1108
+ "eval_accuracy": 0.15306122448979592,
1109
+ "eval_loss": 3.653193950653076,
1110
+ "eval_runtime": 1.6302,
1111
+ "eval_samples_per_second": 60.114,
1112
+ "eval_steps_per_second": 1.227,
1113
+ "step": 588
1114
+ },
1115
+ {
1116
+ "epoch": 84.29,
1117
+ "learning_rate": 3.142857142857143e-06,
1118
+ "loss": 2.9792,
1119
+ "step": 590
1120
+ },
1121
+ {
1122
+ "epoch": 85.0,
1123
+ "eval_accuracy": 0.15306122448979592,
1124
+ "eval_loss": 3.651691436767578,
1125
+ "eval_runtime": 1.7277,
1126
+ "eval_samples_per_second": 56.724,
1127
+ "eval_steps_per_second": 1.158,
1128
+ "step": 595
1129
+ },
1130
+ {
1131
+ "epoch": 85.71,
1132
+ "learning_rate": 2.8571428571428573e-06,
1133
+ "loss": 2.9561,
1134
+ "step": 600
1135
+ },
1136
+ {
1137
+ "epoch": 86.0,
1138
+ "eval_accuracy": 0.15306122448979592,
1139
+ "eval_loss": 3.6496691703796387,
1140
+ "eval_runtime": 1.6772,
1141
+ "eval_samples_per_second": 58.432,
1142
+ "eval_steps_per_second": 1.192,
1143
+ "step": 602
1144
+ },
1145
+ {
1146
+ "epoch": 87.0,
1147
+ "eval_accuracy": 0.15306122448979592,
1148
+ "eval_loss": 3.6485908031463623,
1149
+ "eval_runtime": 1.664,
1150
+ "eval_samples_per_second": 58.893,
1151
+ "eval_steps_per_second": 1.202,
1152
+ "step": 609
1153
+ },
1154
+ {
1155
+ "epoch": 87.14,
1156
+ "learning_rate": 2.571428571428571e-06,
1157
+ "loss": 2.964,
1158
+ "step": 610
1159
+ },
1160
+ {
1161
+ "epoch": 88.0,
1162
+ "eval_accuracy": 0.15306122448979592,
1163
+ "eval_loss": 3.647573709487915,
1164
+ "eval_runtime": 1.7778,
1165
+ "eval_samples_per_second": 55.124,
1166
+ "eval_steps_per_second": 1.125,
1167
+ "step": 616
1168
+ },
1169
+ {
1170
+ "epoch": 88.57,
1171
+ "learning_rate": 2.285714285714286e-06,
1172
+ "loss": 2.9665,
1173
+ "step": 620
1174
+ },
1175
+ {
1176
+ "epoch": 89.0,
1177
+ "eval_accuracy": 0.15306122448979592,
1178
+ "eval_loss": 3.6470184326171875,
1179
+ "eval_runtime": 1.6689,
1180
+ "eval_samples_per_second": 58.722,
1181
+ "eval_steps_per_second": 1.198,
1182
+ "step": 623
1183
+ },
1184
+ {
1185
+ "epoch": 90.0,
1186
+ "learning_rate": 2.0000000000000003e-06,
1187
+ "loss": 2.9439,
1188
+ "step": 630
1189
+ },
1190
+ {
1191
+ "epoch": 90.0,
1192
+ "eval_accuracy": 0.15306122448979592,
1193
+ "eval_loss": 3.646164894104004,
1194
+ "eval_runtime": 2.2025,
1195
+ "eval_samples_per_second": 44.495,
1196
+ "eval_steps_per_second": 0.908,
1197
+ "step": 630
1198
+ },
1199
+ {
1200
+ "epoch": 91.0,
1201
+ "eval_accuracy": 0.15306122448979592,
1202
+ "eval_loss": 3.6452953815460205,
1203
+ "eval_runtime": 1.6826,
1204
+ "eval_samples_per_second": 58.242,
1205
+ "eval_steps_per_second": 1.189,
1206
+ "step": 637
1207
+ },
1208
+ {
1209
+ "epoch": 91.43,
1210
+ "learning_rate": 1.7142857142857145e-06,
1211
+ "loss": 2.9369,
1212
+ "step": 640
1213
+ },
1214
+ {
1215
+ "epoch": 92.0,
1216
+ "eval_accuracy": 0.15306122448979592,
1217
+ "eval_loss": 3.645128011703491,
1218
+ "eval_runtime": 1.646,
1219
+ "eval_samples_per_second": 59.539,
1220
+ "eval_steps_per_second": 1.215,
1221
+ "step": 644
1222
+ },
1223
+ {
1224
+ "epoch": 92.86,
1225
+ "learning_rate": 1.4285714285714286e-06,
1226
+ "loss": 2.9619,
1227
+ "step": 650
1228
+ },
1229
+ {
1230
+ "epoch": 93.0,
1231
+ "eval_accuracy": 0.15306122448979592,
1232
+ "eval_loss": 3.64505934715271,
1233
+ "eval_runtime": 1.7663,
1234
+ "eval_samples_per_second": 55.483,
1235
+ "eval_steps_per_second": 1.132,
1236
+ "step": 651
1237
+ },
1238
+ {
1239
+ "epoch": 94.0,
1240
+ "eval_accuracy": 0.15306122448979592,
1241
+ "eval_loss": 3.6448814868927,
1242
+ "eval_runtime": 1.6466,
1243
+ "eval_samples_per_second": 59.516,
1244
+ "eval_steps_per_second": 1.215,
1245
+ "step": 658
1246
+ },
1247
+ {
1248
+ "epoch": 94.29,
1249
+ "learning_rate": 1.142857142857143e-06,
1250
+ "loss": 2.955,
1251
+ "step": 660
1252
+ },
1253
+ {
1254
+ "epoch": 95.0,
1255
+ "eval_accuracy": 0.15306122448979592,
1256
+ "eval_loss": 3.6444060802459717,
1257
+ "eval_runtime": 1.6863,
1258
+ "eval_samples_per_second": 58.115,
1259
+ "eval_steps_per_second": 1.186,
1260
+ "step": 665
1261
+ },
1262
+ {
1263
+ "epoch": 95.71,
1264
+ "learning_rate": 8.571428571428572e-07,
1265
+ "loss": 2.9323,
1266
+ "step": 670
1267
+ },
1268
+ {
1269
+ "epoch": 96.0,
1270
+ "eval_accuracy": 0.15306122448979592,
1271
+ "eval_loss": 3.6440815925598145,
1272
+ "eval_runtime": 1.626,
1273
+ "eval_samples_per_second": 60.271,
1274
+ "eval_steps_per_second": 1.23,
1275
+ "step": 672
1276
+ },
1277
+ {
1278
+ "epoch": 97.0,
1279
+ "eval_accuracy": 0.15306122448979592,
1280
+ "eval_loss": 3.6438138484954834,
1281
+ "eval_runtime": 1.7724,
1282
+ "eval_samples_per_second": 55.292,
1283
+ "eval_steps_per_second": 1.128,
1284
+ "step": 679
1285
+ },
1286
+ {
1287
+ "epoch": 97.14,
1288
+ "learning_rate": 5.714285714285715e-07,
1289
+ "loss": 2.9466,
1290
+ "step": 680
1291
+ },
1292
+ {
1293
+ "epoch": 98.0,
1294
+ "eval_accuracy": 0.15306122448979592,
1295
+ "eval_loss": 3.643672466278076,
1296
+ "eval_runtime": 1.7638,
1297
+ "eval_samples_per_second": 55.56,
1298
+ "eval_steps_per_second": 1.134,
1299
+ "step": 686
1300
+ },
1301
+ {
1302
+ "epoch": 98.57,
1303
+ "learning_rate": 2.8571428571428575e-07,
1304
+ "loss": 2.945,
1305
+ "step": 690
1306
+ },
1307
+ {
1308
+ "epoch": 99.0,
1309
+ "eval_accuracy": 0.15306122448979592,
1310
+ "eval_loss": 3.6435706615448,
1311
+ "eval_runtime": 1.6937,
1312
+ "eval_samples_per_second": 57.862,
1313
+ "eval_steps_per_second": 1.181,
1314
+ "step": 693
1315
+ },
1316
+ {
1317
+ "epoch": 100.0,
1318
+ "learning_rate": 0.0,
1319
+ "loss": 2.9665,
1320
+ "step": 700
1321
+ },
1322
+ {
1323
+ "epoch": 100.0,
1324
+ "eval_accuracy": 0.15306122448979592,
1325
+ "eval_loss": 3.643535614013672,
1326
+ "eval_runtime": 1.66,
1327
+ "eval_samples_per_second": 59.037,
1328
+ "eval_steps_per_second": 1.205,
1329
+ "step": 700
1330
+ },
1331
+ {
1332
+ "epoch": 100.0,
1333
+ "step": 700,
1334
+ "total_flos": 4.2794747466153984e+18,
1335
+ "train_loss": 3.289549721309117,
1336
+ "train_runtime": 4059.8356,
1337
+ "train_samples_per_second": 13.597,
1338
+ "train_steps_per_second": 0.172
1339
+ }
1340
+ ],
1341
+ "logging_steps": 10,
1342
+ "max_steps": 700,
1343
+ "num_input_tokens_seen": 0,
1344
+ "num_train_epochs": 100,
1345
+ "save_steps": 500,
1346
+ "total_flos": 4.2794747466153984e+18,
1347
+ "train_batch_size": 80,
1348
+ "trial_name": null,
1349
+ "trial_params": null
1350
+ }