sl82 commited on
Commit
38e0caf
1 Parent(s): e05016e

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9803703703703703,
4
- "eval_loss": 0.05731452256441116,
5
- "eval_runtime": 17.7396,
6
- "eval_samples_per_second": 152.202,
7
- "eval_steps_per_second": 4.792,
8
  "total_flos": 1.8124066505760768e+18,
9
- "train_loss": 0.33203531901041666,
10
- "train_runtime": 1146.1315,
11
- "train_samples_per_second": 63.605,
12
- "train_steps_per_second": 0.497
13
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9837037037037037,
4
+ "eval_loss": 0.05808306112885475,
5
+ "eval_runtime": 18.3764,
6
+ "eval_samples_per_second": 146.927,
7
+ "eval_steps_per_second": 4.625,
8
  "total_flos": 1.8124066505760768e+18,
9
+ "train_loss": 0.3388945644361931,
10
+ "train_runtime": 1188.1189,
11
+ "train_samples_per_second": 61.357,
12
+ "train_steps_per_second": 0.48
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.9803703703703703,
4
- "eval_loss": 0.05731452256441116,
5
- "eval_runtime": 17.7396,
6
- "eval_samples_per_second": 152.202,
7
- "eval_steps_per_second": 4.792
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.9837037037037037,
4
+ "eval_loss": 0.05808306112885475,
5
+ "eval_runtime": 18.3764,
6
+ "eval_samples_per_second": 146.927,
7
+ "eval_steps_per_second": 4.625
8
  }
runs/Jul09_02-44-35_ea30630c095e/events.out.tfevents.1657337795.ea30630c095e.72.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a7d0ccd3ac330eb59722ecca75c1536522f80fd4e7a977e8b5500eaf3b703b
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 1.8124066505760768e+18,
4
- "train_loss": 0.33203531901041666,
5
- "train_runtime": 1146.1315,
6
- "train_samples_per_second": 63.605,
7
- "train_steps_per_second": 0.497
8
  }
 
1
  {
2
  "epoch": 3.0,
3
  "total_flos": 1.8124066505760768e+18,
4
+ "train_loss": 0.3388945644361931,
5
+ "train_runtime": 1188.1189,
6
+ "train_samples_per_second": 61.357,
7
+ "train_steps_per_second": 0.48
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9803703703703703,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-570",
4
  "epoch": 3.0,
5
  "global_step": 570,
@@ -10,380 +10,380 @@
10
  {
11
  "epoch": 0.05,
12
  "learning_rate": 8.771929824561403e-06,
13
- "loss": 2.3385,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
  "learning_rate": 1.7543859649122806e-05,
19
- "loss": 2.1275,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.16,
24
  "learning_rate": 2.6315789473684212e-05,
25
- "loss": 1.7313,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.21,
30
  "learning_rate": 3.508771929824561e-05,
31
- "loss": 1.1094,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.26,
36
  "learning_rate": 4.3859649122807014e-05,
37
- "loss": 0.6705,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.32,
42
  "learning_rate": 4.970760233918128e-05,
43
- "loss": 0.5056,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.37,
48
  "learning_rate": 4.8732943469785574e-05,
49
- "loss": 0.4138,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.42,
54
  "learning_rate": 4.7758284600389865e-05,
55
- "loss": 0.4173,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.47,
60
  "learning_rate": 4.678362573099415e-05,
61
- "loss": 0.3542,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.53,
66
  "learning_rate": 4.580896686159844e-05,
67
- "loss": 0.3654,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.58,
72
  "learning_rate": 4.483430799220273e-05,
73
- "loss": 0.3132,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.63,
78
  "learning_rate": 4.3859649122807014e-05,
79
- "loss": 0.3199,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.68,
84
  "learning_rate": 4.2884990253411305e-05,
85
- "loss": 0.2957,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.74,
90
  "learning_rate": 4.1910331384015596e-05,
91
- "loss": 0.3042,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.79,
96
  "learning_rate": 4.093567251461988e-05,
97
- "loss": 0.2853,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.84,
102
  "learning_rate": 3.996101364522417e-05,
103
- "loss": 0.2277,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.89,
108
  "learning_rate": 3.898635477582846e-05,
109
- "loss": 0.2379,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.95,
114
  "learning_rate": 3.8011695906432746e-05,
115
- "loss": 0.2495,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 1.0,
120
  "learning_rate": 3.7037037037037037e-05,
121
- "loss": 0.2221,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 1.0,
126
- "eval_accuracy": 0.9725925925925926,
127
- "eval_loss": 0.09543684124946594,
128
- "eval_runtime": 17.8689,
129
- "eval_samples_per_second": 151.1,
130
- "eval_steps_per_second": 4.757,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.05,
135
  "learning_rate": 3.606237816764133e-05,
136
- "loss": 0.2593,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.11,
141
  "learning_rate": 3.508771929824561e-05,
142
- "loss": 0.2474,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.16,
147
  "learning_rate": 3.41130604288499e-05,
148
- "loss": 0.1954,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.21,
153
  "learning_rate": 3.313840155945419e-05,
154
- "loss": 0.1855,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.26,
159
  "learning_rate": 3.216374269005848e-05,
160
- "loss": 0.2073,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.32,
165
  "learning_rate": 3.118908382066277e-05,
166
- "loss": 0.1896,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.37,
171
  "learning_rate": 3.0214424951267055e-05,
172
- "loss": 0.1839,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.42,
177
  "learning_rate": 2.9239766081871346e-05,
178
- "loss": 0.1746,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.47,
183
  "learning_rate": 2.8265107212475634e-05,
184
- "loss": 0.2005,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.53,
189
  "learning_rate": 2.729044834307992e-05,
190
- "loss": 0.1935,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.58,
195
  "learning_rate": 2.6315789473684212e-05,
196
- "loss": 0.2099,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.63,
201
  "learning_rate": 2.53411306042885e-05,
202
- "loss": 0.1665,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.68,
207
  "learning_rate": 2.4366471734892787e-05,
208
- "loss": 0.1707,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.74,
213
  "learning_rate": 2.3391812865497074e-05,
214
- "loss": 0.1637,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.79,
219
  "learning_rate": 2.2417153996101365e-05,
220
- "loss": 0.2051,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 1.84,
225
  "learning_rate": 2.1442495126705653e-05,
226
- "loss": 0.1545,
227
  "step": 350
228
  },
229
  {
230
  "epoch": 1.89,
231
  "learning_rate": 2.046783625730994e-05,
232
- "loss": 0.1664,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.95,
237
  "learning_rate": 1.949317738791423e-05,
238
- "loss": 0.1587,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 2.0,
243
  "learning_rate": 1.8518518518518518e-05,
244
- "loss": 0.201,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 2.0,
249
- "eval_accuracy": 0.9737037037037037,
250
- "eval_loss": 0.07519854605197906,
251
- "eval_runtime": 17.8441,
252
- "eval_samples_per_second": 151.311,
253
- "eval_steps_per_second": 4.763,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.05,
258
  "learning_rate": 1.7543859649122806e-05,
259
- "loss": 0.1411,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.11,
264
  "learning_rate": 1.6569200779727097e-05,
265
- "loss": 0.1344,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.16,
270
  "learning_rate": 1.5594541910331384e-05,
271
- "loss": 0.1507,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.21,
276
  "learning_rate": 1.4619883040935673e-05,
277
- "loss": 0.1642,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.26,
282
  "learning_rate": 1.364522417153996e-05,
283
- "loss": 0.1454,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.32,
288
  "learning_rate": 1.267056530214425e-05,
289
- "loss": 0.1664,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.37,
294
  "learning_rate": 1.1695906432748537e-05,
295
- "loss": 0.1304,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.42,
300
  "learning_rate": 1.0721247563352826e-05,
301
- "loss": 0.1181,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.47,
306
  "learning_rate": 9.746588693957115e-06,
307
- "loss": 0.1802,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.53,
312
  "learning_rate": 8.771929824561403e-06,
313
- "loss": 0.1329,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.58,
318
  "learning_rate": 7.797270955165692e-06,
319
- "loss": 0.1456,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.63,
324
  "learning_rate": 6.82261208576998e-06,
325
- "loss": 0.1452,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.68,
330
  "learning_rate": 5.8479532163742686e-06,
331
- "loss": 0.1548,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.74,
336
  "learning_rate": 4.873294346978558e-06,
337
- "loss": 0.1634,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 2.79,
342
  "learning_rate": 3.898635477582846e-06,
343
- "loss": 0.1569,
344
  "step": 530
345
  },
346
  {
347
  "epoch": 2.84,
348
  "learning_rate": 2.9239766081871343e-06,
349
- "loss": 0.144,
350
  "step": 540
351
  },
352
  {
353
  "epoch": 2.89,
354
  "learning_rate": 1.949317738791423e-06,
355
- "loss": 0.134,
356
  "step": 550
357
  },
358
  {
359
  "epoch": 2.95,
360
  "learning_rate": 9.746588693957115e-07,
361
- "loss": 0.1503,
362
  "step": 560
363
  },
364
  {
365
  "epoch": 3.0,
366
  "learning_rate": 0.0,
367
- "loss": 0.1456,
368
  "step": 570
369
  },
370
  {
371
  "epoch": 3.0,
372
- "eval_accuracy": 0.9803703703703703,
373
- "eval_loss": 0.05731452256441116,
374
- "eval_runtime": 17.8129,
375
- "eval_samples_per_second": 151.575,
376
- "eval_steps_per_second": 4.772,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.0,
381
  "step": 570,
382
  "total_flos": 1.8124066505760768e+18,
383
- "train_loss": 0.33203531901041666,
384
- "train_runtime": 1146.1315,
385
- "train_samples_per_second": 63.605,
386
- "train_steps_per_second": 0.497
387
  }
388
  ],
389
  "max_steps": 570,
 
1
  {
2
+ "best_metric": 0.9837037037037037,
3
  "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-570",
4
  "epoch": 3.0,
5
  "global_step": 570,
 
10
  {
11
  "epoch": 0.05,
12
  "learning_rate": 8.771929824561403e-06,
13
+ "loss": 2.335,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.11,
18
  "learning_rate": 1.7543859649122806e-05,
19
+ "loss": 2.1086,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.16,
24
  "learning_rate": 2.6315789473684212e-05,
25
+ "loss": 1.7134,
26
  "step": 30
27
  },
28
  {
29
  "epoch": 0.21,
30
  "learning_rate": 3.508771929824561e-05,
31
+ "loss": 1.0858,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 0.26,
36
  "learning_rate": 4.3859649122807014e-05,
37
+ "loss": 0.6907,
38
  "step": 50
39
  },
40
  {
41
  "epoch": 0.32,
42
  "learning_rate": 4.970760233918128e-05,
43
+ "loss": 0.5373,
44
  "step": 60
45
  },
46
  {
47
  "epoch": 0.37,
48
  "learning_rate": 4.8732943469785574e-05,
49
+ "loss": 0.4256,
50
  "step": 70
51
  },
52
  {
53
  "epoch": 0.42,
54
  "learning_rate": 4.7758284600389865e-05,
55
+ "loss": 0.3958,
56
  "step": 80
57
  },
58
  {
59
  "epoch": 0.47,
60
  "learning_rate": 4.678362573099415e-05,
61
+ "loss": 0.3965,
62
  "step": 90
63
  },
64
  {
65
  "epoch": 0.53,
66
  "learning_rate": 4.580896686159844e-05,
67
+ "loss": 0.3546,
68
  "step": 100
69
  },
70
  {
71
  "epoch": 0.58,
72
  "learning_rate": 4.483430799220273e-05,
73
+ "loss": 0.3201,
74
  "step": 110
75
  },
76
  {
77
  "epoch": 0.63,
78
  "learning_rate": 4.3859649122807014e-05,
79
+ "loss": 0.2759,
80
  "step": 120
81
  },
82
  {
83
  "epoch": 0.68,
84
  "learning_rate": 4.2884990253411305e-05,
85
+ "loss": 0.2594,
86
  "step": 130
87
  },
88
  {
89
  "epoch": 0.74,
90
  "learning_rate": 4.1910331384015596e-05,
91
+ "loss": 0.3289,
92
  "step": 140
93
  },
94
  {
95
  "epoch": 0.79,
96
  "learning_rate": 4.093567251461988e-05,
97
+ "loss": 0.2768,
98
  "step": 150
99
  },
100
  {
101
  "epoch": 0.84,
102
  "learning_rate": 3.996101364522417e-05,
103
+ "loss": 0.2895,
104
  "step": 160
105
  },
106
  {
107
  "epoch": 0.89,
108
  "learning_rate": 3.898635477582846e-05,
109
+ "loss": 0.2943,
110
  "step": 170
111
  },
112
  {
113
  "epoch": 0.95,
114
  "learning_rate": 3.8011695906432746e-05,
115
+ "loss": 0.2417,
116
  "step": 180
117
  },
118
  {
119
  "epoch": 1.0,
120
  "learning_rate": 3.7037037037037037e-05,
121
+ "loss": 0.2666,
122
  "step": 190
123
  },
124
  {
125
  "epoch": 1.0,
126
+ "eval_accuracy": 0.9540740740740741,
127
+ "eval_loss": 0.13643595576286316,
128
+ "eval_runtime": 18.4106,
129
+ "eval_samples_per_second": 146.655,
130
+ "eval_steps_per_second": 4.617,
131
  "step": 190
132
  },
133
  {
134
  "epoch": 1.05,
135
  "learning_rate": 3.606237816764133e-05,
136
+ "loss": 0.2072,
137
  "step": 200
138
  },
139
  {
140
  "epoch": 1.11,
141
  "learning_rate": 3.508771929824561e-05,
142
+ "loss": 0.2437,
143
  "step": 210
144
  },
145
  {
146
  "epoch": 1.16,
147
  "learning_rate": 3.41130604288499e-05,
148
+ "loss": 0.2488,
149
  "step": 220
150
  },
151
  {
152
  "epoch": 1.21,
153
  "learning_rate": 3.313840155945419e-05,
154
+ "loss": 0.2861,
155
  "step": 230
156
  },
157
  {
158
  "epoch": 1.26,
159
  "learning_rate": 3.216374269005848e-05,
160
+ "loss": 0.2095,
161
  "step": 240
162
  },
163
  {
164
  "epoch": 1.32,
165
  "learning_rate": 3.118908382066277e-05,
166
+ "loss": 0.2347,
167
  "step": 250
168
  },
169
  {
170
  "epoch": 1.37,
171
  "learning_rate": 3.0214424951267055e-05,
172
+ "loss": 0.2053,
173
  "step": 260
174
  },
175
  {
176
  "epoch": 1.42,
177
  "learning_rate": 2.9239766081871346e-05,
178
+ "loss": 0.198,
179
  "step": 270
180
  },
181
  {
182
  "epoch": 1.47,
183
  "learning_rate": 2.8265107212475634e-05,
184
+ "loss": 0.2064,
185
  "step": 280
186
  },
187
  {
188
  "epoch": 1.53,
189
  "learning_rate": 2.729044834307992e-05,
190
+ "loss": 0.1863,
191
  "step": 290
192
  },
193
  {
194
  "epoch": 1.58,
195
  "learning_rate": 2.6315789473684212e-05,
196
+ "loss": 0.216,
197
  "step": 300
198
  },
199
  {
200
  "epoch": 1.63,
201
  "learning_rate": 2.53411306042885e-05,
202
+ "loss": 0.2085,
203
  "step": 310
204
  },
205
  {
206
  "epoch": 1.68,
207
  "learning_rate": 2.4366471734892787e-05,
208
+ "loss": 0.1671,
209
  "step": 320
210
  },
211
  {
212
  "epoch": 1.74,
213
  "learning_rate": 2.3391812865497074e-05,
214
+ "loss": 0.1641,
215
  "step": 330
216
  },
217
  {
218
  "epoch": 1.79,
219
  "learning_rate": 2.2417153996101365e-05,
220
+ "loss": 0.2049,
221
  "step": 340
222
  },
223
  {
224
  "epoch": 1.84,
225
  "learning_rate": 2.1442495126705653e-05,
226
+ "loss": 0.1592,
227
  "step": 350
228
  },
229
  {
230
  "epoch": 1.89,
231
  "learning_rate": 2.046783625730994e-05,
232
+ "loss": 0.1787,
233
  "step": 360
234
  },
235
  {
236
  "epoch": 1.95,
237
  "learning_rate": 1.949317738791423e-05,
238
+ "loss": 0.1879,
239
  "step": 370
240
  },
241
  {
242
  "epoch": 2.0,
243
  "learning_rate": 1.8518518518518518e-05,
244
+ "loss": 0.1735,
245
  "step": 380
246
  },
247
  {
248
  "epoch": 2.0,
249
+ "eval_accuracy": 0.9662962962962963,
250
+ "eval_loss": 0.09703890234231949,
251
+ "eval_runtime": 18.678,
252
+ "eval_samples_per_second": 144.555,
253
+ "eval_steps_per_second": 4.551,
254
  "step": 380
255
  },
256
  {
257
  "epoch": 2.05,
258
  "learning_rate": 1.7543859649122806e-05,
259
+ "loss": 0.1551,
260
  "step": 390
261
  },
262
  {
263
  "epoch": 2.11,
264
  "learning_rate": 1.6569200779727097e-05,
265
+ "loss": 0.1206,
266
  "step": 400
267
  },
268
  {
269
  "epoch": 2.16,
270
  "learning_rate": 1.5594541910331384e-05,
271
+ "loss": 0.1494,
272
  "step": 410
273
  },
274
  {
275
  "epoch": 2.21,
276
  "learning_rate": 1.4619883040935673e-05,
277
+ "loss": 0.1655,
278
  "step": 420
279
  },
280
  {
281
  "epoch": 2.26,
282
  "learning_rate": 1.364522417153996e-05,
283
+ "loss": 0.1497,
284
  "step": 430
285
  },
286
  {
287
  "epoch": 2.32,
288
  "learning_rate": 1.267056530214425e-05,
289
+ "loss": 0.1597,
290
  "step": 440
291
  },
292
  {
293
  "epoch": 2.37,
294
  "learning_rate": 1.1695906432748537e-05,
295
+ "loss": 0.1754,
296
  "step": 450
297
  },
298
  {
299
  "epoch": 2.42,
300
  "learning_rate": 1.0721247563352826e-05,
301
+ "loss": 0.1388,
302
  "step": 460
303
  },
304
  {
305
  "epoch": 2.47,
306
  "learning_rate": 9.746588693957115e-06,
307
+ "loss": 0.141,
308
  "step": 470
309
  },
310
  {
311
  "epoch": 2.53,
312
  "learning_rate": 8.771929824561403e-06,
313
+ "loss": 0.1691,
314
  "step": 480
315
  },
316
  {
317
  "epoch": 2.58,
318
  "learning_rate": 7.797270955165692e-06,
319
+ "loss": 0.1452,
320
  "step": 490
321
  },
322
  {
323
  "epoch": 2.63,
324
  "learning_rate": 6.82261208576998e-06,
325
+ "loss": 0.1697,
326
  "step": 500
327
  },
328
  {
329
  "epoch": 2.68,
330
  "learning_rate": 5.8479532163742686e-06,
331
+ "loss": 0.1514,
332
  "step": 510
333
  },
334
  {
335
  "epoch": 2.74,
336
  "learning_rate": 4.873294346978558e-06,
337
+ "loss": 0.1455,
338
  "step": 520
339
  },
340
  {
341
  "epoch": 2.79,
342
  "learning_rate": 3.898635477582846e-06,
343
+ "loss": 0.1507,
344
  "step": 530
345
  },
346
  {
347
  "epoch": 2.84,
348
  "learning_rate": 2.9239766081871343e-06,
349
+ "loss": 0.1575,
350
  "step": 540
351
  },
352
  {
353
  "epoch": 2.89,
354
  "learning_rate": 1.949317738791423e-06,
355
+ "loss": 0.1271,
356
  "step": 550
357
  },
358
  {
359
  "epoch": 2.95,
360
  "learning_rate": 9.746588693957115e-07,
361
+ "loss": 0.1371,
362
  "step": 560
363
  },
364
  {
365
  "epoch": 3.0,
366
  "learning_rate": 0.0,
367
+ "loss": 0.126,
368
  "step": 570
369
  },
370
  {
371
  "epoch": 3.0,
372
+ "eval_accuracy": 0.9837037037037037,
373
+ "eval_loss": 0.05808306112885475,
374
+ "eval_runtime": 18.4817,
375
+ "eval_samples_per_second": 146.09,
376
+ "eval_steps_per_second": 4.599,
377
  "step": 570
378
  },
379
  {
380
  "epoch": 3.0,
381
  "step": 570,
382
  "total_flos": 1.8124066505760768e+18,
383
+ "train_loss": 0.3388945644361931,
384
+ "train_runtime": 1188.1189,
385
+ "train_samples_per_second": 61.357,
386
+ "train_steps_per_second": 0.48
387
  }
388
  ],
389
  "max_steps": 570,