t1msan commited on
Commit
418438e
1 Parent(s): d9697c9

End of training

Browse files
README.md CHANGED
@@ -17,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [microsoft/swin-base-patch4-window7-224-in22k](https://huggingface.co/microsoft/swin-base-patch4-window7-224-in22k) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.0036
21
 
22
  ## Model description
23
 
 
17
 
18
  This model is a fine-tuned version of [microsoft/swin-base-patch4-window7-224-in22k](https://huggingface.co/microsoft/swin-base-patch4-window7-224-in22k) on the imagefolder dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.0008
21
 
22
  ## Model description
23
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 9.6,
3
- "eval_loss": 0.010416708886623383,
4
- "eval_runtime": 2.6618,
5
- "eval_samples_per_second": 49.59,
6
- "eval_steps_per_second": 1.127,
7
- "total_flos": 8.884386008218829e+17,
8
- "train_loss": 0.13027287572622298,
9
- "train_runtime": 399.2998,
10
- "train_samples_per_second": 29.552,
11
- "train_steps_per_second": 0.15
12
  }
 
1
  {
2
+ "epoch": 9.87,
3
+ "eval_loss": 0.000785744923632592,
4
+ "eval_runtime": 26.9588,
5
+ "eval_samples_per_second": 58.682,
6
+ "eval_steps_per_second": 0.927,
7
+ "total_flos": 1.100143961239951e+19,
8
+ "train_loss": 0.03171633874193173,
9
+ "train_runtime": 4576.6164,
10
+ "train_samples_per_second": 31.093,
11
+ "train_steps_per_second": 0.12
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 9.6,
3
- "eval_loss": 0.010416708886623383,
4
- "eval_runtime": 2.6618,
5
- "eval_samples_per_second": 49.59,
6
- "eval_steps_per_second": 1.127
7
  }
 
1
  {
2
+ "epoch": 9.87,
3
+ "eval_loss": 0.000785744923632592,
4
+ "eval_runtime": 26.9588,
5
+ "eval_samples_per_second": 58.682,
6
+ "eval_steps_per_second": 0.927
7
  }
runs/Apr19_11-49-01_c9d5019042b9/events.out.tfevents.1713532637.c9d5019042b9.34.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb67ff6354834d15459e10a754777753b7ebb154ac46348fc4174ad25c1eae9
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.6,
3
- "total_flos": 8.884386008218829e+17,
4
- "train_loss": 0.13027287572622298,
5
- "train_runtime": 399.2998,
6
- "train_samples_per_second": 29.552,
7
- "train_steps_per_second": 0.15
8
  }
 
1
  {
2
+ "epoch": 9.87,
3
+ "total_flos": 1.100143961239951e+19,
4
+ "train_loss": 0.03171633874193173,
5
+ "train_runtime": 4576.6164,
6
+ "train_samples_per_second": 31.093,
7
+ "train_steps_per_second": 0.12
8
  }
trainer_state.json CHANGED
@@ -1,152 +1,495 @@
1
  {
2
- "best_metric": 0.010416708886623383,
3
- "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-Kontur-competition-1.3K/checkpoint-56",
4
- "epoch": 9.6,
5
  "eval_steps": 500,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.96,
13
- "eval_loss": 0.4818643033504486,
14
- "eval_runtime": 2.5684,
15
- "eval_samples_per_second": 51.393,
16
- "eval_steps_per_second": 1.168,
17
- "step": 6
18
  },
19
  {
20
- "epoch": 1.6,
21
- "grad_norm": 3.978776454925537,
22
- "learning_rate": 4.62962962962963e-05,
23
- "loss": 0.5639,
24
- "step": 10
25
  },
26
  {
27
- "epoch": 1.92,
28
- "eval_loss": 0.22424142062664032,
29
- "eval_runtime": 2.618,
30
- "eval_samples_per_second": 50.419,
31
- "eval_steps_per_second": 1.146,
32
- "step": 12
33
  },
34
  {
35
- "epoch": 2.88,
36
- "eval_loss": 0.0641486644744873,
37
- "eval_runtime": 2.5792,
38
- "eval_samples_per_second": 51.18,
39
- "eval_steps_per_second": 1.163,
40
- "step": 18
41
  },
42
  {
43
- "epoch": 3.2,
44
- "grad_norm": 2.1514062881469727,
45
- "learning_rate": 3.7037037037037037e-05,
46
- "loss": 0.1359,
47
- "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  },
49
  {
50
  "epoch": 4.0,
51
- "eval_loss": 0.07821954041719437,
52
- "eval_runtime": 2.6087,
53
- "eval_samples_per_second": 50.601,
54
- "eval_steps_per_second": 1.15,
55
- "step": 25
56
  },
57
  {
58
- "epoch": 4.8,
59
- "grad_norm": 1.6489207744598389,
60
- "learning_rate": 2.777777777777778e-05,
61
- "loss": 0.031,
62
- "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  },
64
  {
65
- "epoch": 4.96,
66
- "eval_loss": 0.025011500343680382,
67
- "eval_runtime": 2.632,
68
- "eval_samples_per_second": 50.152,
69
- "eval_steps_per_second": 1.14,
70
- "step": 31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  },
72
  {
73
  "epoch": 5.92,
74
- "eval_loss": 0.017290038987994194,
75
- "eval_runtime": 2.6102,
76
- "eval_samples_per_second": 50.571,
77
- "eval_steps_per_second": 1.149,
78
- "step": 37
79
  },
80
  {
81
- "epoch": 6.4,
82
- "grad_norm": 7.398428440093994,
83
- "learning_rate": 1.8518518518518518e-05,
84
- "loss": 0.0221,
85
- "step": 40
 
86
  },
87
  {
88
- "epoch": 6.88,
89
- "eval_loss": 0.01186411827802658,
90
- "eval_runtime": 2.603,
91
- "eval_samples_per_second": 50.712,
92
- "eval_steps_per_second": 1.153,
93
- "step": 43
94
  },
95
  {
96
- "epoch": 8.0,
97
- "grad_norm": 0.057548072189092636,
98
- "learning_rate": 9.259259259259259e-06,
99
- "loss": 0.0144,
100
- "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  },
102
  {
103
  "epoch": 8.0,
104
- "eval_loss": 0.01627057045698166,
105
- "eval_runtime": 2.5904,
106
- "eval_samples_per_second": 50.957,
107
- "eval_steps_per_second": 1.158,
108
- "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  {
111
- "epoch": 8.96,
112
- "eval_loss": 0.010416708886623383,
113
- "eval_runtime": 2.5905,
114
- "eval_samples_per_second": 50.956,
115
- "eval_steps_per_second": 1.158,
116
- "step": 56
117
  },
118
  {
119
- "epoch": 9.6,
120
- "grad_norm": 0.8250289559364319,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  "learning_rate": 0.0,
122
- "loss": 0.0144,
123
- "step": 60
124
  },
125
  {
126
- "epoch": 9.6,
127
- "eval_loss": 0.011465705931186676,
128
- "eval_runtime": 2.9818,
129
- "eval_samples_per_second": 44.269,
130
- "eval_steps_per_second": 1.006,
131
- "step": 60
132
  },
133
  {
134
- "epoch": 9.6,
135
- "step": 60,
136
- "total_flos": 8.884386008218829e+17,
137
- "train_loss": 0.13027287572622298,
138
- "train_runtime": 399.2998,
139
- "train_samples_per_second": 29.552,
140
- "train_steps_per_second": 0.15
141
  }
142
  ],
143
  "logging_steps": 10,
144
- "max_steps": 60,
145
  "num_input_tokens_seen": 0,
146
  "num_train_epochs": 10,
147
  "save_steps": 500,
148
- "total_flos": 8.884386008218829e+17,
149
- "train_batch_size": 48,
150
  "trial_name": null,
151
  "trial_params": null
152
  }
 
1
  {
2
+ "best_metric": 0.000785744923632592,
3
+ "best_model_checkpoint": "swin-base-patch4-window7-224-in22k-Kontur-competition-1.3K/checkpoint-334",
4
+ "epoch": 9.865470852017937,
5
  "eval_steps": 500,
6
+ "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.18,
13
+ "grad_norm": 3.5358009338378906,
14
+ "learning_rate": 9.090909090909091e-06,
15
+ "loss": 0.674,
16
+ "step": 10
 
17
  },
18
  {
19
+ "epoch": 0.36,
20
+ "grad_norm": 3.1279354095458984,
21
+ "learning_rate": 1.8181818181818182e-05,
22
+ "loss": 0.4594,
23
+ "step": 20
24
  },
25
  {
26
+ "epoch": 0.54,
27
+ "grad_norm": 2.273749351501465,
28
+ "learning_rate": 2.7272727272727273e-05,
29
+ "loss": 0.1591,
30
+ "step": 30
 
31
  },
32
  {
33
+ "epoch": 0.72,
34
+ "grad_norm": 16.241113662719727,
35
+ "learning_rate": 3.6363636363636364e-05,
36
+ "loss": 0.0193,
37
+ "step": 40
 
38
  },
39
  {
40
+ "epoch": 0.9,
41
+ "grad_norm": 5.422853469848633,
42
+ "learning_rate": 4.545454545454546e-05,
43
+ "loss": 0.0593,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.99,
48
+ "eval_loss": 0.029350830242037773,
49
+ "eval_runtime": 35.1661,
50
+ "eval_samples_per_second": 44.986,
51
+ "eval_steps_per_second": 0.711,
52
+ "step": 55
53
+ },
54
+ {
55
+ "epoch": 1.08,
56
+ "grad_norm": 6.060277938842773,
57
+ "learning_rate": 4.94949494949495e-05,
58
+ "loss": 0.0433,
59
+ "step": 60
60
+ },
61
+ {
62
+ "epoch": 1.26,
63
+ "grad_norm": 0.930535614490509,
64
+ "learning_rate": 4.848484848484849e-05,
65
+ "loss": 0.018,
66
+ "step": 70
67
+ },
68
+ {
69
+ "epoch": 1.43,
70
+ "grad_norm": 4.823692321777344,
71
+ "learning_rate": 4.7474747474747476e-05,
72
+ "loss": 0.0436,
73
+ "step": 80
74
+ },
75
+ {
76
+ "epoch": 1.61,
77
+ "grad_norm": 1.3981133699417114,
78
+ "learning_rate": 4.6464646464646464e-05,
79
+ "loss": 0.0184,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 1.79,
84
+ "grad_norm": 0.6671331524848938,
85
+ "learning_rate": 4.545454545454546e-05,
86
+ "loss": 0.0201,
87
+ "step": 100
88
+ },
89
+ {
90
+ "epoch": 1.97,
91
+ "grad_norm": 1.8355345726013184,
92
+ "learning_rate": 4.4444444444444447e-05,
93
+ "loss": 0.0098,
94
+ "step": 110
95
+ },
96
+ {
97
+ "epoch": 1.99,
98
+ "eval_loss": 0.03146681934595108,
99
+ "eval_runtime": 28.3147,
100
+ "eval_samples_per_second": 55.872,
101
+ "eval_steps_per_second": 0.883,
102
+ "step": 111
103
+ },
104
+ {
105
+ "epoch": 2.15,
106
+ "grad_norm": 0.02300291508436203,
107
+ "learning_rate": 4.343434343434344e-05,
108
+ "loss": 0.0131,
109
+ "step": 120
110
+ },
111
+ {
112
+ "epoch": 2.33,
113
+ "grad_norm": 7.2036871910095215,
114
+ "learning_rate": 4.242424242424243e-05,
115
+ "loss": 0.0215,
116
+ "step": 130
117
+ },
118
+ {
119
+ "epoch": 2.51,
120
+ "grad_norm": 1.0436112880706787,
121
+ "learning_rate": 4.141414141414142e-05,
122
+ "loss": 0.0224,
123
+ "step": 140
124
+ },
125
+ {
126
+ "epoch": 2.69,
127
+ "grad_norm": 2.077420949935913,
128
+ "learning_rate": 4.0404040404040405e-05,
129
+ "loss": 0.0133,
130
+ "step": 150
131
+ },
132
+ {
133
+ "epoch": 2.87,
134
+ "grad_norm": 0.17279785871505737,
135
+ "learning_rate": 3.939393939393939e-05,
136
+ "loss": 0.0066,
137
+ "step": 160
138
+ },
139
+ {
140
+ "epoch": 3.0,
141
+ "eval_loss": 0.03221270069479942,
142
+ "eval_runtime": 27.4288,
143
+ "eval_samples_per_second": 57.677,
144
+ "eval_steps_per_second": 0.911,
145
+ "step": 167
146
+ },
147
+ {
148
+ "epoch": 3.05,
149
+ "grad_norm": 0.07281704246997833,
150
+ "learning_rate": 3.838383838383838e-05,
151
+ "loss": 0.0034,
152
+ "step": 170
153
+ },
154
+ {
155
+ "epoch": 3.23,
156
+ "grad_norm": 0.06530273705720901,
157
+ "learning_rate": 3.7373737373737376e-05,
158
+ "loss": 0.0038,
159
+ "step": 180
160
+ },
161
+ {
162
+ "epoch": 3.41,
163
+ "grad_norm": 0.2904812693595886,
164
+ "learning_rate": 3.6363636363636364e-05,
165
+ "loss": 0.0045,
166
+ "step": 190
167
+ },
168
+ {
169
+ "epoch": 3.59,
170
+ "grad_norm": 0.01948358118534088,
171
+ "learning_rate": 3.535353535353535e-05,
172
+ "loss": 0.0052,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 3.77,
177
+ "grad_norm": 0.24817952513694763,
178
+ "learning_rate": 3.434343434343435e-05,
179
+ "loss": 0.004,
180
+ "step": 210
181
+ },
182
+ {
183
+ "epoch": 3.95,
184
+ "grad_norm": 1.0436078310012817,
185
+ "learning_rate": 3.3333333333333335e-05,
186
+ "loss": 0.0179,
187
+ "step": 220
188
  },
189
  {
190
  "epoch": 4.0,
191
+ "eval_loss": 0.006767116021364927,
192
+ "eval_runtime": 27.0156,
193
+ "eval_samples_per_second": 58.559,
194
+ "eval_steps_per_second": 0.925,
195
+ "step": 223
196
  },
197
  {
198
+ "epoch": 4.13,
199
+ "grad_norm": 0.4218502342700958,
200
+ "learning_rate": 3.232323232323233e-05,
201
+ "loss": 0.011,
202
+ "step": 230
203
+ },
204
+ {
205
+ "epoch": 4.3,
206
+ "grad_norm": 1.2248862981796265,
207
+ "learning_rate": 3.131313131313132e-05,
208
+ "loss": 0.0043,
209
+ "step": 240
210
+ },
211
+ {
212
+ "epoch": 4.48,
213
+ "grad_norm": 3.450435161590576,
214
+ "learning_rate": 3.0303030303030306e-05,
215
+ "loss": 0.0115,
216
+ "step": 250
217
+ },
218
+ {
219
+ "epoch": 4.66,
220
+ "grad_norm": 0.10841131955385208,
221
+ "learning_rate": 2.9292929292929294e-05,
222
+ "loss": 0.0189,
223
+ "step": 260
224
+ },
225
+ {
226
+ "epoch": 4.84,
227
+ "grad_norm": 2.503843307495117,
228
+ "learning_rate": 2.8282828282828282e-05,
229
+ "loss": 0.0078,
230
+ "step": 270
231
  },
232
  {
233
+ "epoch": 4.99,
234
+ "eval_loss": 0.0032722819596529007,
235
+ "eval_runtime": 27.2058,
236
+ "eval_samples_per_second": 58.149,
237
+ "eval_steps_per_second": 0.919,
238
+ "step": 278
239
+ },
240
+ {
241
+ "epoch": 5.02,
242
+ "grad_norm": 1.6022576093673706,
243
+ "learning_rate": 2.7272727272727273e-05,
244
+ "loss": 0.0069,
245
+ "step": 280
246
+ },
247
+ {
248
+ "epoch": 5.2,
249
+ "grad_norm": 0.23544766008853912,
250
+ "learning_rate": 2.6262626262626268e-05,
251
+ "loss": 0.0077,
252
+ "step": 290
253
+ },
254
+ {
255
+ "epoch": 5.38,
256
+ "grad_norm": 1.4383817911148071,
257
+ "learning_rate": 2.5252525252525256e-05,
258
+ "loss": 0.002,
259
+ "step": 300
260
+ },
261
+ {
262
+ "epoch": 5.56,
263
+ "grad_norm": 0.105356365442276,
264
+ "learning_rate": 2.4242424242424244e-05,
265
+ "loss": 0.0017,
266
+ "step": 310
267
+ },
268
+ {
269
+ "epoch": 5.74,
270
+ "grad_norm": 0.7489631175994873,
271
+ "learning_rate": 2.3232323232323232e-05,
272
+ "loss": 0.0012,
273
+ "step": 320
274
  },
275
  {
276
  "epoch": 5.92,
277
+ "grad_norm": 1.5391371250152588,
278
+ "learning_rate": 2.2222222222222223e-05,
279
+ "loss": 0.0015,
280
+ "step": 330
 
281
  },
282
  {
283
+ "epoch": 5.99,
284
+ "eval_loss": 0.000785744923632592,
285
+ "eval_runtime": 26.43,
286
+ "eval_samples_per_second": 59.856,
287
+ "eval_steps_per_second": 0.946,
288
+ "step": 334
289
  },
290
  {
291
+ "epoch": 6.1,
292
+ "grad_norm": 0.05875357240438461,
293
+ "learning_rate": 2.1212121212121215e-05,
294
+ "loss": 0.002,
295
+ "step": 340
 
296
  },
297
  {
298
+ "epoch": 6.28,
299
+ "grad_norm": 2.8288419246673584,
300
+ "learning_rate": 2.0202020202020203e-05,
301
+ "loss": 0.0036,
302
+ "step": 350
303
+ },
304
+ {
305
+ "epoch": 6.46,
306
+ "grad_norm": 0.018499189987778664,
307
+ "learning_rate": 1.919191919191919e-05,
308
+ "loss": 0.0007,
309
+ "step": 360
310
+ },
311
+ {
312
+ "epoch": 6.64,
313
+ "grad_norm": 0.00614353409036994,
314
+ "learning_rate": 1.8181818181818182e-05,
315
+ "loss": 0.0036,
316
+ "step": 370
317
+ },
318
+ {
319
+ "epoch": 6.82,
320
+ "grad_norm": 0.01476567517966032,
321
+ "learning_rate": 1.7171717171717173e-05,
322
+ "loss": 0.0041,
323
+ "step": 380
324
+ },
325
+ {
326
+ "epoch": 7.0,
327
+ "grad_norm": 0.16370485723018646,
328
+ "learning_rate": 1.6161616161616165e-05,
329
+ "loss": 0.0017,
330
+ "step": 390
331
+ },
332
+ {
333
+ "epoch": 7.0,
334
+ "eval_loss": 0.007814141921699047,
335
+ "eval_runtime": 26.7049,
336
+ "eval_samples_per_second": 59.24,
337
+ "eval_steps_per_second": 0.936,
338
+ "step": 390
339
+ },
340
+ {
341
+ "epoch": 7.17,
342
+ "grad_norm": 0.09164857864379883,
343
+ "learning_rate": 1.5151515151515153e-05,
344
+ "loss": 0.0004,
345
+ "step": 400
346
+ },
347
+ {
348
+ "epoch": 7.35,
349
+ "grad_norm": 0.19655057787895203,
350
+ "learning_rate": 1.4141414141414141e-05,
351
+ "loss": 0.0005,
352
+ "step": 410
353
+ },
354
+ {
355
+ "epoch": 7.53,
356
+ "grad_norm": 0.0026813277509063482,
357
+ "learning_rate": 1.3131313131313134e-05,
358
+ "loss": 0.0003,
359
+ "step": 420
360
+ },
361
+ {
362
+ "epoch": 7.71,
363
+ "grad_norm": 0.7142350673675537,
364
+ "learning_rate": 1.2121212121212122e-05,
365
+ "loss": 0.0005,
366
+ "step": 430
367
+ },
368
+ {
369
+ "epoch": 7.89,
370
+ "grad_norm": 0.004237661603838205,
371
+ "learning_rate": 1.1111111111111112e-05,
372
+ "loss": 0.0008,
373
+ "step": 440
374
  },
375
  {
376
  "epoch": 8.0,
377
+ "eval_loss": 0.002696491777896881,
378
+ "eval_runtime": 26.1158,
379
+ "eval_samples_per_second": 60.576,
380
+ "eval_steps_per_second": 0.957,
381
+ "step": 446
382
+ },
383
+ {
384
+ "epoch": 8.07,
385
+ "grad_norm": 0.010493074543774128,
386
+ "learning_rate": 1.0101010101010101e-05,
387
+ "loss": 0.0001,
388
+ "step": 450
389
+ },
390
+ {
391
+ "epoch": 8.25,
392
+ "grad_norm": 0.015726672485470772,
393
+ "learning_rate": 9.090909090909091e-06,
394
+ "loss": 0.0003,
395
+ "step": 460
396
+ },
397
+ {
398
+ "epoch": 8.43,
399
+ "grad_norm": 2.051889657974243,
400
+ "learning_rate": 8.080808080808082e-06,
401
+ "loss": 0.0023,
402
+ "step": 470
403
+ },
404
+ {
405
+ "epoch": 8.61,
406
+ "grad_norm": 0.001357908477075398,
407
+ "learning_rate": 7.0707070707070704e-06,
408
+ "loss": 0.0018,
409
+ "step": 480
410
  },
411
  {
412
+ "epoch": 8.79,
413
+ "grad_norm": 0.0037935995496809483,
414
+ "learning_rate": 6.060606060606061e-06,
415
+ "loss": 0.0001,
416
+ "step": 490
 
417
  },
418
  {
419
+ "epoch": 8.97,
420
+ "grad_norm": 0.014398468658328056,
421
+ "learning_rate": 5.050505050505051e-06,
422
+ "loss": 0.0019,
423
+ "step": 500
424
+ },
425
+ {
426
+ "epoch": 8.99,
427
+ "eval_loss": 0.0011395993642508984,
428
+ "eval_runtime": 26.5634,
429
+ "eval_samples_per_second": 59.556,
430
+ "eval_steps_per_second": 0.941,
431
+ "step": 501
432
+ },
433
+ {
434
+ "epoch": 9.15,
435
+ "grad_norm": 0.04990648478269577,
436
+ "learning_rate": 4.040404040404041e-06,
437
+ "loss": 0.0008,
438
+ "step": 510
439
+ },
440
+ {
441
+ "epoch": 9.33,
442
+ "grad_norm": 0.009485547430813313,
443
+ "learning_rate": 3.0303030303030305e-06,
444
+ "loss": 0.0024,
445
+ "step": 520
446
+ },
447
+ {
448
+ "epoch": 9.51,
449
+ "grad_norm": 0.007252832874655724,
450
+ "learning_rate": 2.0202020202020206e-06,
451
+ "loss": 0.0007,
452
+ "step": 530
453
+ },
454
+ {
455
+ "epoch": 9.69,
456
+ "grad_norm": 0.007266469299793243,
457
+ "learning_rate": 1.0101010101010103e-06,
458
+ "loss": 0.0003,
459
+ "step": 540
460
+ },
461
+ {
462
+ "epoch": 9.87,
463
+ "grad_norm": 0.01849411241710186,
464
  "learning_rate": 0.0,
465
+ "loss": 0.0014,
466
+ "step": 550
467
  },
468
  {
469
+ "epoch": 9.87,
470
+ "eval_loss": 0.0035754255950450897,
471
+ "eval_runtime": 37.9782,
472
+ "eval_samples_per_second": 41.655,
473
+ "eval_steps_per_second": 0.658,
474
+ "step": 550
475
  },
476
  {
477
+ "epoch": 9.87,
478
+ "step": 550,
479
+ "total_flos": 1.100143961239951e+19,
480
+ "train_loss": 0.03171633874193173,
481
+ "train_runtime": 4576.6164,
482
+ "train_samples_per_second": 31.093,
483
+ "train_steps_per_second": 0.12
484
  }
485
  ],
486
  "logging_steps": 10,
487
+ "max_steps": 550,
488
  "num_input_tokens_seen": 0,
489
  "num_train_epochs": 10,
490
  "save_steps": 500,
491
+ "total_flos": 1.100143961239951e+19,
492
+ "train_batch_size": 64,
493
  "trial_name": null,
494
  "trial_params": null
495
  }