Magneto committed on
Commit
4744548
1 Parent(s): 2912f09

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. adapter_model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +68 -428
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f16612b3558a994f7f9fdb2eda772f0d7ee92b8ab65d308d952c16e2cf5882d6
3
  size 93378688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9508f3e5ab410c13b37ce777d7f03968cd1ef912ad7468af91cddcd618845ed
3
  size 93378688
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33e4753d8e967daf58be3c4586c14128aaf5cf8b18762c58bbde37b302042703
3
  size 187031330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe0837d3b546e57addf74036233fb37d0d61eda99a01741c2eb7c08f60db9fc2
3
  size 187031330
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb920a55454edc05cad446eb986eb35a20c5c33567a5a093ac93baf95f6aec7c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a18fdd216403847cd41e3eed155b254200482f615f1b6fea997a601e92ff90f0
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25ab4523fd7262af4b9e1ebf15a79d63c966e86fc99ff91e2840498756cca05b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17ddebb9f7da522ed1db7b26ff2dc5c2f0ec86d553faec156dfe4119486e13d
3
  size 1064
trainer_state.json CHANGED
@@ -1,537 +1,177 @@
1
  {
2
- "best_metric": 0.8183558583259583,
3
- "best_model_checkpoint": "/content/Nasa_Mars_Model_Outputs/checkpoint-170",
4
- "epoch": 0.1591451631237922,
5
  "eval_steps": 10,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.004547004660679777,
13
- "grad_norm": 8.844450950622559,
14
  "learning_rate": 2e-05,
15
- "loss": 0.5029,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.004547004660679777,
20
- "eval_loss": 0.852020800113678,
21
- "eval_runtime": 20.6332,
22
- "eval_samples_per_second": 2.423,
23
- "eval_steps_per_second": 2.423,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.009094009321359554,
28
- "grad_norm": 6.392278671264648,
29
  "learning_rate": 4e-05,
30
- "loss": 0.4208,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 0.009094009321359554,
35
- "eval_loss": 0.8326458930969238,
36
- "eval_runtime": 20.8468,
37
- "eval_samples_per_second": 2.398,
38
- "eval_steps_per_second": 2.398,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.013641013982039332,
43
- "grad_norm": 5.979872703552246,
44
  "learning_rate": 6e-05,
45
- "loss": 0.3174,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 0.013641013982039332,
50
- "eval_loss": 0.9364193677902222,
51
- "eval_runtime": 20.7537,
52
- "eval_samples_per_second": 2.409,
53
- "eval_steps_per_second": 2.409,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 0.018188018642719107,
58
- "grad_norm": 8.079290390014648,
59
  "learning_rate": 8e-05,
60
- "loss": 0.2436,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 0.018188018642719107,
65
- "eval_loss": 0.9858406782150269,
66
- "eval_runtime": 20.7434,
67
- "eval_samples_per_second": 2.41,
68
- "eval_steps_per_second": 2.41,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 0.022735023303398886,
73
- "grad_norm": 7.087987422943115,
74
  "learning_rate": 0.0001,
75
- "loss": 0.2069,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 0.022735023303398886,
80
- "eval_loss": 0.9912980794906616,
81
- "eval_runtime": 20.7587,
82
- "eval_samples_per_second": 2.409,
83
- "eval_steps_per_second": 2.409,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 0.027282027964078664,
88
- "grad_norm": 7.0141496658325195,
89
  "learning_rate": 9.977000919963202e-05,
90
- "loss": 0.2171,
91
  "step": 60
92
  },
93
  {
94
  "epoch": 0.027282027964078664,
95
- "eval_loss": 1.029310941696167,
96
- "eval_runtime": 20.7996,
97
  "eval_samples_per_second": 2.404,
98
  "eval_steps_per_second": 2.404,
99
  "step": 60
100
  },
101
  {
102
  "epoch": 0.03182903262475844,
103
- "grad_norm": 7.3678998947143555,
104
  "learning_rate": 9.954001839926403e-05,
105
- "loss": 0.2756,
106
  "step": 70
107
  },
108
  {
109
  "epoch": 0.03182903262475844,
110
- "eval_loss": 0.9257596731185913,
111
- "eval_runtime": 20.8032,
112
- "eval_samples_per_second": 2.403,
113
- "eval_steps_per_second": 2.403,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.036376037285438215,
118
- "grad_norm": 8.662162780761719,
119
  "learning_rate": 9.931002759889605e-05,
120
- "loss": 0.3853,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.036376037285438215,
125
- "eval_loss": 0.9099023342132568,
126
- "eval_runtime": 20.8394,
127
- "eval_samples_per_second": 2.399,
128
- "eval_steps_per_second": 2.399,
129
  "step": 80
130
  },
131
  {
132
  "epoch": 0.040923041946118,
133
- "grad_norm": 8.646831512451172,
134
  "learning_rate": 9.908003679852806e-05,
135
- "loss": 0.4754,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 0.040923041946118,
140
- "eval_loss": 0.8751680254936218,
141
- "eval_runtime": 20.8501,
142
- "eval_samples_per_second": 2.398,
143
- "eval_steps_per_second": 2.398,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 0.04547004660679777,
148
- "grad_norm": 5.3645501136779785,
149
  "learning_rate": 9.885004599816008e-05,
150
- "loss": 0.3919,
151
  "step": 100
152
  },
153
  {
154
  "epoch": 0.04547004660679777,
155
- "eval_loss": 0.8975805640220642,
156
- "eval_runtime": 20.7625,
157
- "eval_samples_per_second": 2.408,
158
- "eval_steps_per_second": 2.408,
159
  "step": 100
160
  },
161
  {
162
  "epoch": 0.05001705126747755,
163
- "grad_norm": 7.332831859588623,
164
  "learning_rate": 9.86200551977921e-05,
165
- "loss": 0.5172,
166
  "step": 110
167
  },
168
  {
169
  "epoch": 0.05001705126747755,
170
- "eval_loss": 0.8908578753471375,
171
- "eval_runtime": 20.8039,
172
- "eval_samples_per_second": 2.403,
173
- "eval_steps_per_second": 2.403,
174
- "step": 110
175
- },
176
- {
177
- "epoch": 0.05456405592815733,
178
- "grad_norm": 5.561324119567871,
179
- "learning_rate": 9.83900643974241e-05,
180
- "loss": 0.4599,
181
- "step": 120
182
- },
183
- {
184
- "epoch": 0.05456405592815733,
185
- "eval_loss": 0.8581109046936035,
186
- "eval_runtime": 20.8113,
187
- "eval_samples_per_second": 2.403,
188
- "eval_steps_per_second": 2.403,
189
- "step": 120
190
- },
191
- {
192
- "epoch": 0.059111060588837104,
193
- "grad_norm": 7.407668113708496,
194
- "learning_rate": 9.816007359705612e-05,
195
- "loss": 0.5029,
196
- "step": 130
197
- },
198
- {
199
- "epoch": 0.059111060588837104,
200
- "eval_loss": 0.8726012706756592,
201
- "eval_runtime": 20.8342,
202
  "eval_samples_per_second": 2.4,
203
  "eval_steps_per_second": 2.4,
204
- "step": 130
205
- },
206
- {
207
- "epoch": 0.06365806524951688,
208
- "grad_norm": 5.486892223358154,
209
- "learning_rate": 9.793008279668814e-05,
210
- "loss": 0.4758,
211
- "step": 140
212
- },
213
- {
214
- "epoch": 0.06365806524951688,
215
- "eval_loss": 0.8502522110939026,
216
- "eval_runtime": 20.7835,
217
- "eval_samples_per_second": 2.406,
218
- "eval_steps_per_second": 2.406,
219
- "step": 140
220
- },
221
- {
222
- "epoch": 0.06820506991019666,
223
- "grad_norm": 7.221977710723877,
224
- "learning_rate": 9.770009199632015e-05,
225
- "loss": 0.6792,
226
- "step": 150
227
- },
228
- {
229
- "epoch": 0.06820506991019666,
230
- "eval_loss": 0.827602207660675,
231
- "eval_runtime": 20.7446,
232
- "eval_samples_per_second": 2.41,
233
- "eval_steps_per_second": 2.41,
234
- "step": 150
235
- },
236
- {
237
- "epoch": 0.07275207457087643,
238
- "grad_norm": 5.567179203033447,
239
- "learning_rate": 9.747010119595217e-05,
240
- "loss": 0.6319,
241
- "step": 160
242
- },
243
- {
244
- "epoch": 0.07275207457087643,
245
- "eval_loss": 0.8225394487380981,
246
- "eval_runtime": 20.7953,
247
- "eval_samples_per_second": 2.404,
248
- "eval_steps_per_second": 2.404,
249
- "step": 160
250
- },
251
- {
252
- "epoch": 0.07729907923155621,
253
- "grad_norm": 6.063055992126465,
254
- "learning_rate": 9.724011039558418e-05,
255
- "loss": 0.7255,
256
- "step": 170
257
- },
258
- {
259
- "epoch": 0.07729907923155621,
260
- "eval_loss": 0.8183558583259583,
261
- "eval_runtime": 20.804,
262
- "eval_samples_per_second": 2.403,
263
- "eval_steps_per_second": 2.403,
264
- "step": 170
265
- },
266
- {
267
- "epoch": 0.081846083892236,
268
- "grad_norm": 8.263747215270996,
269
- "learning_rate": 9.70101195952162e-05,
270
- "loss": 0.6679,
271
- "step": 180
272
- },
273
- {
274
- "epoch": 0.081846083892236,
275
- "eval_loss": 0.8275816440582275,
276
- "eval_runtime": 20.8116,
277
- "eval_samples_per_second": 2.403,
278
- "eval_steps_per_second": 2.403,
279
- "step": 180
280
- },
281
- {
282
- "epoch": 0.08639308855291576,
283
- "grad_norm": 8.163738250732422,
284
- "learning_rate": 9.678012879484822e-05,
285
- "loss": 0.6618,
286
- "step": 190
287
- },
288
- {
289
- "epoch": 0.08639308855291576,
290
- "eval_loss": 0.8254545331001282,
291
- "eval_runtime": 20.8251,
292
- "eval_samples_per_second": 2.401,
293
- "eval_steps_per_second": 2.401,
294
- "step": 190
295
- },
296
- {
297
- "epoch": 0.09094009321359554,
298
- "grad_norm": 10.39909839630127,
299
- "learning_rate": 9.655013799448022e-05,
300
- "loss": 0.7477,
301
- "step": 200
302
- },
303
- {
304
- "epoch": 0.09094009321359554,
305
- "eval_loss": 0.8232752084732056,
306
- "eval_runtime": 20.7836,
307
- "eval_samples_per_second": 2.406,
308
- "eval_steps_per_second": 2.406,
309
- "step": 200
310
- },
311
- {
312
- "epoch": 0.09548709787427533,
313
- "grad_norm": 5.135624885559082,
314
- "learning_rate": 9.632014719411224e-05,
315
- "loss": 0.6157,
316
- "step": 210
317
- },
318
- {
319
- "epoch": 0.09548709787427533,
320
- "eval_loss": 0.8450194597244263,
321
- "eval_runtime": 20.7915,
322
- "eval_samples_per_second": 2.405,
323
- "eval_steps_per_second": 2.405,
324
- "step": 210
325
- },
326
- {
327
- "epoch": 0.1000341025349551,
328
- "grad_norm": 5.748319149017334,
329
- "learning_rate": 9.609015639374425e-05,
330
- "loss": 0.6228,
331
- "step": 220
332
- },
333
- {
334
- "epoch": 0.1000341025349551,
335
- "eval_loss": 0.8414835333824158,
336
- "eval_runtime": 20.8517,
337
- "eval_samples_per_second": 2.398,
338
- "eval_steps_per_second": 2.398,
339
- "step": 220
340
- },
341
- {
342
- "epoch": 0.10458110719563488,
343
- "grad_norm": 6.394444942474365,
344
- "learning_rate": 9.586016559337627e-05,
345
- "loss": 0.846,
346
- "step": 230
347
- },
348
- {
349
- "epoch": 0.10458110719563488,
350
- "eval_loss": 0.8268595933914185,
351
- "eval_runtime": 20.7853,
352
- "eval_samples_per_second": 2.406,
353
- "eval_steps_per_second": 2.406,
354
- "step": 230
355
- },
356
- {
357
- "epoch": 0.10912811185631466,
358
- "grad_norm": 5.815967082977295,
359
- "learning_rate": 9.563017479300829e-05,
360
- "loss": 0.7647,
361
- "step": 240
362
- },
363
- {
364
- "epoch": 0.10912811185631466,
365
- "eval_loss": 0.8257486820220947,
366
- "eval_runtime": 20.7865,
367
- "eval_samples_per_second": 2.405,
368
- "eval_steps_per_second": 2.405,
369
- "step": 240
370
- },
371
- {
372
- "epoch": 0.11367511651699443,
373
- "grad_norm": 7.9722161293029785,
374
- "learning_rate": 9.54001839926403e-05,
375
- "loss": 0.6884,
376
- "step": 250
377
- },
378
- {
379
- "epoch": 0.11367511651699443,
380
- "eval_loss": 0.8330263495445251,
381
- "eval_runtime": 20.7919,
382
- "eval_samples_per_second": 2.405,
383
- "eval_steps_per_second": 2.405,
384
- "step": 250
385
- },
386
- {
387
- "epoch": 0.11822212117767421,
388
- "grad_norm": 6.1814799308776855,
389
- "learning_rate": 9.517019319227232e-05,
390
- "loss": 0.7325,
391
- "step": 260
392
- },
393
- {
394
- "epoch": 0.11822212117767421,
395
- "eval_loss": 0.8296122550964355,
396
- "eval_runtime": 20.7856,
397
- "eval_samples_per_second": 2.406,
398
- "eval_steps_per_second": 2.406,
399
- "step": 260
400
- },
401
- {
402
- "epoch": 0.12276912583835399,
403
- "grad_norm": 14.889198303222656,
404
- "learning_rate": 9.494020239190432e-05,
405
- "loss": 0.6521,
406
- "step": 270
407
- },
408
- {
409
- "epoch": 0.12276912583835399,
410
- "eval_loss": 0.8349407911300659,
411
- "eval_runtime": 20.7358,
412
- "eval_samples_per_second": 2.411,
413
- "eval_steps_per_second": 2.411,
414
- "step": 270
415
- },
416
- {
417
- "epoch": 0.12731613049903376,
418
- "grad_norm": 9.311092376708984,
419
- "learning_rate": 9.471021159153634e-05,
420
- "loss": 0.6413,
421
- "step": 280
422
- },
423
- {
424
- "epoch": 0.12731613049903376,
425
- "eval_loss": 0.8341807723045349,
426
- "eval_runtime": 20.7945,
427
- "eval_samples_per_second": 2.404,
428
- "eval_steps_per_second": 2.404,
429
- "step": 280
430
- },
431
- {
432
- "epoch": 0.13186313515971354,
433
- "grad_norm": 2.8647501468658447,
434
- "learning_rate": 9.448022079116836e-05,
435
- "loss": 0.6104,
436
- "step": 290
437
- },
438
- {
439
- "epoch": 0.13186313515971354,
440
- "eval_loss": 0.8275405764579773,
441
- "eval_runtime": 20.7773,
442
- "eval_samples_per_second": 2.406,
443
- "eval_steps_per_second": 2.406,
444
- "step": 290
445
- },
446
- {
447
- "epoch": 0.13641013982039332,
448
- "grad_norm": 6.136713027954102,
449
- "learning_rate": 9.425022999080037e-05,
450
- "loss": 0.5325,
451
- "step": 300
452
- },
453
- {
454
- "epoch": 0.13641013982039332,
455
- "eval_loss": 0.8468616008758545,
456
- "eval_runtime": 20.7933,
457
- "eval_samples_per_second": 2.405,
458
- "eval_steps_per_second": 2.405,
459
- "step": 300
460
- },
461
- {
462
- "epoch": 0.1409571444810731,
463
- "grad_norm": 9.0726900100708,
464
- "learning_rate": 9.402023919043239e-05,
465
- "loss": 0.6315,
466
- "step": 310
467
- },
468
- {
469
- "epoch": 0.1409571444810731,
470
- "eval_loss": 0.8461413383483887,
471
- "eval_runtime": 20.7716,
472
- "eval_samples_per_second": 2.407,
473
- "eval_steps_per_second": 2.407,
474
- "step": 310
475
- },
476
- {
477
- "epoch": 0.14550414914175286,
478
- "grad_norm": 7.699874401092529,
479
- "learning_rate": 9.379024839006441e-05,
480
- "loss": 0.6473,
481
- "step": 320
482
- },
483
- {
484
- "epoch": 0.14550414914175286,
485
- "eval_loss": 0.8356756567955017,
486
- "eval_runtime": 20.7971,
487
- "eval_samples_per_second": 2.404,
488
- "eval_steps_per_second": 2.404,
489
- "step": 320
490
- },
491
- {
492
- "epoch": 0.15005115380243264,
493
- "grad_norm": 7.28955602645874,
494
- "learning_rate": 9.356025758969642e-05,
495
- "loss": 0.5936,
496
- "step": 330
497
- },
498
- {
499
- "epoch": 0.15005115380243264,
500
- "eval_loss": 0.8357100486755371,
501
- "eval_runtime": 20.7802,
502
- "eval_samples_per_second": 2.406,
503
- "eval_steps_per_second": 2.406,
504
- "step": 330
505
- },
506
- {
507
- "epoch": 0.15459815846311242,
508
- "grad_norm": 4.673826217651367,
509
- "learning_rate": 9.333026678932844e-05,
510
- "loss": 0.5555,
511
- "step": 340
512
- },
513
- {
514
- "epoch": 0.15459815846311242,
515
- "eval_loss": 0.8365601897239685,
516
- "eval_runtime": 20.7353,
517
- "eval_samples_per_second": 2.411,
518
- "eval_steps_per_second": 2.411,
519
- "step": 340
520
- },
521
- {
522
- "epoch": 0.1591451631237922,
523
- "grad_norm": 5.104877948760986,
524
- "learning_rate": 9.310027598896044e-05,
525
- "loss": 0.5903,
526
- "step": 350
527
- },
528
- {
529
- "epoch": 0.1591451631237922,
530
- "eval_loss": 0.8396795392036438,
531
- "eval_runtime": 20.7413,
532
- "eval_samples_per_second": 2.411,
533
- "eval_steps_per_second": 2.411,
534
- "step": 350
535
  }
536
  ],
537
  "logging_steps": 10,
@@ -551,7 +191,7 @@
551
  "attributes": {}
552
  }
553
  },
554
- "total_flos": 2.854748142605779e+16,
555
  "train_batch_size": 1,
556
  "trial_name": null,
557
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.8372765183448792,
3
+ "best_model_checkpoint": "/content/Nasa_Mars_Model_Outputs/checkpoint-10",
4
+ "epoch": 0.05001705126747755,
5
  "eval_steps": 10,
6
+ "global_step": 110,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.004547004660679777,
13
+ "grad_norm": 8.836596488952637,
14
  "learning_rate": 2e-05,
15
+ "loss": 0.6073,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.004547004660679777,
20
+ "eval_loss": 0.8372765183448792,
21
+ "eval_runtime": 20.6797,
22
+ "eval_samples_per_second": 2.418,
23
+ "eval_steps_per_second": 2.418,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.009094009321359554,
28
+ "grad_norm": 9.525543212890625,
29
  "learning_rate": 4e-05,
30
+ "loss": 0.5308,
31
  "step": 20
32
  },
33
  {
34
  "epoch": 0.009094009321359554,
35
+ "eval_loss": 0.8388195037841797,
36
+ "eval_runtime": 20.8936,
37
+ "eval_samples_per_second": 2.393,
38
+ "eval_steps_per_second": 2.393,
39
  "step": 20
40
  },
41
  {
42
  "epoch": 0.013641013982039332,
43
+ "grad_norm": 8.243130683898926,
44
  "learning_rate": 6e-05,
45
+ "loss": 0.4263,
46
  "step": 30
47
  },
48
  {
49
  "epoch": 0.013641013982039332,
50
+ "eval_loss": 0.9063456654548645,
51
+ "eval_runtime": 20.8341,
52
+ "eval_samples_per_second": 2.4,
53
+ "eval_steps_per_second": 2.4,
54
  "step": 30
55
  },
56
  {
57
  "epoch": 0.018188018642719107,
58
+ "grad_norm": 5.157824993133545,
59
  "learning_rate": 8e-05,
60
+ "loss": 0.3455,
61
  "step": 40
62
  },
63
  {
64
  "epoch": 0.018188018642719107,
65
+ "eval_loss": 0.9459993839263916,
66
+ "eval_runtime": 20.8498,
67
+ "eval_samples_per_second": 2.398,
68
+ "eval_steps_per_second": 2.398,
69
  "step": 40
70
  },
71
  {
72
  "epoch": 0.022735023303398886,
73
+ "grad_norm": 9.51492977142334,
74
  "learning_rate": 0.0001,
75
+ "loss": 0.317,
76
  "step": 50
77
  },
78
  {
79
  "epoch": 0.022735023303398886,
80
+ "eval_loss": 0.9320575594902039,
81
+ "eval_runtime": 20.8158,
82
+ "eval_samples_per_second": 2.402,
83
+ "eval_steps_per_second": 2.402,
84
  "step": 50
85
  },
86
  {
87
  "epoch": 0.027282027964078664,
88
+ "grad_norm": 8.233291625976562,
89
  "learning_rate": 9.977000919963202e-05,
90
+ "loss": 0.3006,
91
  "step": 60
92
  },
93
  {
94
  "epoch": 0.027282027964078664,
95
+ "eval_loss": 0.9950161576271057,
96
+ "eval_runtime": 20.7989,
97
  "eval_samples_per_second": 2.404,
98
  "eval_steps_per_second": 2.404,
99
  "step": 60
100
  },
101
  {
102
  "epoch": 0.03182903262475844,
103
+ "grad_norm": 11.2651948928833,
104
  "learning_rate": 9.954001839926403e-05,
105
+ "loss": 0.3331,
106
  "step": 70
107
  },
108
  {
109
  "epoch": 0.03182903262475844,
110
+ "eval_loss": 0.9437124729156494,
111
+ "eval_runtime": 20.8698,
112
+ "eval_samples_per_second": 2.396,
113
+ "eval_steps_per_second": 2.396,
114
  "step": 70
115
  },
116
  {
117
  "epoch": 0.036376037285438215,
118
+ "grad_norm": 7.479197025299072,
119
  "learning_rate": 9.931002759889605e-05,
120
+ "loss": 0.2957,
121
  "step": 80
122
  },
123
  {
124
  "epoch": 0.036376037285438215,
125
+ "eval_loss": 0.9456791877746582,
126
+ "eval_runtime": 20.8644,
127
+ "eval_samples_per_second": 2.396,
128
+ "eval_steps_per_second": 2.396,
129
  "step": 80
130
  },
131
  {
132
  "epoch": 0.040923041946118,
133
+ "grad_norm": 8.659111976623535,
134
  "learning_rate": 9.908003679852806e-05,
135
+ "loss": 0.3325,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 0.040923041946118,
140
+ "eval_loss": 0.9326030015945435,
141
+ "eval_runtime": 20.8374,
142
+ "eval_samples_per_second": 2.4,
143
+ "eval_steps_per_second": 2.4,
144
  "step": 90
145
  },
146
  {
147
  "epoch": 0.04547004660679777,
148
+ "grad_norm": 4.722876071929932,
149
  "learning_rate": 9.885004599816008e-05,
150
+ "loss": 0.2857,
151
  "step": 100
152
  },
153
  {
154
  "epoch": 0.04547004660679777,
155
+ "eval_loss": 0.9288083910942078,
156
+ "eval_runtime": 20.844,
157
+ "eval_samples_per_second": 2.399,
158
+ "eval_steps_per_second": 2.399,
159
  "step": 100
160
  },
161
  {
162
  "epoch": 0.05001705126747755,
163
+ "grad_norm": 6.449366569519043,
164
  "learning_rate": 9.86200551977921e-05,
165
+ "loss": 0.3643,
166
  "step": 110
167
  },
168
  {
169
  "epoch": 0.05001705126747755,
170
+ "eval_loss": 0.9256532192230225,
171
+ "eval_runtime": 20.8316,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  "eval_samples_per_second": 2.4,
173
  "eval_steps_per_second": 2.4,
174
+ "step": 110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  }
176
  ],
177
  "logging_steps": 10,
 
191
  "attributes": {}
192
  }
193
  },
194
+ "total_flos": 9148674554486016.0,
195
  "train_batch_size": 1,
196
  "trial_name": null,
197
  "trial_params": null