indiejoseph committed on
Commit
e469930
1 Parent(s): 74c55f8

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +132 -12
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.3749944605037362,
4
- "train_runtime": 1805.8732,
5
  "train_samples": 128272,
6
- "train_samples_per_second": 355.152,
7
- "train_steps_per_second": 2.774
8
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "train_loss": 0.34533324811340976,
4
+ "train_runtime": 2399.0328,
5
  "train_samples": 128272,
6
+ "train_samples_per_second": 374.277,
7
+ "train_steps_per_second": 2.924
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.3749944605037362,
4
- "train_runtime": 1805.8732,
5
  "train_samples": 128272,
6
- "train_samples_per_second": 355.152,
7
- "train_steps_per_second": 2.774
8
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "train_loss": 0.34533324811340976,
4
+ "train_runtime": 2399.0328,
5
  "train_samples": 128272,
6
+ "train_samples_per_second": 374.277,
7
+ "train_steps_per_second": 2.924
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.999750530123488,
5
  "eval_steps": 500,
6
- "global_step": 5010,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -309,20 +309,140 @@
309
  "step": 5000
310
  },
311
  {
312
- "epoch": 5.0,
313
- "step": 5010,
314
- "total_flos": 8.106812063308723e+16,
315
- "train_loss": 0.3749944605037362,
316
- "train_runtime": 1805.8732,
317
- "train_samples_per_second": 355.152,
318
- "train_steps_per_second": 2.774
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  }
320
  ],
321
  "logging_steps": 100,
322
- "max_steps": 5010,
323
- "num_train_epochs": 5,
324
  "save_steps": 500,
325
- "total_flos": 8.106812063308723e+16,
326
  "trial_name": null,
327
  "trial_params": null
328
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.999625795185231,
5
  "eval_steps": 500,
6
+ "global_step": 7014,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
309
  "step": 5000
310
  },
311
  {
312
+ "epoch": 5.09,
313
+ "learning_rate": 1.364414029084688e-05,
314
+ "loss": 1.2276,
315
+ "step": 5100
316
+ },
317
+ {
318
+ "epoch": 5.19,
319
+ "learning_rate": 1.2931280296549758e-05,
320
+ "loss": 1.2064,
321
+ "step": 5200
322
+ },
323
+ {
324
+ "epoch": 5.29,
325
+ "learning_rate": 1.2218420302252638e-05,
326
+ "loss": 1.2102,
327
+ "step": 5300
328
+ },
329
+ {
330
+ "epoch": 5.39,
331
+ "learning_rate": 1.1505560307955517e-05,
332
+ "loss": 1.2095,
333
+ "step": 5400
334
+ },
335
+ {
336
+ "epoch": 5.49,
337
+ "learning_rate": 1.0792700313658398e-05,
338
+ "loss": 1.2239,
339
+ "step": 5500
340
+ },
341
+ {
342
+ "epoch": 5.59,
343
+ "learning_rate": 1.0079840319361278e-05,
344
+ "loss": 1.216,
345
+ "step": 5600
346
+ },
347
+ {
348
+ "epoch": 5.69,
349
+ "learning_rate": 9.366980325064158e-06,
350
+ "loss": 1.2206,
351
+ "step": 5700
352
+ },
353
+ {
354
+ "epoch": 5.79,
355
+ "learning_rate": 8.654120330767037e-06,
356
+ "loss": 1.2133,
357
+ "step": 5800
358
+ },
359
+ {
360
+ "epoch": 5.89,
361
+ "learning_rate": 7.941260336469917e-06,
362
+ "loss": 1.2059,
363
+ "step": 5900
364
+ },
365
+ {
366
+ "epoch": 5.99,
367
+ "learning_rate": 7.228400342172797e-06,
368
+ "loss": 1.206,
369
+ "step": 6000
370
+ },
371
+ {
372
+ "epoch": 6.09,
373
+ "learning_rate": 6.5155403478756775e-06,
374
+ "loss": 1.1976,
375
+ "step": 6100
376
+ },
377
+ {
378
+ "epoch": 6.19,
379
+ "learning_rate": 5.802680353578557e-06,
380
+ "loss": 1.1966,
381
+ "step": 6200
382
+ },
383
+ {
384
+ "epoch": 6.29,
385
+ "learning_rate": 5.0898203592814375e-06,
386
+ "loss": 1.1992,
387
+ "step": 6300
388
+ },
389
+ {
390
+ "epoch": 6.39,
391
+ "learning_rate": 4.376960364984317e-06,
392
+ "loss": 1.1946,
393
+ "step": 6400
394
+ },
395
+ {
396
+ "epoch": 6.49,
397
+ "learning_rate": 3.664100370687197e-06,
398
+ "loss": 1.1879,
399
+ "step": 6500
400
+ },
401
+ {
402
+ "epoch": 6.59,
403
+ "learning_rate": 2.951240376390077e-06,
404
+ "loss": 1.1863,
405
+ "step": 6600
406
+ },
407
+ {
408
+ "epoch": 6.69,
409
+ "learning_rate": 2.238380382092957e-06,
410
+ "loss": 1.182,
411
+ "step": 6700
412
+ },
413
+ {
414
+ "epoch": 6.79,
415
+ "learning_rate": 1.525520387795837e-06,
416
+ "loss": 1.1914,
417
+ "step": 6800
418
+ },
419
+ {
420
+ "epoch": 6.89,
421
+ "learning_rate": 8.12660393498717e-07,
422
+ "loss": 1.1892,
423
+ "step": 6900
424
+ },
425
+ {
426
+ "epoch": 6.99,
427
+ "learning_rate": 9.98003992015968e-08,
428
+ "loss": 1.1943,
429
+ "step": 7000
430
+ },
431
+ {
432
+ "epoch": 7.0,
433
+ "step": 7014,
434
+ "total_flos": 1.1685618503900851e+17,
435
+ "train_loss": 0.34533324811340976,
436
+ "train_runtime": 2399.0328,
437
+ "train_samples_per_second": 374.277,
438
+ "train_steps_per_second": 2.924
439
  }
440
  ],
441
  "logging_steps": 100,
442
+ "max_steps": 7014,
443
+ "num_train_epochs": 7,
444
  "save_steps": 500,
445
+ "total_flos": 1.1685618503900851e+17,
446
  "trial_name": null,
447
  "trial_params": null
448
  }