augustocsc commited on
Commit
ffd4726
1 Parent(s): 7e1da94

Training in progress, step 9000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa57558f82a576e3b94f619c220c003782fbe5c1533390c8a365306af6e14297
3
  size 497780352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca19d4a9c3a66cc062755613586eb9666cc8033031890aa61853fd9c4cf2041
3
  size 497780352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74c617b957b1b0066ed0cce4639b6fb3a6abb26677afc855950df159d4a6291f
3
  size 995654586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfda32c2df088cee5c680d79751e871a539eaf0989bb84c9f148a0e06f01717c
3
  size 995654586
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da7a55020780f531c20dc8877ede6e098965b594b1e88a01dc9f722d9dee4ac7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1be58c6095331490031ab4484c5ffe4bf36bc8c2ec9130e8e665c8f156e2c90
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1f8795cd71b0ad726fc421a5a3758672326d602a89558c9fec480df511fbfa0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac32590bb4b92b786de3b7715c46f1c43487e1605b133d8603403d8e65638b0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
  "eval_steps": 200,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -439,6 +439,60 @@
439
  "eval_samples_per_second": 421.963,
440
  "eval_steps_per_second": 6.593,
441
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
442
  }
443
  ],
444
  "logging_steps": 500,
@@ -446,7 +500,7 @@
446
  "num_input_tokens_seen": 0,
447
  "num_train_epochs": 10,
448
  "save_steps": 1000,
449
- "total_flos": 3.3445380096e+16,
450
  "train_batch_size": 64,
451
  "trial_name": null,
452
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.0,
5
  "eval_steps": 200,
6
+ "global_step": 9000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
439
  "eval_samples_per_second": 421.963,
440
  "eval_steps_per_second": 6.593,
441
  "step": 8000
442
+ },
443
+ {
444
+ "epoch": 8.2,
445
+ "eval_loss": 0.02438100427389145,
446
+ "eval_runtime": 37.7187,
447
+ "eval_samples_per_second": 424.193,
448
+ "eval_steps_per_second": 6.628,
449
+ "step": 8200
450
+ },
451
+ {
452
+ "epoch": 8.4,
453
+ "eval_loss": 0.02455132268369198,
454
+ "eval_runtime": 37.679,
455
+ "eval_samples_per_second": 424.64,
456
+ "eval_steps_per_second": 6.635,
457
+ "step": 8400
458
+ },
459
+ {
460
+ "epoch": 8.5,
461
+ "grad_norm": 0.09001246094703674,
462
+ "learning_rate": 7.5e-06,
463
+ "loss": 0.0252,
464
+ "step": 8500
465
+ },
466
+ {
467
+ "epoch": 8.6,
468
+ "eval_loss": 0.024252494797110558,
469
+ "eval_runtime": 37.731,
470
+ "eval_samples_per_second": 424.054,
471
+ "eval_steps_per_second": 6.626,
472
+ "step": 8600
473
+ },
474
+ {
475
+ "epoch": 8.8,
476
+ "eval_loss": 0.02421058714389801,
477
+ "eval_runtime": 37.6831,
478
+ "eval_samples_per_second": 424.593,
479
+ "eval_steps_per_second": 6.634,
480
+ "step": 8800
481
+ },
482
+ {
483
+ "epoch": 9.0,
484
+ "grad_norm": 0.07999344915151596,
485
+ "learning_rate": 5e-06,
486
+ "loss": 0.0244,
487
+ "step": 9000
488
+ },
489
+ {
490
+ "epoch": 9.0,
491
+ "eval_loss": 0.024233995005488396,
492
+ "eval_runtime": 37.8728,
493
+ "eval_samples_per_second": 422.467,
494
+ "eval_steps_per_second": 6.601,
495
+ "step": 9000
496
  }
497
  ],
498
  "logging_steps": 500,
 
500
  "num_input_tokens_seen": 0,
501
  "num_train_epochs": 10,
502
  "save_steps": 1000,
503
+ "total_flos": 3.7626052608e+16,
504
  "train_batch_size": 64,
505
  "trial_name": null,
506
  "trial_params": null