leixa committed (verified)
Commit ca420f5 · 1 Parent(s): 5ecccb7

Training in progress, step 210, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e31caa0549662895406b35f60ee272ab02940a6d1169d24cf30843d63008545
+oid sha256:2791b511c4630b21fd991533625ec1ec52da3e5cc1609da7a4c2cfedc1bcba6d
 size 191968
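Only the Git LFS pointer (sha256 oid) changes here; the adapter payload itself lives in LFS storage. A minimal sketch of loading this adapter from the checkpoint directory with the peft library; the base model identifier is an assumption, since the commit does not name it:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Assumption: "base-model-name" is a placeholder; the commit does not state the base model.
base = AutoModelForCausalLM.from_pretrained("base-model-name")
# Loads last-checkpoint/adapter_model.safetensors on top of the base weights.
model = PeftModel.from_pretrained(base, "last-checkpoint")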
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a691ecd73b2e1973b2873ff3d47ecc200e0a85a6854d7fd2dc11400647afb9e
+oid sha256:a884f4320934e16da5143deade141b5396382f6dfdc0784d68105ea5d71bc6b2
 size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4d35c87f5d0e8ea62bc673d149da7d93d79a8b9b3951cb1475638c45526807b
+oid sha256:fec363189963dc133232a1202530bba3901933ae6ee2483645557d8ee2922117
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cd12c7442104c6bee1178cd38829bd00c01ded478e430b03fe72cfaff700e4be
+oid sha256:16a7801db1aa9f181cf78d5699e3a7862ab42bf9c452e31cb54501196abe18a0
 size 1064
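optimizer.pt, scheduler.pt, and rng_state.pth are ordinary torch pickles written alongside the adapter. A minimal sketch for inspecting them locally, assuming the files have been fetched from Git LFS into last-checkpoint/:

import torch

# Assumption: last-checkpoint/ has been pulled from LFS, not just the pointer files.
optimizer_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
scheduler_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu")

print(optimizer_state.keys())  # optimizer param groups and per-parameter state
print(scheduler_state)         # learning-rate scheduler state dict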
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.290030211480363,
+  "epoch": 2.5438066465256797,
   "eval_steps": 21,
-  "global_step": 189,
+  "global_step": 210,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -528,6 +528,63 @@
       "eval_samples_per_second": 540.238,
       "eval_steps_per_second": 69.459,
       "step": 189
+    },
+    {
+      "epoch": 2.326283987915408,
+      "grad_norm": 0.17823714017868042,
+      "learning_rate": 1.3390009847968504e-05,
+      "loss": 10.1773,
+      "step": 192
+    },
+    {
+      "epoch": 2.3625377643504533,
+      "grad_norm": 0.21689902245998383,
+      "learning_rate": 1.2075907148663579e-05,
+      "loss": 10.1772,
+      "step": 195
+    },
+    {
+      "epoch": 2.3987915407854983,
+      "grad_norm": 0.3612368106842041,
+      "learning_rate": 1.0820770952526155e-05,
+      "loss": 10.1826,
+      "step": 198
+    },
+    {
+      "epoch": 2.4350453172205437,
+      "grad_norm": 0.19127142429351807,
+      "learning_rate": 9.62655281559679e-06,
+      "loss": 10.1821,
+      "step": 201
+    },
+    {
+      "epoch": 2.471299093655589,
+      "grad_norm": 0.21965357661247253,
+      "learning_rate": 8.49510957510633e-06,
+      "loss": 10.1765,
+      "step": 204
+    },
+    {
+      "epoch": 2.5075528700906347,
+      "grad_norm": 0.1769980639219284,
+      "learning_rate": 7.4282004623615396e-06,
+      "loss": 10.1756,
+      "step": 207
+    },
+    {
+      "epoch": 2.5438066465256797,
+      "grad_norm": 0.20193351805210114,
+      "learning_rate": 6.427484367393699e-06,
+      "loss": 10.178,
+      "step": 210
+    },
+    {
+      "epoch": 2.5438066465256797,
+      "eval_loss": 10.170087814331055,
+      "eval_runtime": 0.2628,
+      "eval_samples_per_second": 532.826,
+      "eval_steps_per_second": 68.506,
+      "step": 210
     }
   ],
   "logging_steps": 3,
@@ -547,7 +604,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 21083074854912.0,
+  "total_flos": 23425638727680.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null