alicegoesdown committed
Commit e6e89b5 · verified · 1 Parent(s): f9f2f9f

Training in progress, step 3600, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:25ba111dd83279dabb0d19e7d4e8585af8e57bae04b236555189b549cfcfd971
+ oid sha256:1cadaf5fbfd4e6aee40554c146ca75dc1f95cbd1b5133ac28d790b5d9edc490b
  size 6832520
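
The block above is a Git LFS pointer file, not the adapter weights themselves: a spec version line, the sha256 object id of the stored blob, and its size in bytes. A minimal sketch of reading such a pointer from a local checkout; the helper name parse_lfs_pointer is illustrative, not part of any library:

# Parse a Git LFS pointer file ("key value" per line) into a dict.
def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line:
                key, _, value = line.partition(" ")
                fields[key] = value
    return fields

# Hypothetical usage on the file in this diff (before git lfs pull):
# parse_lfs_pointer("last-checkpoint/adapter_model.safetensors")
# -> {"version": "https://git-lfs.github.com/spec/v1",
#     "oid": "sha256:1cadaf5f...", "size": "6832520"}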
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5f0e29c4c6a37e6d71db5da8939c18bc60a1bc43441cb1e059da6fc84bc0b009
+ oid sha256:d77f59571dc19ec2ae97f48b762fc9c7104b450b1dd4f4f44c20086f61be99a4
  size 13739450
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:25fc5c5fd85eab708d82da53acf51c735610623a3928c8f6735fd352feda164f
+ oid sha256:61f9e60c11be639e03bf81d78cbe30bf20382df1ba6584029adec3134d967f6d
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:237c7f92f9adb36b78a572488f13feee11b6568d1f9b3d2334cf9a8a16d76d71
+ oid sha256:cea58db64ba456c38a9f8ab6236e784932b8260cdaa3af8426d157163570e03c
  size 1256
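
Besides the adapter weights, the checkpoint refreshes the optimizer, learning-rate scheduler, and RNG state files that let training resume deterministically from step 3600. A minimal sketch of inspecting them with PyTorch, assuming the real binaries have been fetched with git lfs pull; nothing here is specific to this repo's training code:

import torch

# The optimizer and scheduler files are torch.save payloads; load onto CPU to inspect.
opt_state = torch.load("last-checkpoint/optimizer.pt", map_location="cpu")
sched_state = torch.load("last-checkpoint/scheduler.pt", map_location="cpu")
# rng_state.pth bundles Python/NumPy/Torch RNG states, hence the explicit
# weights_only=False on newer PyTorch versions.
rng_state = torch.load("last-checkpoint/rng_state.pth", map_location="cpu",
                       weights_only=False)

# A standard torch.optim state dict carries "param_groups" with the current
# learning rate(s); the exact layout can differ for other optimizers.
print([group.get("lr") for group in opt_state.get("param_groups", [])])
print(sorted(sched_state.keys()), sorted(rng_state.keys()))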
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.9334535598754883,
  "best_model_checkpoint": "./output/checkpoint-1500",
- "epoch": 4.406130268199234,
+ "epoch": 4.597701149425287,
  "eval_steps": 150,
- "global_step": 3450,
+ "global_step": 3600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2606,6 +2606,119 @@
  "eval_samples_per_second": 20.4,
  "eval_steps_per_second": 20.4,
  "step": 3450
+ },
+ {
+ "epoch": 4.41890166028097,
+ "grad_norm": 2.3654282093048096,
+ "learning_rate": 3.0975050967969045e-06,
+ "loss": 1.7632,
+ "step": 3460
+ },
+ {
+ "epoch": 4.431673052362708,
+ "grad_norm": 2.546827793121338,
+ "learning_rate": 3.097407623305086e-06,
+ "loss": 1.7426,
+ "step": 3470
+ },
+ {
+ "epoch": 4.444444444444445,
+ "grad_norm": 2.499905824661255,
+ "learning_rate": 3.0972451720296086e-06,
+ "loss": 1.7915,
+ "step": 3480
+ },
+ {
+ "epoch": 4.457215836526181,
+ "grad_norm": 2.484066963195801,
+ "learning_rate": 3.097017749786602e-06,
+ "loss": 1.7089,
+ "step": 3490
+ },
+ {
+ "epoch": 4.469987228607918,
+ "grad_norm": 2.5977776050567627,
+ "learning_rate": 3.096725366118249e-06,
+ "loss": 1.7957,
+ "step": 3500
+ },
+ {
+ "epoch": 4.482758620689655,
+ "grad_norm": 2.3807027339935303,
+ "learning_rate": 3.096368033292382e-06,
+ "loss": 1.7295,
+ "step": 3510
+ },
+ {
+ "epoch": 4.495530012771392,
+ "grad_norm": 2.4631288051605225,
+ "learning_rate": 3.095945766301971e-06,
+ "loss": 1.7719,
+ "step": 3520
+ },
+ {
+ "epoch": 4.508301404853129,
+ "grad_norm": 2.4563748836517334,
+ "learning_rate": 3.095458582864493e-06,
+ "loss": 1.7191,
+ "step": 3530
+ },
+ {
+ "epoch": 4.521072796934866,
+ "grad_norm": 2.3577940464019775,
+ "learning_rate": 3.09490650342119e-06,
+ "loss": 1.725,
+ "step": 3540
+ },
+ {
+ "epoch": 4.533844189016603,
+ "grad_norm": 2.4217264652252197,
+ "learning_rate": 3.0942895511362085e-06,
+ "loss": 1.7284,
+ "step": 3550
+ },
+ {
+ "epoch": 4.54661558109834,
+ "grad_norm": 2.335932493209839,
+ "learning_rate": 3.093607751895632e-06,
+ "loss": 1.7966,
+ "step": 3560
+ },
+ {
+ "epoch": 4.559386973180077,
+ "grad_norm": 2.4453847408294678,
+ "learning_rate": 3.0928611343063904e-06,
+ "loss": 1.787,
+ "step": 3570
+ },
+ {
+ "epoch": 4.572158365261814,
+ "grad_norm": 2.350062608718872,
+ "learning_rate": 3.092049729695062e-06,
+ "loss": 1.7184,
+ "step": 3580
+ },
+ {
+ "epoch": 4.58492975734355,
+ "grad_norm": 2.446969985961914,
+ "learning_rate": 3.091173572106561e-06,
+ "loss": 1.8097,
+ "step": 3590
+ },
+ {
+ "epoch": 4.597701149425287,
+ "grad_norm": 2.3991920948028564,
+ "learning_rate": 3.090232698302703e-06,
+ "loss": 1.7007,
+ "step": 3600
+ },
+ {
+ "epoch": 4.597701149425287,
+ "eval_loss": 1.9395427703857422,
+ "eval_runtime": 24.7084,
+ "eval_samples_per_second": 20.236,
+ "eval_steps_per_second": 20.236,
+ "step": 3600
  }
  ],
  "logging_steps": 10,
@@ -2625,7 +2738,7 @@
  "attributes": {}
  }
  },
- "total_flos": 4.782792928016794e+16,
+ "total_flos": 4.988587672549786e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null