rakhman-llm committed
Commit 2e88c03 · verified · 1 Parent(s): 1b7d868

Training in progress, step 44500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:440a49b94440cacff83f386734abd67ba7ebf0590110ed4471a084cfe5123375
+oid sha256:6c3a41320d77f15c9a600c514111a9e25e4cbd36ae0205d94fa0c4cb342f789a
 size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa8b9c57b8597bab044debeab7cc337cd4ec0227fbde8dd82185f8b22bf3a034
+oid sha256:f6c9fdfcdbc56237f182572da63342d4ff7b8e0b0e45095657eb0e743d19f6ce
 size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d5486da749b598a61d62adc9f231f7450ebff3f0e201a6edd91a104ce06d0fc
+oid sha256:377a47bd0e88de4a26ae8d013d8cc57599e57dfed8febc9da0fea06bb8f5c79f
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91c87f62d5aed9d7a6d425bf9b7734b81e555049c990c49c2d251a8a04bf9890
+oid sha256:5c2f59490e3dd2b05891cf4a120f5b3dd4417dfcdb085e76dd005b201e1dc531
 size 1064
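
The four files above are stored as Git LFS pointers, so only their SHA-256 object ids change in this commit while the byte sizes stay the same. A minimal sketch of how a downloaded file could be checked against its pointer, assuming the repository was cloned with LFS enabled and using the new model.safetensors digest and size from the diff above (the helper name is illustrative):

    # Sketch: verify a downloaded checkpoint file against its Git LFS pointer.
    # Expected digest and size are taken from the model.safetensors pointer in this commit.
    import hashlib
    from pathlib import Path

    EXPECTED_SHA256 = "6c3a41320d77f15c9a600c514111a9e25e4cbd36ae0205d94fa0c4cb342f789a"
    EXPECTED_SIZE = 891558696

    def verify_lfs_object(path: Path, expected_sha256: str, expected_size: int) -> bool:
        """Return True if the file's size and SHA-256 digest match the LFS pointer."""
        if path.stat().st_size != expected_size:
            return False
        digest = hashlib.sha256()
        with path.open("rb") as f:
            # Hash in 1 MiB chunks so large checkpoint files are not loaded into memory at once.
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        return digest.hexdigest() == expected_sha256

    if __name__ == "__main__":
        ok = verify_lfs_object(Path("last-checkpoint/model.safetensors"),
                               EXPECTED_SHA256, EXPECTED_SIZE)
        print("model.safetensors matches pointer:", ok)
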
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.9333333333333336,
+  "epoch": 2.966666666666667,
   "eval_steps": 500,
-  "global_step": 44000,
+  "global_step": 44500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3103,6 +3103,41 @@
       "learning_rate": 4.5155555555555554e-07,
       "loss": 0.051,
       "step": 44000
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 0.1060103327035904,
+      "learning_rate": 4.071111111111112e-07,
+      "loss": 0.0461,
+      "step": 44100
+    },
+    {
+      "epoch": 2.9466666666666668,
+      "grad_norm": 0.43563324213027954,
+      "learning_rate": 3.6266666666666674e-07,
+      "loss": 0.0553,
+      "step": 44200
+    },
+    {
+      "epoch": 2.953333333333333,
+      "grad_norm": 0.11668545007705688,
+      "learning_rate": 3.182222222222223e-07,
+      "loss": 0.0514,
+      "step": 44300
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 0.20122574269771576,
+      "learning_rate": 2.7377777777777783e-07,
+      "loss": 0.0582,
+      "step": 44400
+    },
+    {
+      "epoch": 2.966666666666667,
+      "grad_norm": 0.2473125159740448,
+      "learning_rate": 2.2933333333333335e-07,
+      "loss": 0.0569,
+      "step": 44500
     }
   ],
   "logging_steps": 100,
@@ -3122,7 +3157,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0717658873856e+17,
+  "total_flos": 1.0839450451968e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null