rakhman-llm committed
Commit cc04718 · verified · 1 Parent(s): 070c4c6

Training in progress, step 45000, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6c3a41320d77f15c9a600c514111a9e25e4cbd36ae0205d94fa0c4cb342f789a
+ oid sha256:c8a815a94d192095646a02d8b6a79105e9260920970fbe97d2aa19def46b4c90
  size 891558696
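
Only the Git LFS pointer changes here: the sha256 oid is updated to the new weights while the size stays at 891558696 bytes. A minimal sketch, assuming the blob has already been pulled into last-checkpoint/, of checking a downloaded file against the oid in the pointer (Git LFS oids are a plain sha256 over the file contents):

import hashlib
from pathlib import Path

def lfs_sha256(path: Path, chunk_size: int = 1 << 20) -> str:
    # Stream the file so large checkpoints do not need to fit in memory.
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected oid taken from the updated pointer above.
expected = "c8a815a94d192095646a02d8b6a79105e9260920970fbe97d2aa19def46b4c90"
weights = Path("last-checkpoint/model.safetensors")  # assumed local path
assert lfs_sha256(weights) == expected, "weights do not match the LFS pointer"
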
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f6c9fdfcdbc56237f182572da63342d4ff7b8e0b0e45095657eb0e743d19f6ce
+ oid sha256:c02017f320f6b1caddf1618e07ec690b785b4a529ef34025e3e117b762e0fc78
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:377a47bd0e88de4a26ae8d013d8cc57599e57dfed8febc9da0fea06bb8f5c79f
+ oid sha256:2e270dd652a0994b32575b44314119dedc30073f859dfa0aeff422823bc4d4be
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c2f59490e3dd2b05891cf4a120f5b3dd4417dfcdb085e76dd005b201e1dc531
+ oid sha256:5ee7ee57a2874715dabf5aac57b903a1055eceed18fa43649c597404d6a55c2f
  size 1064
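
Together with trainer_state.json below, these four LFS-tracked files form a complete Trainer checkpoint: the model weights (model.safetensors), the optimizer state (optimizer.pt, roughly twice the weight size, which is what an Adam-style optimizer with two moment buffers per parameter would produce), the RNG state (rng_state.pth) and the LR scheduler state (scheduler.pt). They are normally consumed by transformers' Trainer via trainer.train(resume_from_checkpoint="last-checkpoint"); the sketch below, assuming a local copy of the directory, only inspects the pieces by hand:

import torch
from safetensors.torch import load_file

ckpt = "last-checkpoint"  # assumed local copy of this checkpoint directory

state_dict = load_file(f"{ckpt}/model.safetensors")             # weights
optim = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu")  # optimizer state_dict
sched = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu")  # LR scheduler state_dict

n_params = sum(t.numel() for t in state_dict.values())
print(f"{n_params:,} parameters")
print("optimizer state keys:", list(optim.keys()))   # typically 'state', 'param_groups'
print("scheduler state keys:", list(sched.keys()))
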
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 2.966666666666667,
+ "epoch": 3.0,
  "eval_steps": 500,
- "global_step": 44500,
+ "global_step": 45000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -3138,6 +3138,41 @@
  "learning_rate": 2.2933333333333335e-07,
  "loss": 0.0569,
  "step": 44500
+ },
+ {
+ "epoch": 2.9733333333333336,
+ "grad_norm": 0.1887609213590622,
+ "learning_rate": 1.848888888888889e-07,
+ "loss": 0.0479,
+ "step": 44600
+ },
+ {
+ "epoch": 2.98,
+ "grad_norm": 0.19531774520874023,
+ "learning_rate": 1.4044444444444447e-07,
+ "loss": 0.0453,
+ "step": 44700
+ },
+ {
+ "epoch": 2.986666666666667,
+ "grad_norm": 0.3343108594417572,
+ "learning_rate": 9.6e-08,
+ "loss": 0.0503,
+ "step": 44800
+ },
+ {
+ "epoch": 2.993333333333333,
+ "grad_norm": 0.2250046730041504,
+ "learning_rate": 5.155555555555556e-08,
+ "loss": 0.053,
+ "step": 44900
+ },
+ {
+ "epoch": 3.0,
+ "grad_norm": 0.10516126453876495,
+ "learning_rate": 7.111111111111112e-09,
+ "loss": 0.0502,
+ "step": 45000
  }
  ],
  "logging_steps": 100,
@@ -3152,12 +3187,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 1.0839450451968e+17,
+ "total_flos": 1.096124203008e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null