ncbateman commited on
Commit
97d2c58
·
verified ·
1 Parent(s): 58ae81a

Training in progress, step 860, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2446e323705b3c535d054969f9761f5b59a5e3bbc07424bc218a9034170b3134
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b129df98bdc306d1bc4565ee000cc0b871ca7381053214c2914dac7ae77608
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fc67b9db032061bdb48e9268ce036f9baffb6470d5aa76f5eb0276949db998b
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb976eeabc1d40bc1ef543f0d6b42e69810a3b568ef7fec526ce855dcd53f250
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:085f468c9ca1512d3e0074132a3b99e1cdcd42473eeebeb3fe393de996efa9e0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798b54db949c4ae9de08b62eac89d6111767c04dba8dc38518460f18e2c13d16
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc74520f1262c2228ad975fd3e788ebc1722dd5506dd79c24c13063ed9e2e823
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2d74cdd7773895240aff3837ca564e1fc035a5a8a0853fee30d28f2c6ee4c25
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1064380459398253,
5
  "eval_steps": 386,
6
- "global_step": 855,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6016,6 +6016,41 @@
6016
  "learning_rate": 8.938360996464048e-05,
6017
  "loss": 0.821,
6018
  "step": 855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6019
  }
6020
  ],
6021
  "logging_steps": 1,
@@ -6035,7 +6070,7 @@
6035
  "attributes": {}
6036
  }
6037
  },
6038
- "total_flos": 9.557664107247698e+17,
6039
  "train_batch_size": 4,
6040
  "trial_name": null,
6041
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.112908443869298,
5
  "eval_steps": 386,
6
+ "global_step": 860,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6016
  "learning_rate": 8.938360996464048e-05,
6017
  "loss": 0.821,
6018
  "step": 855
6019
+ },
6020
+ {
6021
+ "epoch": 1.1077321255257198,
6022
+ "grad_norm": 0.910192608833313,
6023
+ "learning_rate": 8.93581960783559e-05,
6024
+ "loss": 0.7756,
6025
+ "step": 856
6026
+ },
6027
+ {
6028
+ "epoch": 1.1090262051116144,
6029
+ "grad_norm": 0.8553609848022461,
6030
+ "learning_rate": 8.933275543214245e-05,
6031
+ "loss": 0.8378,
6032
+ "step": 857
6033
+ },
6034
+ {
6035
+ "epoch": 1.1103202846975089,
6036
+ "grad_norm": 0.9897130131721497,
6037
+ "learning_rate": 8.930728804329744e-05,
6038
+ "loss": 0.8233,
6039
+ "step": 858
6040
+ },
6041
+ {
6042
+ "epoch": 1.1116143642834033,
6043
+ "grad_norm": 0.8921964168548584,
6044
+ "learning_rate": 8.928179392913633e-05,
6045
+ "loss": 0.9355,
6046
+ "step": 859
6047
+ },
6048
+ {
6049
+ "epoch": 1.112908443869298,
6050
+ "grad_norm": 1.06694757938385,
6051
+ "learning_rate": 8.925627310699275e-05,
6052
+ "loss": 0.9271,
6053
+ "step": 860
6054
  }
6055
  ],
6056
  "logging_steps": 1,
 
6070
  "attributes": {}
6071
  }
6072
  },
6073
+ "total_flos": 9.613560965832376e+17,
6074
  "train_batch_size": 4,
6075
  "trial_name": null,
6076
  "trial_params": null