ardaspear commited on
Commit
5c9d999
·
verified ·
1 Parent(s): f3215d4

Training in progress, step 51, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef352b3755cd2c3514a70e0f1913121f3c90f77b381ac85eeb778430e7a0492f
3
  size 159967880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7509a38f253530a371c4f75f1b5dcbea50b136a890c57a3ee3bc66254766e63f
3
  size 159967880
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:255220845625b8a532eac68bcefb4bfe60d87d73a6ff5f76ef1a4f52d1f270d8
3
  size 81730196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:943bc6dada57397735ca818193ed0b16c7c4f3889837989d8dfbb6097b4a594f
3
  size 81730196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:624617eb67c5ae66a26f74399b042214fe4beaf1a0d11ffcede0e4228de76c62
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c543948a535496f0dde46e00d90041e4b64736cb4a715d793f78a309b0450ac5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0af27ed5b5e5c3013f1da7a97e494138e751399ceff1f8e7486b6e269bc7092f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9fa892ecd236e652150058649b13d9161331d85e374c4bec1d60ad6a556c3a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00872633942893808,
5
  "eval_steps": 17,
6
- "global_step": 34,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -108,6 +108,56 @@
108
  "eval_samples_per_second": 13.292,
109
  "eval_steps_per_second": 1.663,
110
  "step": 34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  }
112
  ],
113
  "logging_steps": 3,
@@ -127,7 +177,7 @@
127
  "attributes": {}
128
  }
129
  },
130
- "total_flos": 4.977191316514406e+16,
131
  "train_batch_size": 8,
132
  "trial_name": null,
133
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.013089509143407122,
5
  "eval_steps": 17,
6
+ "global_step": 51,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
108
  "eval_samples_per_second": 13.292,
109
  "eval_steps_per_second": 1.663,
110
  "step": 34
111
+ },
112
+ {
113
+ "epoch": 0.009239653512993263,
114
+ "grad_norm": 0.3209523558616638,
115
+ "learning_rate": 9.545032675245813e-05,
116
+ "loss": 2.4035,
117
+ "step": 36
118
+ },
119
+ {
120
+ "epoch": 0.010009624639076035,
121
+ "grad_norm": 0.33573678135871887,
122
+ "learning_rate": 9.43611409721806e-05,
123
+ "loss": 2.3783,
124
+ "step": 39
125
+ },
126
+ {
127
+ "epoch": 0.010779595765158807,
128
+ "grad_norm": 0.3218136131763458,
129
+ "learning_rate": 9.316282404787871e-05,
130
+ "loss": 2.4247,
131
+ "step": 42
132
+ },
133
+ {
134
+ "epoch": 0.011549566891241578,
135
+ "grad_norm": 0.30115804076194763,
136
+ "learning_rate": 9.185832391312644e-05,
137
+ "loss": 2.3915,
138
+ "step": 45
139
+ },
140
+ {
141
+ "epoch": 0.01231953801732435,
142
+ "grad_norm": 0.34325042366981506,
143
+ "learning_rate": 9.045084971874738e-05,
144
+ "loss": 2.3639,
145
+ "step": 48
146
+ },
147
+ {
148
+ "epoch": 0.013089509143407122,
149
+ "grad_norm": 0.3467099666595459,
150
+ "learning_rate": 8.894386393810563e-05,
151
+ "loss": 2.3171,
152
+ "step": 51
153
+ },
154
+ {
155
+ "epoch": 0.013089509143407122,
156
+ "eval_loss": 2.341045379638672,
157
+ "eval_runtime": 494.2874,
158
+ "eval_samples_per_second": 13.278,
159
+ "eval_steps_per_second": 1.661,
160
+ "step": 51
161
  }
162
  ],
163
  "logging_steps": 3,
 
177
  "attributes": {}
178
  }
179
  },
180
+ "total_flos": 7.41634467692544e+16,
181
  "train_batch_size": 8,
182
  "trial_name": null,
183
  "trial_params": null