diagonalge committed
Commit 5022ab3 · verified · 1 Parent(s): 3a4a479

Training in progress, step 30, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:492482922737f774b1c554ae6b0df707f45da2ec1fea952f8d3cdc663975eaf9
+oid sha256:b768e3a4d249cd9bb3425b75641f3e3d66c2119e027046042a354d0900705a9c
 size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c7677d437b82578dbb78df01b1f16713302d84d2d5beb0bcf6fc95926b4f2e6
+oid sha256:d8bc270d065791693bfe545dd959cf593a80072a2fe6e4cb2f28f6520970ef91
 size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49762064c622a61f788bab27148eaa124166994071a98ad304aeda08b72320f4
+oid sha256:f4c6a3932b0c6757b2a554606edacf63dde2370212156fc61645da06ea61feaa
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:79fd9663c22308e7cda458f2f27a3161480f323121be11ab10f5e1ea3f30fc6d
+oid sha256:624cde959d3a917007c76687e7ed04f5f5ce5a570abfa20dd466a4e55f6684fa
 size 1064
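
The four files above are stored via Git LFS, so each diff only swaps the three-line pointer (version, oid, size); the binaries themselves live in LFS storage. A minimal sketch of checking a downloaded blob against its pointer follows; the local paths are assumptions for illustration, not part of this commit.

# Minimal sketch: parse a Git LFS pointer like the ones above and verify that a
# locally downloaded blob matches the recorded oid (sha256) and size.
import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path: str) -> dict:
    """Read the key/value lines of an LFS pointer (version, oid, size)."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Compare the blob's sha256 digest and byte size against the pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return (hashlib.sha256(data).hexdigest() == expected_oid
            and len(data) == expected_size)

# Example with hypothetical paths:
# verify_blob("adapter_model.pointer", "last-checkpoint/adapter_model.safetensors")
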
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0025174649128327773,
+  "epoch": 0.003776197369249166,
   "eval_steps": 25,
-  "global_step": 20,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -155,6 +155,84 @@
       "learning_rate": 0.00019396926207859084,
       "loss": 0.4888,
       "step": 20
+    },
+    {
+      "epoch": 0.002643338158474416,
+      "grad_norm": 6.595178127288818,
+      "learning_rate": 0.00019271838545667876,
+      "loss": 0.6385,
+      "step": 21
+    },
+    {
+      "epoch": 0.002769211404116055,
+      "grad_norm": 3.2132580280303955,
+      "learning_rate": 0.0001913545457642601,
+      "loss": 0.518,
+      "step": 22
+    },
+    {
+      "epoch": 0.002895084649757694,
+      "grad_norm": 16.919002532958984,
+      "learning_rate": 0.0001898794046299167,
+      "loss": 0.3769,
+      "step": 23
+    },
+    {
+      "epoch": 0.0030209578953993327,
+      "grad_norm": 0.020209377631545067,
+      "learning_rate": 0.00018829475928589271,
+      "loss": 0.0002,
+      "step": 24
+    },
+    {
+      "epoch": 0.0031468311410409715,
+      "grad_norm": 0.8074254989624023,
+      "learning_rate": 0.00018660254037844388,
+      "loss": 0.0241,
+      "step": 25
+    },
+    {
+      "epoch": 0.0031468311410409715,
+      "eval_loss": NaN,
+      "eval_runtime": 3493.5914,
+      "eval_samples_per_second": 0.957,
+      "eval_steps_per_second": 0.479,
+      "step": 25
+    },
+    {
+      "epoch": 0.003272704386682611,
+      "grad_norm": 19.329341888427734,
+      "learning_rate": 0.0001848048096156426,
+      "loss": 0.7896,
+      "step": 26
+    },
+    {
+      "epoch": 0.0033985776323242497,
+      "grad_norm": 8.426005363464355,
+      "learning_rate": 0.00018290375725550417,
+      "loss": 0.4921,
+      "step": 27
+    },
+    {
+      "epoch": 0.0035244508779658885,
+      "grad_norm": 6.269211292266846,
+      "learning_rate": 0.00018090169943749476,
+      "loss": 0.5567,
+      "step": 28
+    },
+    {
+      "epoch": 0.0036503241236075274,
+      "grad_norm": 4.1337480545043945,
+      "learning_rate": 0.00017880107536067218,
+      "loss": 0.0482,
+      "step": 29
+    },
+    {
+      "epoch": 0.003776197369249166,
+      "grad_norm": 0.4672463834285736,
+      "learning_rate": 0.0001766044443118978,
+      "loss": 0.0424,
+      "step": 30
     }
   ],
   "logging_steps": 1,
@@ -174,7 +252,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.31707680325632e+16,
+  "total_flos": 1.97561520488448e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null