MeedoSam commited on
Commit
433f15a
1 Parent(s): 0175872

Uploaded checkpoint-2000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:821c0ac7a30d0c43834f06feb7fd15b70ef13d1f6aeac25967123bc26b176cf0
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529afbfd214e67f0cd6ed38c46a882f69bf7229384ab1df0ce60cb1f5e4f2965
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1df28215a0916dd28c5a5b8284ab78a1c942bf5b3cbc4f05dd051c1fd89566ed
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0cf9aa45dd205493bc09191810b39b693ad29080f038d14b0cea034dec265cb
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ca7c0296e5cbbfde7db0a0bddc642b682b5cf589c67faf9042bea2615f4d80e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6953db975b289c1d4893316fe618b5891abd5920bad079a04f9bc032f0d6a4f
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c69e416a1c1c7cfbd03505995e73f15a3c32b1cc0b0dc09a821d33b4e92286e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78180a777fabc8bdc6e37d70a05529fbfe5bbeb093c49dd0124cc2bdeb32db78
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.31330774653403304,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,156 @@
157
  "eval_samples_per_second": 5.198,
158
  "eval_steps_per_second": 5.198,
159
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 100,
@@ -164,7 +314,7 @@
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 2,
166
  "save_steps": 1000,
167
- "total_flos": 1.6102125993984e+16,
168
  "train_batch_size": 1,
169
  "trial_name": null,
170
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6266154930680661,
5
  "eval_steps": 100,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_samples_per_second": 5.198,
158
  "eval_steps_per_second": 5.198,
159
  "step": 1000
160
+ },
161
+ {
162
+ "epoch": 0.34,
163
+ "grad_norm": 0.007412207778543234,
164
+ "learning_rate": 1.7333333333333336e-05,
165
+ "loss": 0.0631,
166
+ "step": 1100
167
+ },
168
+ {
169
+ "epoch": 0.34,
170
+ "eval_loss": 0.025782734155654907,
171
+ "eval_runtime": 192.5677,
172
+ "eval_samples_per_second": 5.193,
173
+ "eval_steps_per_second": 5.193,
174
+ "step": 1100
175
+ },
176
+ {
177
+ "epoch": 0.38,
178
+ "grad_norm": 0.004010587465018034,
179
+ "learning_rate": 1.688888888888889e-05,
180
+ "loss": 0.0645,
181
+ "step": 1200
182
+ },
183
+ {
184
+ "epoch": 0.38,
185
+ "eval_loss": 0.026380345225334167,
186
+ "eval_runtime": 192.1928,
187
+ "eval_samples_per_second": 5.203,
188
+ "eval_steps_per_second": 5.203,
189
+ "step": 1200
190
+ },
191
+ {
192
+ "epoch": 0.41,
193
+ "grad_norm": 0.0805005207657814,
194
+ "learning_rate": 1.6444444444444444e-05,
195
+ "loss": 0.0419,
196
+ "step": 1300
197
+ },
198
+ {
199
+ "epoch": 0.41,
200
+ "eval_loss": 0.027041926980018616,
201
+ "eval_runtime": 192.535,
202
+ "eval_samples_per_second": 5.194,
203
+ "eval_steps_per_second": 5.194,
204
+ "step": 1300
205
+ },
206
+ {
207
+ "epoch": 0.44,
208
+ "grad_norm": 0.010087325237691402,
209
+ "learning_rate": 1.6000000000000003e-05,
210
+ "loss": 0.0612,
211
+ "step": 1400
212
+ },
213
+ {
214
+ "epoch": 0.44,
215
+ "eval_loss": 0.018431425094604492,
216
+ "eval_runtime": 192.3449,
217
+ "eval_samples_per_second": 5.199,
218
+ "eval_steps_per_second": 5.199,
219
+ "step": 1400
220
+ },
221
+ {
222
+ "epoch": 0.47,
223
+ "grad_norm": 0.6794092655181885,
224
+ "learning_rate": 1.555555555555556e-05,
225
+ "loss": 0.0569,
226
+ "step": 1500
227
+ },
228
+ {
229
+ "epoch": 0.47,
230
+ "eval_loss": 0.017885498702526093,
231
+ "eval_runtime": 192.6531,
232
+ "eval_samples_per_second": 5.191,
233
+ "eval_steps_per_second": 5.191,
234
+ "step": 1500
235
+ },
236
+ {
237
+ "epoch": 0.5,
238
+ "grad_norm": 1.6970280408859253,
239
+ "learning_rate": 1.5111111111111112e-05,
240
+ "loss": 0.0546,
241
+ "step": 1600
242
+ },
243
+ {
244
+ "epoch": 0.5,
245
+ "eval_loss": 0.022140460088849068,
246
+ "eval_runtime": 192.8968,
247
+ "eval_samples_per_second": 5.184,
248
+ "eval_steps_per_second": 5.184,
249
+ "step": 1600
250
+ },
251
+ {
252
+ "epoch": 0.53,
253
+ "grad_norm": 0.013943832367658615,
254
+ "learning_rate": 1.4666666666666666e-05,
255
+ "loss": 0.069,
256
+ "step": 1700
257
+ },
258
+ {
259
+ "epoch": 0.53,
260
+ "eval_loss": 0.020227737724781036,
261
+ "eval_runtime": 193.1829,
262
+ "eval_samples_per_second": 5.176,
263
+ "eval_steps_per_second": 5.176,
264
+ "step": 1700
265
+ },
266
+ {
267
+ "epoch": 0.56,
268
+ "grad_norm": 2.067197322845459,
269
+ "learning_rate": 1.4222222222222224e-05,
270
+ "loss": 0.0509,
271
+ "step": 1800
272
+ },
273
+ {
274
+ "epoch": 0.56,
275
+ "eval_loss": 0.01612325944006443,
276
+ "eval_runtime": 192.6314,
277
+ "eval_samples_per_second": 5.191,
278
+ "eval_steps_per_second": 5.191,
279
+ "step": 1800
280
+ },
281
+ {
282
+ "epoch": 0.6,
283
+ "grad_norm": 2.2480263710021973,
284
+ "learning_rate": 1.377777777777778e-05,
285
+ "loss": 0.0495,
286
+ "step": 1900
287
+ },
288
+ {
289
+ "epoch": 0.6,
290
+ "eval_loss": 0.01796303130686283,
291
+ "eval_runtime": 192.3154,
292
+ "eval_samples_per_second": 5.2,
293
+ "eval_steps_per_second": 5.2,
294
+ "step": 1900
295
+ },
296
+ {
297
+ "epoch": 0.63,
298
+ "grad_norm": 0.0029044542461633682,
299
+ "learning_rate": 1.3333333333333333e-05,
300
+ "loss": 0.0444,
301
+ "step": 2000
302
+ },
303
+ {
304
+ "epoch": 0.63,
305
+ "eval_loss": 0.02335376851260662,
306
+ "eval_runtime": 192.3608,
307
+ "eval_samples_per_second": 5.199,
308
+ "eval_steps_per_second": 5.199,
309
+ "step": 2000
310
  }
311
  ],
312
  "logging_steps": 100,
 
314
  "num_input_tokens_seen": 0,
315
  "num_train_epochs": 2,
316
  "save_steps": 1000,
317
+ "total_flos": 3.2204251987968e+16,
318
  "train_batch_size": 1,
319
  "trial_name": null,
320
  "trial_params": null