Upload checkpoint 180
Browse files- config.json +1 -1
- model-00001-of-00002.safetensors +1 -1
- model-00002-of-00002.safetensors +1 -1
- optimizer.pt +1 -1
- scheduler.pt +1 -1
- trainer_state.json +143 -3
- training_args.bin +1 -1
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "./checkpoint",
|
3 |
"architectures": [
|
4 |
"Qwen2ForCausalLM"
|
5 |
],
|
model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4957560304
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af37e95ecc0ecd4629ac364d7b97888000a52d9916d6991d8447b3b3fd7a54ae
|
3 |
size 4957560304
|
model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3989163248
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8d9966edb82684811ef31b9ac43b224ab6dfafa3e5dbeafadf4597657723661f
|
3 |
size 3989163248
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17893865224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c192c58b938c4dd2cbc5530ccbfd1a3a4117252427e1d93e0ae78e04c0e874bc
|
3 |
size 17893865224
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f1cd7659be8558e55e3a42a030452706b8961a2d1477b7bac223479e7473b2c
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -287,6 +287,146 @@
|
|
287 |
"learning_rate": 0.00019992097609676073,
|
288 |
"loss": 1.8332,
|
289 |
"step": 120
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
}
|
291 |
],
|
292 |
"logging_steps": 3,
|
@@ -306,7 +446,7 @@
|
|
306 |
"attributes": {}
|
307 |
}
|
308 |
},
|
309 |
-
"total_flos":
|
310 |
"train_batch_size": 4,
|
311 |
"trial_name": null,
|
312 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.02068371157713301,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 180,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
287 |
"learning_rate": 0.00019992097609676073,
|
288 |
"loss": 1.8332,
|
289 |
"step": 120
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 0.014133869577707556,
|
293 |
+
"grad_norm": 0.78125,
|
294 |
+
"learning_rate": 0.00019991660753128755,
|
295 |
+
"loss": 1.9227,
|
296 |
+
"step": 123
|
297 |
+
},
|
298 |
+
{
|
299 |
+
"epoch": 0.014478598103993105,
|
300 |
+
"grad_norm": 0.71484375,
|
301 |
+
"learning_rate": 0.00019991212149216597,
|
302 |
+
"loss": 1.8356,
|
303 |
+
"step": 126
|
304 |
+
},
|
305 |
+
{
|
306 |
+
"epoch": 0.014823326630278655,
|
307 |
+
"grad_norm": 0.64453125,
|
308 |
+
"learning_rate": 0.0001999075179846703,
|
309 |
+
"loss": 1.8882,
|
310 |
+
"step": 129
|
311 |
+
},
|
312 |
+
{
|
313 |
+
"epoch": 0.015168055156564206,
|
314 |
+
"grad_norm": 0.7578125,
|
315 |
+
"learning_rate": 0.00019990279701421294,
|
316 |
+
"loss": 1.7091,
|
317 |
+
"step": 132
|
318 |
+
},
|
319 |
+
{
|
320 |
+
"epoch": 0.015512783682849756,
|
321 |
+
"grad_norm": 0.69140625,
|
322 |
+
"learning_rate": 0.0001998979585863444,
|
323 |
+
"loss": 1.8811,
|
324 |
+
"step": 135
|
325 |
+
},
|
326 |
+
{
|
327 |
+
"epoch": 0.015857512209135307,
|
328 |
+
"grad_norm": 0.7265625,
|
329 |
+
"learning_rate": 0.00019989300270675334,
|
330 |
+
"loss": 1.8558,
|
331 |
+
"step": 138
|
332 |
+
},
|
333 |
+
{
|
334 |
+
"epoch": 0.016202240735420856,
|
335 |
+
"grad_norm": 0.6015625,
|
336 |
+
"learning_rate": 0.0001998879293812664,
|
337 |
+
"loss": 1.8161,
|
338 |
+
"step": 141
|
339 |
+
},
|
340 |
+
{
|
341 |
+
"epoch": 0.016546969261706405,
|
342 |
+
"grad_norm": 0.87890625,
|
343 |
+
"learning_rate": 0.0001998827386158485,
|
344 |
+
"loss": 1.8826,
|
345 |
+
"step": 144
|
346 |
+
},
|
347 |
+
{
|
348 |
+
"epoch": 0.016891697787991957,
|
349 |
+
"grad_norm": 0.8125,
|
350 |
+
"learning_rate": 0.0001998774304166024,
|
351 |
+
"loss": 1.8912,
|
352 |
+
"step": 147
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"epoch": 0.017236426314277506,
|
356 |
+
"grad_norm": 1.0859375,
|
357 |
+
"learning_rate": 0.00019987200478976909,
|
358 |
+
"loss": 1.9116,
|
359 |
+
"step": 150
|
360 |
+
},
|
361 |
+
{
|
362 |
+
"epoch": 0.01758115484056306,
|
363 |
+
"grad_norm": 36.25,
|
364 |
+
"learning_rate": 0.00019986646174172755,
|
365 |
+
"loss": 1.9378,
|
366 |
+
"step": 153
|
367 |
+
},
|
368 |
+
{
|
369 |
+
"epoch": 0.017925883366848607,
|
370 |
+
"grad_norm": 0.86328125,
|
371 |
+
"learning_rate": 0.00019986080127899487,
|
372 |
+
"loss": 1.8727,
|
373 |
+
"step": 156
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 0.018270611893134156,
|
377 |
+
"grad_norm": 0.94921875,
|
378 |
+
"learning_rate": 0.0001998550234082261,
|
379 |
+
"loss": 1.8471,
|
380 |
+
"step": 159
|
381 |
+
},
|
382 |
+
{
|
383 |
+
"epoch": 0.01861534041941971,
|
384 |
+
"grad_norm": 1.1328125,
|
385 |
+
"learning_rate": 0.00019984912813621438,
|
386 |
+
"loss": 1.8372,
|
387 |
+
"step": 162
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"epoch": 0.018960068945705257,
|
391 |
+
"grad_norm": 0.7578125,
|
392 |
+
"learning_rate": 0.00019984311546989098,
|
393 |
+
"loss": 1.8197,
|
394 |
+
"step": 165
|
395 |
+
},
|
396 |
+
{
|
397 |
+
"epoch": 0.019304797471990806,
|
398 |
+
"grad_norm": 0.7734375,
|
399 |
+
"learning_rate": 0.00019983698541632498,
|
400 |
+
"loss": 1.809,
|
401 |
+
"step": 168
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"epoch": 0.01964952599827636,
|
405 |
+
"grad_norm": 0.80859375,
|
406 |
+
"learning_rate": 0.00019983073798272364,
|
407 |
+
"loss": 1.829,
|
408 |
+
"step": 171
|
409 |
+
},
|
410 |
+
{
|
411 |
+
"epoch": 0.019994254524561907,
|
412 |
+
"grad_norm": 0.6484375,
|
413 |
+
"learning_rate": 0.00019982437317643217,
|
414 |
+
"loss": 1.8843,
|
415 |
+
"step": 174
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"epoch": 0.020338983050847456,
|
419 |
+
"grad_norm": 0.66015625,
|
420 |
+
"learning_rate": 0.00019981789100493376,
|
421 |
+
"loss": 1.9159,
|
422 |
+
"step": 177
|
423 |
+
},
|
424 |
+
{
|
425 |
+
"epoch": 0.02068371157713301,
|
426 |
+
"grad_norm": 0.56640625,
|
427 |
+
"learning_rate": 0.0001998112914758496,
|
428 |
+
"loss": 1.8781,
|
429 |
+
"step": 180
|
430 |
}
|
431 |
],
|
432 |
"logging_steps": 3,
|
|
|
446 |
"attributes": {}
|
447 |
}
|
448 |
},
|
449 |
+
"total_flos": 5.891859920034202e+17,
|
450 |
"train_batch_size": 4,
|
451 |
"trial_name": null,
|
452 |
"trial_params": null
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7ca5d0fd565f49515cc3a135b4552fcc038d4359977a97b00c246603ed40a99
|
3 |
size 5368
|