qingy2024 committed on
Commit
fc60b0e
·
verified ·
1 Parent(s): 6821333

Upload checkpoint 180

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "qingy2024/Qwark-4B",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "./checkpoint",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4cc05f999e21289ed5e7e1944f9b825b650201d4b6e49efc4600c1de13ce97
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af37e95ecc0ecd4629ac364d7b97888000a52d9916d6991d8447b3b3fd7a54ae
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9807d8aed988b93061377c25d688ea15054695319ec4186ac5e81721288c0c8c
3
  size 3989163248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9966edb82684811ef31b9ac43b224ab6dfafa3e5dbeafadf4597657723661f
3
  size 3989163248
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59665e5ce11955e76a7a3977561998693ce16b3501d05df3615652f01a071033
3
  size 17893865224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c192c58b938c4dd2cbc5530ccbfd1a3a4117252427e1d93e0ae78e04c0e874bc
3
  size 17893865224
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9ef06ee3c7cfc29655a12c0d2daf141b73e0c056780dd35aec1b5a285207c10
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f1cd7659be8558e55e3a42a030452706b8961a2d1477b7bac223479e7473b2c
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.013789141051422005,
5
  "eval_steps": 500,
6
- "global_step": 120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -287,6 +287,146 @@
287
  "learning_rate": 0.00019992097609676073,
288
  "loss": 1.8332,
289
  "step": 120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  }
291
  ],
292
  "logging_steps": 3,
@@ -306,7 +446,7 @@
306
  "attributes": {}
307
  }
308
  },
309
- "total_flos": 3.9279066133561344e+17,
310
  "train_batch_size": 4,
311
  "trial_name": null,
312
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.02068371157713301,
5
  "eval_steps": 500,
6
+ "global_step": 180,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
287
  "learning_rate": 0.00019992097609676073,
288
  "loss": 1.8332,
289
  "step": 120
290
+ },
291
+ {
292
+ "epoch": 0.014133869577707556,
293
+ "grad_norm": 0.78125,
294
+ "learning_rate": 0.00019991660753128755,
295
+ "loss": 1.9227,
296
+ "step": 123
297
+ },
298
+ {
299
+ "epoch": 0.014478598103993105,
300
+ "grad_norm": 0.71484375,
301
+ "learning_rate": 0.00019991212149216597,
302
+ "loss": 1.8356,
303
+ "step": 126
304
+ },
305
+ {
306
+ "epoch": 0.014823326630278655,
307
+ "grad_norm": 0.64453125,
308
+ "learning_rate": 0.0001999075179846703,
309
+ "loss": 1.8882,
310
+ "step": 129
311
+ },
312
+ {
313
+ "epoch": 0.015168055156564206,
314
+ "grad_norm": 0.7578125,
315
+ "learning_rate": 0.00019990279701421294,
316
+ "loss": 1.7091,
317
+ "step": 132
318
+ },
319
+ {
320
+ "epoch": 0.015512783682849756,
321
+ "grad_norm": 0.69140625,
322
+ "learning_rate": 0.0001998979585863444,
323
+ "loss": 1.8811,
324
+ "step": 135
325
+ },
326
+ {
327
+ "epoch": 0.015857512209135307,
328
+ "grad_norm": 0.7265625,
329
+ "learning_rate": 0.00019989300270675334,
330
+ "loss": 1.8558,
331
+ "step": 138
332
+ },
333
+ {
334
+ "epoch": 0.016202240735420856,
335
+ "grad_norm": 0.6015625,
336
+ "learning_rate": 0.0001998879293812664,
337
+ "loss": 1.8161,
338
+ "step": 141
339
+ },
340
+ {
341
+ "epoch": 0.016546969261706405,
342
+ "grad_norm": 0.87890625,
343
+ "learning_rate": 0.0001998827386158485,
344
+ "loss": 1.8826,
345
+ "step": 144
346
+ },
347
+ {
348
+ "epoch": 0.016891697787991957,
349
+ "grad_norm": 0.8125,
350
+ "learning_rate": 0.0001998774304166024,
351
+ "loss": 1.8912,
352
+ "step": 147
353
+ },
354
+ {
355
+ "epoch": 0.017236426314277506,
356
+ "grad_norm": 1.0859375,
357
+ "learning_rate": 0.00019987200478976909,
358
+ "loss": 1.9116,
359
+ "step": 150
360
+ },
361
+ {
362
+ "epoch": 0.01758115484056306,
363
+ "grad_norm": 36.25,
364
+ "learning_rate": 0.00019986646174172755,
365
+ "loss": 1.9378,
366
+ "step": 153
367
+ },
368
+ {
369
+ "epoch": 0.017925883366848607,
370
+ "grad_norm": 0.86328125,
371
+ "learning_rate": 0.00019986080127899487,
372
+ "loss": 1.8727,
373
+ "step": 156
374
+ },
375
+ {
376
+ "epoch": 0.018270611893134156,
377
+ "grad_norm": 0.94921875,
378
+ "learning_rate": 0.0001998550234082261,
379
+ "loss": 1.8471,
380
+ "step": 159
381
+ },
382
+ {
383
+ "epoch": 0.01861534041941971,
384
+ "grad_norm": 1.1328125,
385
+ "learning_rate": 0.00019984912813621438,
386
+ "loss": 1.8372,
387
+ "step": 162
388
+ },
389
+ {
390
+ "epoch": 0.018960068945705257,
391
+ "grad_norm": 0.7578125,
392
+ "learning_rate": 0.00019984311546989098,
393
+ "loss": 1.8197,
394
+ "step": 165
395
+ },
396
+ {
397
+ "epoch": 0.019304797471990806,
398
+ "grad_norm": 0.7734375,
399
+ "learning_rate": 0.00019983698541632498,
400
+ "loss": 1.809,
401
+ "step": 168
402
+ },
403
+ {
404
+ "epoch": 0.01964952599827636,
405
+ "grad_norm": 0.80859375,
406
+ "learning_rate": 0.00019983073798272364,
407
+ "loss": 1.829,
408
+ "step": 171
409
+ },
410
+ {
411
+ "epoch": 0.019994254524561907,
412
+ "grad_norm": 0.6484375,
413
+ "learning_rate": 0.00019982437317643217,
414
+ "loss": 1.8843,
415
+ "step": 174
416
+ },
417
+ {
418
+ "epoch": 0.020338983050847456,
419
+ "grad_norm": 0.66015625,
420
+ "learning_rate": 0.00019981789100493376,
421
+ "loss": 1.9159,
422
+ "step": 177
423
+ },
424
+ {
425
+ "epoch": 0.02068371157713301,
426
+ "grad_norm": 0.56640625,
427
+ "learning_rate": 0.0001998112914758496,
428
+ "loss": 1.8781,
429
+ "step": 180
430
  }
431
  ],
432
  "logging_steps": 3,
 
446
  "attributes": {}
447
  }
448
  },
449
+ "total_flos": 5.891859920034202e+17,
450
  "train_batch_size": 4,
451
  "trial_name": null,
452
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5692f259194658f6923f4b51ca2a607b4d71f7a19644f7edb031aba7b29cf648
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7ca5d0fd565f49515cc3a135b4552fcc038d4359977a97b00c246603ed40a99
3
  size 5368