philschmid HF staff committed on
Commit
5b652bd
‒
1 Parent(s): 42b7ff7

Training in progress, step 1000

Browse files
Files changed (39) hide show
  1. {checkpoint-600 → checkpoint-1000}/config.json +0 -0
  2. {checkpoint-600 → checkpoint-1000}/generation_config.json +0 -0
  3. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +1 -1
  4. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +1 -1
  5. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +1 -1
  6. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +1 -1
  7. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +1 -1
  8. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +1 -1
  9. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +1 -1
  10. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +1 -1
  11. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_0_mp_rank_00_model_states.pt +1 -1
  12. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_1_mp_rank_00_model_states.pt +1 -1
  13. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_2_mp_rank_00_model_states.pt +1 -1
  14. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_3_mp_rank_00_model_states.pt +1 -1
  15. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_4_mp_rank_00_model_states.pt +1 -1
  16. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_5_mp_rank_00_model_states.pt +1 -1
  17. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_6_mp_rank_00_model_states.pt +1 -1
  18. {checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_7_mp_rank_00_model_states.pt +1 -1
  19. checkpoint-1000/latest +1 -0
  20. {checkpoint-600 → checkpoint-1000}/model-00001-of-00002.safetensors +1 -1
  21. {checkpoint-600 → checkpoint-1000}/model-00002-of-00002.safetensors +1 -1
  22. {checkpoint-600 → checkpoint-1000}/model.safetensors.index.json +0 -0
  23. {checkpoint-600 → checkpoint-1000}/rng_state_0.pth +0 -0
  24. {checkpoint-600 → checkpoint-1000}/rng_state_1.pth +0 -0
  25. {checkpoint-600 → checkpoint-1000}/rng_state_2.pth +0 -0
  26. {checkpoint-600 → checkpoint-1000}/rng_state_3.pth +0 -0
  27. {checkpoint-600 → checkpoint-1000}/rng_state_4.pth +0 -0
  28. {checkpoint-600 → checkpoint-1000}/rng_state_5.pth +0 -0
  29. {checkpoint-600 → checkpoint-1000}/rng_state_6.pth +0 -0
  30. {checkpoint-600 → checkpoint-1000}/rng_state_7.pth +0 -0
  31. {checkpoint-600 → checkpoint-1000}/special_tokens_map.json +0 -0
  32. {checkpoint-600 → checkpoint-1000}/tokenizer.json +0 -0
  33. {checkpoint-600 → checkpoint-1000}/tokenizer.model +0 -0
  34. {checkpoint-600 → checkpoint-1000}/tokenizer_config.json +0 -0
  35. {checkpoint-600 → checkpoint-1000}/trainer_state.json +243 -3
  36. {checkpoint-600 → checkpoint-1000}/training_args.bin +0 -0
  37. {checkpoint-600 → checkpoint-1000}/zero_to_fp32.py +0 -0
  38. checkpoint-600/latest +0 -1
  39. runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 +2 -2
{checkpoint-600 → checkpoint-1000}/config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/generation_config.json RENAMED
File without changes
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66eb01a0e90463b9da96b92c71aa113fb0cc395d468d494cbaaf30b0c996c900
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5703ed353a444bcb887496e437dcc7b16f8bdaae831e4e6d4f7ae38a220a5df3
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a9c27857331e866aa180ca994d30ecdeb152ce200686244186589fd2b6c5399
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65d6937e778c81679ecadc56c59bb90e942cd63ffc72959111931f27b22bea57
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94cc70fd746640a3708551bd27076cca0c4493fc0ec627adc422dc06e217b4f1
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7078c4a9cb10247701dadff610cea1a671df2436f25c85ce7ec7f830d5864cff
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53f785e4989207b0d802f03d21811f0310e6887f491becf18aab59dc2a8e34dc
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632c044c424e36e9ac4d04c3db4d08e0453493e42ad88a6bbfdceadec83fd685
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:055b189cd86f4ab7885ffd7733ca11939b1c02c7b961887c23ff3765b85b2c26
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c31e28791a65e07de46a9f174419774d315db15b86b1c34705af7a4bc7d99a66
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b598e84707f1b589d6e34c06fddfcfd1c635efdeb8c34cf5ca13a510f0949837
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07b03db6da0fdf58a5abbc86c3e32353bc4ea225df4814dd9b0c87094cba0a38
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15519b6df3b12db386d8c11e652af04badf693c055d7c3d49091aad19745e337
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1621abeb507c8e04e54646297d7dfc104a12de8750d5ef2e450703166e2e6a23
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a17fc5b79f53abceedb3cc4c49fba56335c0c00a6ffa5320a11cacac9ad4e106
3
  size 10107626487
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb1c8f3f16701d2f9fe90ecb59b247f72b3a0bde6d3db2267a5c44807cf77842
3
  size 10107626487
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_0_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c17dd6ac6428c5d94c99148aa50c12df93df8b2462cbf19e8e84d9d333fcbd1e
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4658b6b3c7f7a48dd296534d1ec3dc6d546541ef01a0a3c11c403c23380b9050
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_1_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dade817edf76921be5c0796cbcfdf915c904c183acf9c931777f623390eb1f3
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc1c982bfd73913563caf1b20156081f202141ef2b4942de7b04a60a2735599a
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_2_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87c1c5de42abbdea85c5b809d6ad1ea6ea81c0292569eae30054d35b7568f9e0
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c42c53335aafb877cfbba5749485ee3cd61082a493faf56f57f8709eb5e6252
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_3_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d2bc5f999fb8ccc85ce968f9308e1832cc481d0a36fbfee21231ee0251223b9
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccd7e1a57d35ed384ee88e36e9a684ec710295b14dacde4ea9e97a2e0dd5e2f4
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_4_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:287d7ae815a009508c9d5763cdb2b95cdccbcb41047461ddb5942fe4be828cd4
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0a5cc62566f10c81232feb782efb1f9f3ef3a38928981584b06095d904ad93
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_5_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c563d457d97887b6b8ebca491ddbbb5f7d02940671baa85e856e70f2373dd1e8
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8585a4986217ec7db11028c99ffd2fb8f753ad9973841c9b2be8b872fa0f5af7
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_6_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbaa5dfcd7813a949984070a065f4d96fd979a8486c397c0fe82e466df55719e
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:689aff8732618feb10643e53dc24544b9e68c3fe79059dd39c4324f792c95f8e
3
  size 168086
{checkpoint-600/global_step600 → checkpoint-1000/global_step1000}/zero_pp_rank_7_mp_rank_00_model_states.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cfe0ea806988e2741a7bc673de2fae1d546b08c34f0cf088d122f77c3098bac
3
  size 168086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200527511d1203250da233f27a50a276bf203ca991380ba5c7c096f1955a2752
3
  size 168086
checkpoint-1000/latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step1000
{checkpoint-600 → checkpoint-1000}/model-00001-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d5217aa0cdf2a3e998ac2276247c8b950f52424ef53ba6bedbb47fe566772c4
3
  size 9976576392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11913c7f1753ca039f36c4c2b6bbefb982404ad4203fe8ba96a2f892793f3922
3
  size 9976576392
{checkpoint-600 → checkpoint-1000}/model-00002-of-00002.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15c35bdf2b64df17b13e8aa89bf76a22e3fe647fb7a427d6d581f7b18dbbea71
3
  size 3500296504
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9877b1573d48279b0f52a3d16935fdb3bf67e40baa64bc8774bbb8005dd3519
3
  size 3500296504
{checkpoint-600 → checkpoint-1000}/model.safetensors.index.json RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_0.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_1.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_2.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_3.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_4.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_5.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_6.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/rng_state_7.pth RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/special_tokens_map.json RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/tokenizer.json RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/tokenizer.model RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/tokenizer_config.json RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2,
5
- "global_step": 600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -366,11 +366,251 @@
366
  "learning_rate": 0.0003,
367
  "loss": 1.706,
368
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
  ],
371
  "max_steps": 3000,
372
  "num_train_epochs": 9223372036854775807,
373
- "total_flos": 251255586816000.0,
374
  "trial_name": null,
375
  "trial_params": null
376
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3333333333333333,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
366
  "learning_rate": 0.0003,
367
  "loss": 1.706,
368
  "step": 600
369
+ },
370
+ {
371
+ "epoch": 0.2,
372
+ "learning_rate": 0.0003,
373
+ "loss": 1.6328,
374
+ "step": 610
375
+ },
376
+ {
377
+ "epoch": 0.21,
378
+ "learning_rate": 0.0003,
379
+ "loss": 1.6197,
380
+ "step": 620
381
+ },
382
+ {
383
+ "epoch": 0.21,
384
+ "learning_rate": 0.0003,
385
+ "loss": 1.6106,
386
+ "step": 630
387
+ },
388
+ {
389
+ "epoch": 0.21,
390
+ "learning_rate": 0.0003,
391
+ "loss": 1.5649,
392
+ "step": 640
393
+ },
394
+ {
395
+ "epoch": 0.22,
396
+ "learning_rate": 0.0003,
397
+ "loss": 1.5001,
398
+ "step": 650
399
+ },
400
+ {
401
+ "epoch": 0.22,
402
+ "learning_rate": 0.0003,
403
+ "loss": 1.4926,
404
+ "step": 660
405
+ },
406
+ {
407
+ "epoch": 0.22,
408
+ "learning_rate": 0.0003,
409
+ "loss": 1.4682,
410
+ "step": 670
411
+ },
412
+ {
413
+ "epoch": 0.23,
414
+ "learning_rate": 0.0003,
415
+ "loss": 1.442,
416
+ "step": 680
417
+ },
418
+ {
419
+ "epoch": 0.23,
420
+ "learning_rate": 0.0003,
421
+ "loss": 1.4117,
422
+ "step": 690
423
+ },
424
+ {
425
+ "epoch": 0.23,
426
+ "learning_rate": 0.0003,
427
+ "loss": 1.379,
428
+ "step": 700
429
+ },
430
+ {
431
+ "epoch": 0.24,
432
+ "learning_rate": 0.0003,
433
+ "loss": 1.3609,
434
+ "step": 710
435
+ },
436
+ {
437
+ "epoch": 0.24,
438
+ "learning_rate": 0.0003,
439
+ "loss": 1.3224,
440
+ "step": 720
441
+ },
442
+ {
443
+ "epoch": 0.24,
444
+ "learning_rate": 0.0003,
445
+ "loss": 1.2853,
446
+ "step": 730
447
+ },
448
+ {
449
+ "epoch": 0.25,
450
+ "learning_rate": 0.0003,
451
+ "loss": 1.2762,
452
+ "step": 740
453
+ },
454
+ {
455
+ "epoch": 0.25,
456
+ "learning_rate": 0.0003,
457
+ "loss": 1.2265,
458
+ "step": 750
459
+ },
460
+ {
461
+ "epoch": 0.25,
462
+ "learning_rate": 0.0003,
463
+ "loss": 1.2232,
464
+ "step": 760
465
+ },
466
+ {
467
+ "epoch": 0.26,
468
+ "learning_rate": 0.0003,
469
+ "loss": 1.2075,
470
+ "step": 770
471
+ },
472
+ {
473
+ "epoch": 0.26,
474
+ "learning_rate": 0.0003,
475
+ "loss": 1.168,
476
+ "step": 780
477
+ },
478
+ {
479
+ "epoch": 0.26,
480
+ "learning_rate": 0.0003,
481
+ "loss": 1.1472,
482
+ "step": 790
483
+ },
484
+ {
485
+ "epoch": 0.27,
486
+ "learning_rate": 0.0003,
487
+ "loss": 1.118,
488
+ "step": 800
489
+ },
490
+ {
491
+ "epoch": 0.27,
492
+ "learning_rate": 0.0003,
493
+ "loss": 1.1228,
494
+ "step": 810
495
+ },
496
+ {
497
+ "epoch": 0.27,
498
+ "learning_rate": 0.0003,
499
+ "loss": 1.1339,
500
+ "step": 820
501
+ },
502
+ {
503
+ "epoch": 0.28,
504
+ "learning_rate": 0.0003,
505
+ "loss": 1.0853,
506
+ "step": 830
507
+ },
508
+ {
509
+ "epoch": 0.28,
510
+ "learning_rate": 0.0003,
511
+ "loss": 1.0676,
512
+ "step": 840
513
+ },
514
+ {
515
+ "epoch": 0.28,
516
+ "learning_rate": 0.0003,
517
+ "loss": 1.0905,
518
+ "step": 850
519
+ },
520
+ {
521
+ "epoch": 0.29,
522
+ "learning_rate": 0.0003,
523
+ "loss": 1.076,
524
+ "step": 860
525
+ },
526
+ {
527
+ "epoch": 0.29,
528
+ "learning_rate": 0.0003,
529
+ "loss": 1.0202,
530
+ "step": 870
531
+ },
532
+ {
533
+ "epoch": 0.29,
534
+ "learning_rate": 0.0003,
535
+ "loss": 1.0123,
536
+ "step": 880
537
+ },
538
+ {
539
+ "epoch": 0.3,
540
+ "learning_rate": 0.0003,
541
+ "loss": 0.9863,
542
+ "step": 890
543
+ },
544
+ {
545
+ "epoch": 0.3,
546
+ "learning_rate": 0.0003,
547
+ "loss": 0.9347,
548
+ "step": 900
549
+ },
550
+ {
551
+ "epoch": 0.3,
552
+ "learning_rate": 0.0003,
553
+ "loss": 0.9416,
554
+ "step": 910
555
+ },
556
+ {
557
+ "epoch": 0.31,
558
+ "learning_rate": 0.0003,
559
+ "loss": 0.9165,
560
+ "step": 920
561
+ },
562
+ {
563
+ "epoch": 0.31,
564
+ "learning_rate": 0.0003,
565
+ "loss": 0.8996,
566
+ "step": 930
567
+ },
568
+ {
569
+ "epoch": 0.31,
570
+ "learning_rate": 0.0003,
571
+ "loss": 0.8673,
572
+ "step": 940
573
+ },
574
+ {
575
+ "epoch": 0.32,
576
+ "learning_rate": 0.0003,
577
+ "loss": 0.8449,
578
+ "step": 950
579
+ },
580
+ {
581
+ "epoch": 0.32,
582
+ "learning_rate": 0.0003,
583
+ "loss": 0.8468,
584
+ "step": 960
585
+ },
586
+ {
587
+ "epoch": 0.32,
588
+ "learning_rate": 0.0003,
589
+ "loss": 0.817,
590
+ "step": 970
591
+ },
592
+ {
593
+ "epoch": 0.33,
594
+ "learning_rate": 0.0003,
595
+ "loss": 0.7947,
596
+ "step": 980
597
+ },
598
+ {
599
+ "epoch": 0.33,
600
+ "learning_rate": 0.0003,
601
+ "loss": 0.7706,
602
+ "step": 990
603
+ },
604
+ {
605
+ "epoch": 0.33,
606
+ "learning_rate": 0.0003,
607
+ "loss": 0.7357,
608
+ "step": 1000
609
  }
610
  ],
611
  "max_steps": 3000,
612
  "num_train_epochs": 9223372036854775807,
613
+ "total_flos": 418759311360000.0,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
{checkpoint-600 → checkpoint-1000}/training_args.bin RENAMED
File without changes
{checkpoint-600 → checkpoint-1000}/zero_to_fp32.py RENAMED
File without changes
checkpoint-600/latest DELETED
@@ -1 +0,0 @@
1
- global_step600
 
 
runs/Aug22_18-42-03_ip-26-0-150-12/events.out.tfevents.1692729850.ip-26-0-150-12.2895584.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf0edb971b8a90918c57c5b33c853a049ca331754786d7d176c54919449c9aaa
3
- size 18281
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fef6847951e7a7a5d7472c77aeb6d8f614b223d630dfc7f7950ed07e82dfab5
3
+ size 19851