TracyTank committed · Commit 58a33f5 · verified · 1 Parent(s): 1434d0a

Training in progress, step 88, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4b99b6435bdbda33125afd7fe238d3ed28f782b15ba364c99d51263a904f084
+oid sha256:7895063bc153429fb44800b2c3252440c135b7a6bc7fa23b3cc0f163c4638654
 size 13587864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7638555db4d48919c88fa87a5bb83b47fb091a92f85aa1fe191201b7954aca4b
+oid sha256:fcb0cda1ac662978d3072180eff8a5caa6bf351e8ed6212498565e562b3dc76f
 size 27273018
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a11260a573a7e812b0f124ada989da776530c9fb5fbad96a536627e3c118a7e
+oid sha256:4700b6487422ef8ab6e86454d16ee872c60505b20ded39d6a1b9e8be3d2168ab
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:446f393ad01ec616f7cc05c80366d751e89dbc85b76d83870c310e59e6c835f3
+oid sha256:0bd443eb75c362a6b4a0030ef7f8a0b5f18deaf25f00080ceab6771adbb305bf
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:999c82ad0e9a3671d352858fcb74199f3c15fb5b86fff3307d5d249836ea915b
+oid sha256:0ce5e023beb7bd14d822f65f919e0026ee018c9b2f1369b8ca98d304a04fb864
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5485f013c0be7e42efb4385ed27f58c1a8ab907eab8f5ae8c382b84b1a3d7c12
+oid sha256:b8e7abd048f13572298538e3bfc0ceb6d606481ac0501f87d4647218380bab91
 size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f216e6b722b1c31e52ee2520a6fefbf2571d53970bd2683b508b5b2d9eb6051b
+oid sha256:57777f406d2869170e5875b2991a1e3a92ec1440f7320c85a571752c42f8b802
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.11645537614822388,
   "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 1.1429587482219061,
+  "epoch": 2.012802275960171,
   "eval_steps": 25,
-  "global_step": 50,
+  "global_step": 88,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,280 @@
       "eval_samples_per_second": 174.617,
       "eval_steps_per_second": 45.4,
       "step": 50
+    },
+    {
+      "epoch": 1.1657183499288761,
+      "grad_norm": 0.35609501600265503,
+      "learning_rate": 0.00013564692629648982,
+      "loss": 0.0858,
+      "step": 51
+    },
+    {
+      "epoch": 1.1884779516358464,
+      "grad_norm": 0.1918356865644455,
+      "learning_rate": 0.00013085399337712307,
+      "loss": 0.0959,
+      "step": 52
+    },
+    {
+      "epoch": 1.2112375533428166,
+      "grad_norm": 0.2323223352432251,
+      "learning_rate": 0.00012610662158569293,
+      "loss": 0.0919,
+      "step": 53
+    },
+    {
+      "epoch": 1.2339971550497866,
+      "grad_norm": 0.2522870600223541,
+      "learning_rate": 0.0001214111453558548,
+      "loss": 0.0706,
+      "step": 54
+    },
+    {
+      "epoch": 1.2567567567567568,
+      "grad_norm": 0.29997286200523376,
+      "learning_rate": 0.00011677382987684708,
+      "loss": 0.1814,
+      "step": 55
+    },
+    {
+      "epoch": 1.2795163584637268,
+      "grad_norm": 0.30940982699394226,
+      "learning_rate": 0.00011220086273382896,
+      "loss": 0.1421,
+      "step": 56
+    },
+    {
+      "epoch": 1.302275960170697,
+      "grad_norm": 0.37868553400039673,
+      "learning_rate": 0.00010769834565176498,
+      "loss": 0.1265,
+      "step": 57
+    },
+    {
+      "epoch": 1.3250355618776672,
+      "grad_norm": 0.23006294667720795,
+      "learning_rate": 0.0001032722863538738,
+      "loss": 0.1177,
+      "step": 58
+    },
+    {
+      "epoch": 1.3477951635846372,
+      "grad_norm": 0.26485735177993774,
+      "learning_rate": 9.892859054550347e-05,
+      "loss": 0.1212,
+      "step": 59
+    },
+    {
+      "epoch": 1.3705547652916075,
+      "grad_norm": 0.31136396527290344,
+      "learning_rate": 9.467305403412942e-05,
+      "loss": 0.1419,
+      "step": 60
+    },
+    {
+      "epoch": 1.3933143669985775,
+      "grad_norm": 0.2314603179693222,
+      "learning_rate": 9.05113549959898e-05,
+      "loss": 0.0957,
+      "step": 61
+    },
+    {
+      "epoch": 1.4160739687055477,
+      "grad_norm": 0.19551394879817963,
+      "learning_rate": 8.644904639967639e-05,
+      "loss": 0.0842,
+      "step": 62
+    },
+    {
+      "epoch": 1.438833570412518,
+      "grad_norm": 0.28223028779029846,
+      "learning_rate": 8.249154859679033e-05,
+      "loss": 0.0833,
+      "step": 63
+    },
+    {
+      "epoch": 1.461593172119488,
+      "grad_norm": 0.2497485727071762,
+      "learning_rate": 7.864414208954971e-05,
+      "loss": 0.0865,
+      "step": 64
+    },
+    {
+      "epoch": 1.484352773826458,
+      "grad_norm": 0.20309872925281525,
+      "learning_rate": 7.491196048499769e-05,
+      "loss": 0.0712,
+      "step": 65
+    },
+    {
+      "epoch": 1.5071123755334281,
+      "grad_norm": 0.33666595816612244,
+      "learning_rate": 7.12999836452144e-05,
+      "loss": 0.135,
+      "step": 66
+    },
+    {
+      "epoch": 1.5298719772403984,
+      "grad_norm": 0.24082504212856293,
+      "learning_rate": 6.781303104267059e-05,
+      "loss": 0.1203,
+      "step": 67
+    },
+    {
+      "epoch": 1.5526315789473686,
+      "grad_norm": 0.1645207554101944,
+      "learning_rate": 6.445575532958945e-05,
+      "loss": 0.0944,
+      "step": 68
+    },
+    {
+      "epoch": 1.5753911806543386,
+      "grad_norm": 0.1779041886329651,
+      "learning_rate": 6.123263612989815e-05,
+      "loss": 0.1227,
+      "step": 69
+    },
+    {
+      "epoch": 1.5981507823613086,
+      "grad_norm": 0.21718856692314148,
+      "learning_rate": 5.81479740620507e-05,
+      "loss": 0.1008,
+      "step": 70
+    },
+    {
+      "epoch": 1.6209103840682788,
+      "grad_norm": 0.23834489285945892,
+      "learning_rate": 5.520588500069867e-05,
+      "loss": 0.124,
+      "step": 71
+    },
+    {
+      "epoch": 1.643669985775249,
+      "grad_norm": 0.22460603713989258,
+      "learning_rate": 5.241029458486649e-05,
+      "loss": 0.0758,
+      "step": 72
+    },
+    {
+      "epoch": 1.666429587482219,
+      "grad_norm": 0.13075967133045197,
+      "learning_rate": 4.976493297995823e-05,
+      "loss": 0.0748,
+      "step": 73
+    },
+    {
+      "epoch": 1.689189189189189,
+      "grad_norm": 0.1683950275182724,
+      "learning_rate": 4.7273329900585954e-05,
+      "loss": 0.0806,
+      "step": 74
+    },
+    {
+      "epoch": 1.7119487908961593,
+      "grad_norm": 0.13967812061309814,
+      "learning_rate": 4.4938809900859955e-05,
+      "loss": 0.0732,
+      "step": 75
+    },
+    {
+      "epoch": 1.7119487908961593,
+      "eval_loss": 0.09383056312799454,
+      "eval_runtime": 0.284,
+      "eval_samples_per_second": 176.043,
+      "eval_steps_per_second": 45.771,
+      "step": 75
+    },
+    {
+      "epoch": 1.7347083926031295,
+      "grad_norm": 0.1622483730316162,
+      "learning_rate": 4.2764487938425205e-05,
+      "loss": 0.0564,
+      "step": 76
+    },
+    {
+      "epoch": 1.7574679943100997,
+      "grad_norm": 0.26513513922691345,
+      "learning_rate": 4.0753265218163486e-05,
+      "loss": 0.1552,
+      "step": 77
+    },
+    {
+      "epoch": 1.7802275960170697,
+      "grad_norm": 0.25839948654174805,
+      "learning_rate": 3.890782532110603e-05,
+      "loss": 0.1107,
+      "step": 78
+    },
+    {
+      "epoch": 1.8029871977240397,
+      "grad_norm": 0.24427573382854462,
+      "learning_rate": 3.7230630623722724e-05,
+      "loss": 0.1011,
+      "step": 79
+    },
+    {
+      "epoch": 1.82574679943101,
+      "grad_norm": 0.22686628997325897,
+      "learning_rate": 3.572391901236521e-05,
+      "loss": 0.1001,
+      "step": 80
+    },
+    {
+      "epoch": 1.8485064011379801,
+      "grad_norm": 0.15434470772743225,
+      "learning_rate": 3.4389700897247595e-05,
+      "loss": 0.0999,
+      "step": 81
+    },
+    {
+      "epoch": 1.8712660028449504,
+      "grad_norm": 0.15275193750858307,
+      "learning_rate": 3.322975652994985e-05,
+      "loss": 0.0832,
+      "step": 82
+    },
+    {
+      "epoch": 1.8940256045519204,
+      "grad_norm": 0.28379786014556885,
+      "learning_rate": 3.2245633628022074e-05,
+      "loss": 0.0678,
+      "step": 83
+    },
+    {
+      "epoch": 1.9167852062588904,
+      "grad_norm": 0.11858035624027252,
+      "learning_rate": 3.143864530986012e-05,
+      "loss": 0.071,
+      "step": 84
+    },
+    {
+      "epoch": 1.9395448079658606,
+      "grad_norm": 0.1222662478685379,
+      "learning_rate": 3.08098683426073e-05,
+      "loss": 0.0855,
+      "step": 85
+    },
+    {
+      "epoch": 1.9623044096728308,
+      "grad_norm": 0.2606646418571472,
+      "learning_rate": 3.0360141705420527e-05,
+      "loss": 0.0727,
+      "step": 86
+    },
+    {
+      "epoch": 1.9850640113798008,
+      "grad_norm": 0.21433259546756744,
+      "learning_rate": 3.009006547001768e-05,
+      "loss": 0.0841,
+      "step": 87
+    },
+    {
+      "epoch": 2.012802275960171,
+      "grad_norm": 0.21917670965194702,
+      "learning_rate": 2.9999999999999997e-05,
+      "loss": 0.1182,
+      "step": 88
     }
   ],
   "logging_steps": 1,
@@ -404,12 +678,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.22140137734144e+16,
+  "total_flos": 3.909666424120934e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null