joelniklaus commited on
Commit
9076e22
1 Parent(s): dbccfdf

Training in progress, step 450000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4adbd51f12f37875580b0dc03a54ab186184f725b7059c35d96ffc874e71eecb
3
  size 2693742553
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9fd74a3d7db813f8faa7594f4fc6b6d334c4b2c5b74ed5d009587c72e18d9a
3
  size 2693742553
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead6458d093c2c299dbf7a5f0fafd5c3d00c89c8e8d26b98e4f4fbf42560dad2
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8a5e1b62dd95881e02c04a76de9ac1f4faee2bb1833f6d1760bd79e8781b461
3
  size 1346893675
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2f3eb2c168189aedcdcd44a88bc11907a5ecbe1197c3bd115e4a03797f0f3d
3
  size 13611
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10d8c5bd5065fbc931ce5e630117e1e3c06957d862169b69f98034128a7eb278
3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e4ecef8b58c710458716a0153f8519567dd2a15c4728bc445f0af4d3fb15782
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78e735efa7e40e0dd22dcac5cb3724b0cbe120563d603ea4b62f22b0f40fc602
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.094734,
5
- "global_step": 400000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2470,11 +2470,319 @@
2470
  "eval_samples_per_second": 275.486,
2471
  "eval_steps_per_second": 4.353,
2472
  "step": 400000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2473
  }
2474
  ],
2475
  "max_steps": 1000000,
2476
  "num_train_epochs": 9223372036854775807,
2477
- "total_flos": 2.385998169502227e+19,
2478
  "trial_name": null,
2479
  "trial_params": null
2480
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.144733,
5
+ "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2470
  "eval_samples_per_second": 275.486,
2471
  "eval_steps_per_second": 4.353,
2472
  "step": 400000
2473
+ },
2474
+ {
2475
+ "epoch": 1.1,
2476
+ "learning_rate": 6.993324133116726e-05,
2477
+ "loss": 0.8315,
2478
+ "step": 401000
2479
+ },
2480
+ {
2481
+ "epoch": 1.1,
2482
+ "learning_rate": 6.978149344295242e-05,
2483
+ "loss": 0.786,
2484
+ "step": 402000
2485
+ },
2486
+ {
2487
+ "epoch": 1.1,
2488
+ "learning_rate": 6.962952922749457e-05,
2489
+ "loss": 0.9338,
2490
+ "step": 403000
2491
+ },
2492
+ {
2493
+ "epoch": 1.1,
2494
+ "learning_rate": 6.947735034665002e-05,
2495
+ "loss": 0.9648,
2496
+ "step": 404000
2497
+ },
2498
+ {
2499
+ "epoch": 1.1,
2500
+ "learning_rate": 6.932495846462261e-05,
2501
+ "loss": 0.8149,
2502
+ "step": 405000
2503
+ },
2504
+ {
2505
+ "epoch": 1.1,
2506
+ "learning_rate": 6.917235524794558e-05,
2507
+ "loss": 0.7549,
2508
+ "step": 406000
2509
+ },
2510
+ {
2511
+ "epoch": 1.1,
2512
+ "learning_rate": 6.901954236546323e-05,
2513
+ "loss": 0.7084,
2514
+ "step": 407000
2515
+ },
2516
+ {
2517
+ "epoch": 1.1,
2518
+ "learning_rate": 6.886652148831279e-05,
2519
+ "loss": 0.7488,
2520
+ "step": 408000
2521
+ },
2522
+ {
2523
+ "epoch": 1.1,
2524
+ "learning_rate": 6.871329428990602e-05,
2525
+ "loss": 0.7645,
2526
+ "step": 409000
2527
+ },
2528
+ {
2529
+ "epoch": 1.1,
2530
+ "learning_rate": 6.855986244591104e-05,
2531
+ "loss": 0.7303,
2532
+ "step": 410000
2533
+ },
2534
+ {
2535
+ "epoch": 1.11,
2536
+ "learning_rate": 6.840622763423391e-05,
2537
+ "loss": 0.6498,
2538
+ "step": 411000
2539
+ },
2540
+ {
2541
+ "epoch": 1.11,
2542
+ "learning_rate": 6.825239153500029e-05,
2543
+ "loss": 0.7127,
2544
+ "step": 412000
2545
+ },
2546
+ {
2547
+ "epoch": 1.11,
2548
+ "learning_rate": 6.809835583053715e-05,
2549
+ "loss": 0.8216,
2550
+ "step": 413000
2551
+ },
2552
+ {
2553
+ "epoch": 1.11,
2554
+ "learning_rate": 6.794412220535426e-05,
2555
+ "loss": 0.817,
2556
+ "step": 414000
2557
+ },
2558
+ {
2559
+ "epoch": 1.11,
2560
+ "learning_rate": 6.778969234612584e-05,
2561
+ "loss": 0.7474,
2562
+ "step": 415000
2563
+ },
2564
+ {
2565
+ "epoch": 1.11,
2566
+ "learning_rate": 6.763506794167208e-05,
2567
+ "loss": 0.7504,
2568
+ "step": 416000
2569
+ },
2570
+ {
2571
+ "epoch": 1.11,
2572
+ "learning_rate": 6.748025068294067e-05,
2573
+ "loss": 0.8277,
2574
+ "step": 417000
2575
+ },
2576
+ {
2577
+ "epoch": 1.11,
2578
+ "learning_rate": 6.732524226298841e-05,
2579
+ "loss": 0.9038,
2580
+ "step": 418000
2581
+ },
2582
+ {
2583
+ "epoch": 1.11,
2584
+ "learning_rate": 6.71700443769625e-05,
2585
+ "loss": 0.9412,
2586
+ "step": 419000
2587
+ },
2588
+ {
2589
+ "epoch": 1.11,
2590
+ "learning_rate": 6.701465872208216e-05,
2591
+ "loss": 0.8328,
2592
+ "step": 420000
2593
+ },
2594
+ {
2595
+ "epoch": 1.12,
2596
+ "learning_rate": 6.685908699762002e-05,
2597
+ "loss": 0.7013,
2598
+ "step": 421000
2599
+ },
2600
+ {
2601
+ "epoch": 1.12,
2602
+ "learning_rate": 6.670333090488356e-05,
2603
+ "loss": 0.8064,
2604
+ "step": 422000
2605
+ },
2606
+ {
2607
+ "epoch": 1.12,
2608
+ "learning_rate": 6.654739214719641e-05,
2609
+ "loss": 0.8228,
2610
+ "step": 423000
2611
+ },
2612
+ {
2613
+ "epoch": 1.12,
2614
+ "learning_rate": 6.639127242987988e-05,
2615
+ "loss": 0.831,
2616
+ "step": 424000
2617
+ },
2618
+ {
2619
+ "epoch": 1.12,
2620
+ "learning_rate": 6.623497346023418e-05,
2621
+ "loss": 0.606,
2622
+ "step": 425000
2623
+ },
2624
+ {
2625
+ "epoch": 1.12,
2626
+ "learning_rate": 6.607849694751977e-05,
2627
+ "loss": 0.596,
2628
+ "step": 426000
2629
+ },
2630
+ {
2631
+ "epoch": 1.12,
2632
+ "learning_rate": 6.592184460293877e-05,
2633
+ "loss": 0.7339,
2634
+ "step": 427000
2635
+ },
2636
+ {
2637
+ "epoch": 1.12,
2638
+ "learning_rate": 6.576501813961609e-05,
2639
+ "loss": 0.8095,
2640
+ "step": 428000
2641
+ },
2642
+ {
2643
+ "epoch": 1.12,
2644
+ "learning_rate": 6.56080192725808e-05,
2645
+ "loss": 0.8368,
2646
+ "step": 429000
2647
+ },
2648
+ {
2649
+ "epoch": 1.12,
2650
+ "learning_rate": 6.545084971874738e-05,
2651
+ "loss": 0.6778,
2652
+ "step": 430000
2653
+ },
2654
+ {
2655
+ "epoch": 1.13,
2656
+ "learning_rate": 6.529351119689688e-05,
2657
+ "loss": 0.602,
2658
+ "step": 431000
2659
+ },
2660
+ {
2661
+ "epoch": 1.13,
2662
+ "learning_rate": 6.513600542765817e-05,
2663
+ "loss": 0.8553,
2664
+ "step": 432000
2665
+ },
2666
+ {
2667
+ "epoch": 1.13,
2668
+ "learning_rate": 6.497833413348909e-05,
2669
+ "loss": 0.9017,
2670
+ "step": 433000
2671
+ },
2672
+ {
2673
+ "epoch": 1.13,
2674
+ "learning_rate": 6.48204990386577e-05,
2675
+ "loss": 0.8937,
2676
+ "step": 434000
2677
+ },
2678
+ {
2679
+ "epoch": 1.13,
2680
+ "learning_rate": 6.466250186922325e-05,
2681
+ "loss": 0.7829,
2682
+ "step": 435000
2683
+ },
2684
+ {
2685
+ "epoch": 1.13,
2686
+ "learning_rate": 6.450434435301751e-05,
2687
+ "loss": 0.7589,
2688
+ "step": 436000
2689
+ },
2690
+ {
2691
+ "epoch": 1.13,
2692
+ "learning_rate": 6.43460282196257e-05,
2693
+ "loss": 0.893,
2694
+ "step": 437000
2695
+ },
2696
+ {
2697
+ "epoch": 1.13,
2698
+ "learning_rate": 6.418755520036775e-05,
2699
+ "loss": 0.8418,
2700
+ "step": 438000
2701
+ },
2702
+ {
2703
+ "epoch": 1.13,
2704
+ "learning_rate": 6.402892702827916e-05,
2705
+ "loss": 0.8263,
2706
+ "step": 439000
2707
+ },
2708
+ {
2709
+ "epoch": 1.13,
2710
+ "learning_rate": 6.387014543809223e-05,
2711
+ "loss": 0.6969,
2712
+ "step": 440000
2713
+ },
2714
+ {
2715
+ "epoch": 1.14,
2716
+ "learning_rate": 6.371121216621698e-05,
2717
+ "loss": 0.5887,
2718
+ "step": 441000
2719
+ },
2720
+ {
2721
+ "epoch": 1.14,
2722
+ "learning_rate": 6.355212895072223e-05,
2723
+ "loss": 0.723,
2724
+ "step": 442000
2725
+ },
2726
+ {
2727
+ "epoch": 1.14,
2728
+ "learning_rate": 6.339289753131649e-05,
2729
+ "loss": 0.7757,
2730
+ "step": 443000
2731
+ },
2732
+ {
2733
+ "epoch": 1.14,
2734
+ "learning_rate": 6.323351964932908e-05,
2735
+ "loss": 0.7438,
2736
+ "step": 444000
2737
+ },
2738
+ {
2739
+ "epoch": 1.14,
2740
+ "learning_rate": 6.307399704769099e-05,
2741
+ "loss": 0.6629,
2742
+ "step": 445000
2743
+ },
2744
+ {
2745
+ "epoch": 1.14,
2746
+ "learning_rate": 6.291433147091583e-05,
2747
+ "loss": 0.6943,
2748
+ "step": 446000
2749
+ },
2750
+ {
2751
+ "epoch": 1.14,
2752
+ "learning_rate": 6.275452466508077e-05,
2753
+ "loss": 0.769,
2754
+ "step": 447000
2755
+ },
2756
+ {
2757
+ "epoch": 1.14,
2758
+ "learning_rate": 6.259457837780742e-05,
2759
+ "loss": 0.8287,
2760
+ "step": 448000
2761
+ },
2762
+ {
2763
+ "epoch": 1.14,
2764
+ "learning_rate": 6.243449435824276e-05,
2765
+ "loss": 0.8262,
2766
+ "step": 449000
2767
+ },
2768
+ {
2769
+ "epoch": 1.14,
2770
+ "learning_rate": 6.227427435703997e-05,
2771
+ "loss": 0.7464,
2772
+ "step": 450000
2773
+ },
2774
+ {
2775
+ "epoch": 1.14,
2776
+ "eval_loss": 0.45768678188323975,
2777
+ "eval_runtime": 28.0661,
2778
+ "eval_samples_per_second": 178.151,
2779
+ "eval_steps_per_second": 2.815,
2780
+ "step": 450000
2781
  }
2782
  ],
2783
  "max_steps": 1000000,
2784
  "num_train_epochs": 9223372036854775807,
2785
+ "total_flos": 2.684243932962462e+19,
2786
  "trial_name": null,
2787
  "trial_params": null
2788
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0b02673b13f0fc59a49044f6f1aa1cfe1a6854d2087f76c0de0776564c78579
3
- size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f147202664426d6d9fdfd0b21608081a18482651bb03dac1298bc70b03736e88
3
+ size 3503
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead6458d093c2c299dbf7a5f0fafd5c3d00c89c8e8d26b98e4f4fbf42560dad2
3
  size 1346893675
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8a5e1b62dd95881e02c04a76de9ac1f4faee2bb1833f6d1760bd79e8781b461
3
  size 1346893675
runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/1680890667.0265427/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf326db418547e90b063718701d268428b727a818a8711e385b7d51bb6ee3de
3
+ size 5494
runs/Apr07_17-56-12_t1v-n-7a44a9fa-w-0/events.out.tfevents.1680890667.t1v-n-7a44a9fa-w-0.506706.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d255afb91fa33400a4d29ba888bd907306caeb39a18e8b1914abc3ef5beccab0
3
+ size 12106
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c0b02673b13f0fc59a49044f6f1aa1cfe1a6854d2087f76c0de0776564c78579
3
- size 3439
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f147202664426d6d9fdfd0b21608081a18482651bb03dac1298bc70b03736e88
3
+ size 3503