nicolasdupuisroy commited on
Commit
c05124b
1 Parent(s): a65ec9c

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
 
 
5
  - generated_from_trainer
6
  datasets:
7
  - imagefolder
@@ -22,7 +24,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7538461538461538
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +34,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.3683
36
- - Accuracy: 0.7538
37
 
38
  ## Model description
39
 
 
2
  license: apache-2.0
3
  base_model: google/vit-base-patch16-224-in21k
4
  tags:
5
+ - image-classification
6
+ - vision
7
  - generated_from_trainer
8
  datasets:
9
  - imagefolder
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.7615384615384615
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the imagefolder dataset.
36
  It achieves the following results on the evaluation set:
37
+ - Loss: 1.3440
38
+ - Accuracy: 0.7615
39
 
40
  ## Model description
41
 
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 200.0,
3
- "eval_accuracy": 0.7769230769230769,
4
- "eval_loss": 1.6097954511642456,
5
- "eval_runtime": 2.2046,
6
- "eval_samples_per_second": 58.968,
7
- "eval_steps_per_second": 0.907,
8
- "train_loss": 0.39147548845836094,
9
- "train_runtime": 3403.5317,
10
- "train_samples_per_second": 30.556,
11
- "train_steps_per_second": 0.411
12
  }
 
1
  {
2
+ "epoch": 250.0,
3
+ "eval_accuracy": 0.7615384615384615,
4
+ "eval_loss": 1.3439607620239258,
5
+ "eval_runtime": 2.1945,
6
+ "eval_samples_per_second": 59.239,
7
+ "eval_steps_per_second": 0.911,
8
+ "train_loss": 0.09994067628043038,
9
+ "train_runtime": 2169.5998,
10
+ "train_samples_per_second": 59.919,
11
+ "train_steps_per_second": 0.807
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 200.0,
3
- "eval_accuracy": 0.7769230769230769,
4
- "eval_loss": 1.6097954511642456,
5
- "eval_runtime": 2.2046,
6
- "eval_samples_per_second": 58.968,
7
- "eval_steps_per_second": 0.907
8
  }
 
1
  {
2
+ "epoch": 250.0,
3
+ "eval_accuracy": 0.7615384615384615,
4
+ "eval_loss": 1.3439607620239258,
5
+ "eval_runtime": 2.1945,
6
+ "eval_samples_per_second": 59.239,
7
+ "eval_steps_per_second": 0.911
8
  }
runs/Jan17_22-32-09_c6ad14a30b7d/events.out.tfevents.1705532949.c6ad14a30b7d.23778.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7183f6d2fc8c073961f22def15c7cc3b4a4196910ca6a9a40c935f4fd32b53ce
3
+ size 411
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 200.0,
3
- "train_loss": 0.39147548845836094,
4
- "train_runtime": 3403.5317,
5
- "train_samples_per_second": 30.556,
6
- "train_steps_per_second": 0.411
7
  }
 
1
  {
2
+ "epoch": 250.0,
3
+ "train_loss": 0.09994067628043038,
4
+ "train_runtime": 2169.5998,
5
+ "train_samples_per_second": 59.919,
6
+ "train_steps_per_second": 0.807
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.6097954511642456,
3
- "best_model_checkpoint": "./outputs_letter3/checkpoint-1386",
4
- "epoch": 200.0,
5
  "eval_steps": 500,
6
- "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2674,14 +2674,683 @@
2674
  "train_runtime": 3403.5317,
2675
  "train_samples_per_second": 30.556,
2676
  "train_steps_per_second": 0.411
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677
  }
2678
  ],
2679
  "logging_steps": 10,
2680
- "max_steps": 1400,
2681
  "num_input_tokens_seen": 0,
2682
- "num_train_epochs": 200,
2683
  "save_steps": 500,
2684
- "total_flos": 8.062778508115968e+18,
2685
  "train_batch_size": 80,
2686
  "trial_name": null,
2687
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.3439607620239258,
3
+ "best_model_checkpoint": "./outputs_letter3/checkpoint-1736",
4
+ "epoch": 250.0,
5
  "eval_steps": 500,
6
+ "global_step": 1750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2674
  "train_runtime": 3403.5317,
2675
  "train_samples_per_second": 30.556,
2676
  "train_steps_per_second": 0.411
2677
+ },
2678
+ {
2679
+ "epoch": 201.0,
2680
+ "eval_accuracy": 0.7461538461538462,
2681
+ "eval_loss": 1.6449843645095825,
2682
+ "eval_runtime": 2.095,
2683
+ "eval_samples_per_second": 62.053,
2684
+ "eval_steps_per_second": 0.955,
2685
+ "step": 1407
2686
+ },
2687
+ {
2688
+ "epoch": 201.43,
2689
+ "learning_rate": 1.988571428571429e-05,
2690
+ "loss": 0.706,
2691
+ "step": 1410
2692
+ },
2693
+ {
2694
+ "epoch": 202.0,
2695
+ "eval_accuracy": 0.7538461538461538,
2696
+ "eval_loss": 1.646350622177124,
2697
+ "eval_runtime": 2.1984,
2698
+ "eval_samples_per_second": 59.134,
2699
+ "eval_steps_per_second": 0.91,
2700
+ "step": 1414
2701
+ },
2702
+ {
2703
+ "epoch": 202.86,
2704
+ "learning_rate": 1.9771428571428574e-05,
2705
+ "loss": 0.6746,
2706
+ "step": 1420
2707
+ },
2708
+ {
2709
+ "epoch": 203.0,
2710
+ "eval_accuracy": 0.7615384615384615,
2711
+ "eval_loss": 1.6431909799575806,
2712
+ "eval_runtime": 2.1397,
2713
+ "eval_samples_per_second": 60.755,
2714
+ "eval_steps_per_second": 0.935,
2715
+ "step": 1421
2716
+ },
2717
+ {
2718
+ "epoch": 204.0,
2719
+ "eval_accuracy": 0.7615384615384615,
2720
+ "eval_loss": 1.6220970153808594,
2721
+ "eval_runtime": 2.2125,
2722
+ "eval_samples_per_second": 58.756,
2723
+ "eval_steps_per_second": 0.904,
2724
+ "step": 1428
2725
+ },
2726
+ {
2727
+ "epoch": 204.29,
2728
+ "learning_rate": 1.9657142857142858e-05,
2729
+ "loss": 0.6282,
2730
+ "step": 1430
2731
+ },
2732
+ {
2733
+ "epoch": 205.0,
2734
+ "eval_accuracy": 0.7769230769230769,
2735
+ "eval_loss": 1.5988324880599976,
2736
+ "eval_runtime": 2.3558,
2737
+ "eval_samples_per_second": 55.182,
2738
+ "eval_steps_per_second": 0.849,
2739
+ "step": 1435
2740
+ },
2741
+ {
2742
+ "epoch": 205.71,
2743
+ "learning_rate": 1.9542857142857143e-05,
2744
+ "loss": 0.618,
2745
+ "step": 1440
2746
+ },
2747
+ {
2748
+ "epoch": 206.0,
2749
+ "eval_accuracy": 0.7461538461538462,
2750
+ "eval_loss": 1.6031513214111328,
2751
+ "eval_runtime": 2.2121,
2752
+ "eval_samples_per_second": 58.767,
2753
+ "eval_steps_per_second": 0.904,
2754
+ "step": 1442
2755
+ },
2756
+ {
2757
+ "epoch": 207.0,
2758
+ "eval_accuracy": 0.7923076923076923,
2759
+ "eval_loss": 1.561190128326416,
2760
+ "eval_runtime": 2.2132,
2761
+ "eval_samples_per_second": 58.739,
2762
+ "eval_steps_per_second": 0.904,
2763
+ "step": 1449
2764
+ },
2765
+ {
2766
+ "epoch": 207.14,
2767
+ "learning_rate": 1.942857142857143e-05,
2768
+ "loss": 0.6053,
2769
+ "step": 1450
2770
+ },
2771
+ {
2772
+ "epoch": 208.0,
2773
+ "eval_accuracy": 0.7769230769230769,
2774
+ "eval_loss": 1.5888561010360718,
2775
+ "eval_runtime": 2.1847,
2776
+ "eval_samples_per_second": 59.504,
2777
+ "eval_steps_per_second": 0.915,
2778
+ "step": 1456
2779
+ },
2780
+ {
2781
+ "epoch": 208.57,
2782
+ "learning_rate": 1.9314285714285718e-05,
2783
+ "loss": 0.5977,
2784
+ "step": 1460
2785
+ },
2786
+ {
2787
+ "epoch": 209.0,
2788
+ "eval_accuracy": 0.7692307692307693,
2789
+ "eval_loss": 1.566961646080017,
2790
+ "eval_runtime": 2.2023,
2791
+ "eval_samples_per_second": 59.03,
2792
+ "eval_steps_per_second": 0.908,
2793
+ "step": 1463
2794
+ },
2795
+ {
2796
+ "epoch": 210.0,
2797
+ "learning_rate": 1.9200000000000003e-05,
2798
+ "loss": 0.6131,
2799
+ "step": 1470
2800
+ },
2801
+ {
2802
+ "epoch": 210.0,
2803
+ "eval_accuracy": 0.7461538461538462,
2804
+ "eval_loss": 1.5975637435913086,
2805
+ "eval_runtime": 2.3397,
2806
+ "eval_samples_per_second": 55.564,
2807
+ "eval_steps_per_second": 0.855,
2808
+ "step": 1470
2809
+ },
2810
+ {
2811
+ "epoch": 211.0,
2812
+ "eval_accuracy": 0.7538461538461538,
2813
+ "eval_loss": 1.5593103170394897,
2814
+ "eval_runtime": 2.2361,
2815
+ "eval_samples_per_second": 58.136,
2816
+ "eval_steps_per_second": 0.894,
2817
+ "step": 1477
2818
+ },
2819
+ {
2820
+ "epoch": 211.43,
2821
+ "learning_rate": 1.9085714285714287e-05,
2822
+ "loss": 0.5753,
2823
+ "step": 1480
2824
+ },
2825
+ {
2826
+ "epoch": 212.0,
2827
+ "eval_accuracy": 0.7384615384615385,
2828
+ "eval_loss": 1.5974284410476685,
2829
+ "eval_runtime": 2.3233,
2830
+ "eval_samples_per_second": 55.955,
2831
+ "eval_steps_per_second": 0.861,
2832
+ "step": 1484
2833
+ },
2834
+ {
2835
+ "epoch": 212.86,
2836
+ "learning_rate": 1.8971428571428575e-05,
2837
+ "loss": 0.576,
2838
+ "step": 1490
2839
+ },
2840
+ {
2841
+ "epoch": 213.0,
2842
+ "eval_accuracy": 0.7769230769230769,
2843
+ "eval_loss": 1.573266863822937,
2844
+ "eval_runtime": 2.2312,
2845
+ "eval_samples_per_second": 58.264,
2846
+ "eval_steps_per_second": 0.896,
2847
+ "step": 1491
2848
+ },
2849
+ {
2850
+ "epoch": 214.0,
2851
+ "eval_accuracy": 0.7769230769230769,
2852
+ "eval_loss": 1.497517466545105,
2853
+ "eval_runtime": 2.3929,
2854
+ "eval_samples_per_second": 54.328,
2855
+ "eval_steps_per_second": 0.836,
2856
+ "step": 1498
2857
+ },
2858
+ {
2859
+ "epoch": 214.29,
2860
+ "learning_rate": 1.885714285714286e-05,
2861
+ "loss": 0.5744,
2862
+ "step": 1500
2863
+ },
2864
+ {
2865
+ "epoch": 215.0,
2866
+ "eval_accuracy": 0.7461538461538462,
2867
+ "eval_loss": 1.562217116355896,
2868
+ "eval_runtime": 2.2169,
2869
+ "eval_samples_per_second": 58.642,
2870
+ "eval_steps_per_second": 0.902,
2871
+ "step": 1505
2872
+ },
2873
+ {
2874
+ "epoch": 215.71,
2875
+ "learning_rate": 1.8742857142857143e-05,
2876
+ "loss": 0.5658,
2877
+ "step": 1510
2878
+ },
2879
+ {
2880
+ "epoch": 216.0,
2881
+ "eval_accuracy": 0.7538461538461538,
2882
+ "eval_loss": 1.5334222316741943,
2883
+ "eval_runtime": 2.3054,
2884
+ "eval_samples_per_second": 56.388,
2885
+ "eval_steps_per_second": 0.868,
2886
+ "step": 1512
2887
+ },
2888
+ {
2889
+ "epoch": 217.0,
2890
+ "eval_accuracy": 0.7538461538461538,
2891
+ "eval_loss": 1.5438138246536255,
2892
+ "eval_runtime": 2.3277,
2893
+ "eval_samples_per_second": 55.849,
2894
+ "eval_steps_per_second": 0.859,
2895
+ "step": 1519
2896
+ },
2897
+ {
2898
+ "epoch": 217.14,
2899
+ "learning_rate": 1.8628571428571428e-05,
2900
+ "loss": 0.5507,
2901
+ "step": 1520
2902
+ },
2903
+ {
2904
+ "epoch": 218.0,
2905
+ "eval_accuracy": 0.7307692307692307,
2906
+ "eval_loss": 1.5617175102233887,
2907
+ "eval_runtime": 2.3945,
2908
+ "eval_samples_per_second": 54.291,
2909
+ "eval_steps_per_second": 0.835,
2910
+ "step": 1526
2911
+ },
2912
+ {
2913
+ "epoch": 218.57,
2914
+ "learning_rate": 1.8514285714285716e-05,
2915
+ "loss": 0.5474,
2916
+ "step": 1530
2917
+ },
2918
+ {
2919
+ "epoch": 219.0,
2920
+ "eval_accuracy": 0.7461538461538462,
2921
+ "eval_loss": 1.542279601097107,
2922
+ "eval_runtime": 2.1837,
2923
+ "eval_samples_per_second": 59.533,
2924
+ "eval_steps_per_second": 0.916,
2925
+ "step": 1533
2926
+ },
2927
+ {
2928
+ "epoch": 220.0,
2929
+ "learning_rate": 1.8400000000000003e-05,
2930
+ "loss": 0.5095,
2931
+ "step": 1540
2932
+ },
2933
+ {
2934
+ "epoch": 220.0,
2935
+ "eval_accuracy": 0.7769230769230769,
2936
+ "eval_loss": 1.509877324104309,
2937
+ "eval_runtime": 2.1925,
2938
+ "eval_samples_per_second": 59.293,
2939
+ "eval_steps_per_second": 0.912,
2940
+ "step": 1540
2941
+ },
2942
+ {
2943
+ "epoch": 221.0,
2944
+ "eval_accuracy": 0.7692307692307693,
2945
+ "eval_loss": 1.5009371042251587,
2946
+ "eval_runtime": 2.1917,
2947
+ "eval_samples_per_second": 59.315,
2948
+ "eval_steps_per_second": 0.913,
2949
+ "step": 1547
2950
+ },
2951
+ {
2952
+ "epoch": 221.43,
2953
+ "learning_rate": 1.8285714285714288e-05,
2954
+ "loss": 0.5263,
2955
+ "step": 1550
2956
+ },
2957
+ {
2958
+ "epoch": 222.0,
2959
+ "eval_accuracy": 0.7615384615384615,
2960
+ "eval_loss": 1.4717506170272827,
2961
+ "eval_runtime": 2.1826,
2962
+ "eval_samples_per_second": 59.561,
2963
+ "eval_steps_per_second": 0.916,
2964
+ "step": 1554
2965
+ },
2966
+ {
2967
+ "epoch": 222.86,
2968
+ "learning_rate": 1.8171428571428572e-05,
2969
+ "loss": 0.4934,
2970
+ "step": 1560
2971
+ },
2972
+ {
2973
+ "epoch": 223.0,
2974
+ "eval_accuracy": 0.7461538461538462,
2975
+ "eval_loss": 1.5150481462478638,
2976
+ "eval_runtime": 2.1689,
2977
+ "eval_samples_per_second": 59.939,
2978
+ "eval_steps_per_second": 0.922,
2979
+ "step": 1561
2980
+ },
2981
+ {
2982
+ "epoch": 224.0,
2983
+ "eval_accuracy": 0.7615384615384615,
2984
+ "eval_loss": 1.5133140087127686,
2985
+ "eval_runtime": 2.2493,
2986
+ "eval_samples_per_second": 57.795,
2987
+ "eval_steps_per_second": 0.889,
2988
+ "step": 1568
2989
+ },
2990
+ {
2991
+ "epoch": 224.29,
2992
+ "learning_rate": 1.8057142857142857e-05,
2993
+ "loss": 0.4935,
2994
+ "step": 1570
2995
+ },
2996
+ {
2997
+ "epoch": 225.0,
2998
+ "eval_accuracy": 0.7461538461538462,
2999
+ "eval_loss": 1.5053058862686157,
3000
+ "eval_runtime": 2.2888,
3001
+ "eval_samples_per_second": 56.799,
3002
+ "eval_steps_per_second": 0.874,
3003
+ "step": 1575
3004
+ },
3005
+ {
3006
+ "epoch": 225.71,
3007
+ "learning_rate": 1.7942857142857144e-05,
3008
+ "loss": 0.4808,
3009
+ "step": 1580
3010
+ },
3011
+ {
3012
+ "epoch": 226.0,
3013
+ "eval_accuracy": 0.7461538461538462,
3014
+ "eval_loss": 1.5152372121810913,
3015
+ "eval_runtime": 2.3287,
3016
+ "eval_samples_per_second": 55.824,
3017
+ "eval_steps_per_second": 0.859,
3018
+ "step": 1582
3019
+ },
3020
+ {
3021
+ "epoch": 227.0,
3022
+ "eval_accuracy": 0.7384615384615385,
3023
+ "eval_loss": 1.521767497062683,
3024
+ "eval_runtime": 2.2464,
3025
+ "eval_samples_per_second": 57.871,
3026
+ "eval_steps_per_second": 0.89,
3027
+ "step": 1589
3028
+ },
3029
+ {
3030
+ "epoch": 227.14,
3031
+ "learning_rate": 1.7828571428571432e-05,
3032
+ "loss": 0.5039,
3033
+ "step": 1590
3034
+ },
3035
+ {
3036
+ "epoch": 228.0,
3037
+ "eval_accuracy": 0.7615384615384615,
3038
+ "eval_loss": 1.4631297588348389,
3039
+ "eval_runtime": 2.1868,
3040
+ "eval_samples_per_second": 59.449,
3041
+ "eval_steps_per_second": 0.915,
3042
+ "step": 1596
3043
+ },
3044
+ {
3045
+ "epoch": 228.57,
3046
+ "learning_rate": 1.7714285714285717e-05,
3047
+ "loss": 0.4516,
3048
+ "step": 1600
3049
+ },
3050
+ {
3051
+ "epoch": 229.0,
3052
+ "eval_accuracy": 0.7692307692307693,
3053
+ "eval_loss": 1.4795747995376587,
3054
+ "eval_runtime": 2.3162,
3055
+ "eval_samples_per_second": 56.126,
3056
+ "eval_steps_per_second": 0.863,
3057
+ "step": 1603
3058
+ },
3059
+ {
3060
+ "epoch": 230.0,
3061
+ "learning_rate": 1.76e-05,
3062
+ "loss": 0.4655,
3063
+ "step": 1610
3064
+ },
3065
+ {
3066
+ "epoch": 230.0,
3067
+ "eval_accuracy": 0.7769230769230769,
3068
+ "eval_loss": 1.4648711681365967,
3069
+ "eval_runtime": 2.3186,
3070
+ "eval_samples_per_second": 56.068,
3071
+ "eval_steps_per_second": 0.863,
3072
+ "step": 1610
3073
+ },
3074
+ {
3075
+ "epoch": 231.0,
3076
+ "eval_accuracy": 0.7846153846153846,
3077
+ "eval_loss": 1.4528415203094482,
3078
+ "eval_runtime": 2.2367,
3079
+ "eval_samples_per_second": 58.12,
3080
+ "eval_steps_per_second": 0.894,
3081
+ "step": 1617
3082
+ },
3083
+ {
3084
+ "epoch": 231.43,
3085
+ "learning_rate": 1.748571428571429e-05,
3086
+ "loss": 0.4487,
3087
+ "step": 1620
3088
+ },
3089
+ {
3090
+ "epoch": 232.0,
3091
+ "eval_accuracy": 0.7769230769230769,
3092
+ "eval_loss": 1.4157787561416626,
3093
+ "eval_runtime": 2.2157,
3094
+ "eval_samples_per_second": 58.673,
3095
+ "eval_steps_per_second": 0.903,
3096
+ "step": 1624
3097
+ },
3098
+ {
3099
+ "epoch": 232.86,
3100
+ "learning_rate": 1.7371428571428573e-05,
3101
+ "loss": 0.453,
3102
+ "step": 1630
3103
+ },
3104
+ {
3105
+ "epoch": 233.0,
3106
+ "eval_accuracy": 0.7538461538461538,
3107
+ "eval_loss": 1.4378057718276978,
3108
+ "eval_runtime": 2.2298,
3109
+ "eval_samples_per_second": 58.301,
3110
+ "eval_steps_per_second": 0.897,
3111
+ "step": 1631
3112
+ },
3113
+ {
3114
+ "epoch": 234.0,
3115
+ "eval_accuracy": 0.7461538461538462,
3116
+ "eval_loss": 1.4233053922653198,
3117
+ "eval_runtime": 2.2254,
3118
+ "eval_samples_per_second": 58.416,
3119
+ "eval_steps_per_second": 0.899,
3120
+ "step": 1638
3121
+ },
3122
+ {
3123
+ "epoch": 234.29,
3124
+ "learning_rate": 1.7257142857142857e-05,
3125
+ "loss": 0.4248,
3126
+ "step": 1640
3127
+ },
3128
+ {
3129
+ "epoch": 235.0,
3130
+ "eval_accuracy": 0.7615384615384615,
3131
+ "eval_loss": 1.4437103271484375,
3132
+ "eval_runtime": 2.2018,
3133
+ "eval_samples_per_second": 59.042,
3134
+ "eval_steps_per_second": 0.908,
3135
+ "step": 1645
3136
+ },
3137
+ {
3138
+ "epoch": 235.71,
3139
+ "learning_rate": 1.7142857142857142e-05,
3140
+ "loss": 0.4103,
3141
+ "step": 1650
3142
+ },
3143
+ {
3144
+ "epoch": 236.0,
3145
+ "eval_accuracy": 0.7384615384615385,
3146
+ "eval_loss": 1.4489341974258423,
3147
+ "eval_runtime": 2.3267,
3148
+ "eval_samples_per_second": 55.872,
3149
+ "eval_steps_per_second": 0.86,
3150
+ "step": 1652
3151
+ },
3152
+ {
3153
+ "epoch": 237.0,
3154
+ "eval_accuracy": 0.7461538461538462,
3155
+ "eval_loss": 1.437820315361023,
3156
+ "eval_runtime": 2.2082,
3157
+ "eval_samples_per_second": 58.871,
3158
+ "eval_steps_per_second": 0.906,
3159
+ "step": 1659
3160
+ },
3161
+ {
3162
+ "epoch": 237.14,
3163
+ "learning_rate": 1.702857142857143e-05,
3164
+ "loss": 0.4146,
3165
+ "step": 1660
3166
+ },
3167
+ {
3168
+ "epoch": 238.0,
3169
+ "eval_accuracy": 0.7384615384615385,
3170
+ "eval_loss": 1.4532761573791504,
3171
+ "eval_runtime": 2.2013,
3172
+ "eval_samples_per_second": 59.057,
3173
+ "eval_steps_per_second": 0.909,
3174
+ "step": 1666
3175
+ },
3176
+ {
3177
+ "epoch": 238.57,
3178
+ "learning_rate": 1.6914285714285717e-05,
3179
+ "loss": 0.4313,
3180
+ "step": 1670
3181
+ },
3182
+ {
3183
+ "epoch": 239.0,
3184
+ "eval_accuracy": 0.7615384615384615,
3185
+ "eval_loss": 1.444754958152771,
3186
+ "eval_runtime": 2.2195,
3187
+ "eval_samples_per_second": 58.573,
3188
+ "eval_steps_per_second": 0.901,
3189
+ "step": 1673
3190
+ },
3191
+ {
3192
+ "epoch": 240.0,
3193
+ "learning_rate": 1.6800000000000002e-05,
3194
+ "loss": 0.408,
3195
+ "step": 1680
3196
+ },
3197
+ {
3198
+ "epoch": 240.0,
3199
+ "eval_accuracy": 0.7692307692307693,
3200
+ "eval_loss": 1.428984522819519,
3201
+ "eval_runtime": 2.3419,
3202
+ "eval_samples_per_second": 55.51,
3203
+ "eval_steps_per_second": 0.854,
3204
+ "step": 1680
3205
+ },
3206
+ {
3207
+ "epoch": 241.0,
3208
+ "eval_accuracy": 0.7461538461538462,
3209
+ "eval_loss": 1.4546881914138794,
3210
+ "eval_runtime": 2.2356,
3211
+ "eval_samples_per_second": 58.149,
3212
+ "eval_steps_per_second": 0.895,
3213
+ "step": 1687
3214
+ },
3215
+ {
3216
+ "epoch": 241.43,
3217
+ "learning_rate": 1.6685714285714286e-05,
3218
+ "loss": 0.4145,
3219
+ "step": 1690
3220
+ },
3221
+ {
3222
+ "epoch": 242.0,
3223
+ "eval_accuracy": 0.7461538461538462,
3224
+ "eval_loss": 1.3930408954620361,
3225
+ "eval_runtime": 2.2411,
3226
+ "eval_samples_per_second": 58.006,
3227
+ "eval_steps_per_second": 0.892,
3228
+ "step": 1694
3229
+ },
3230
+ {
3231
+ "epoch": 242.86,
3232
+ "learning_rate": 1.6571428571428574e-05,
3233
+ "loss": 0.4193,
3234
+ "step": 1700
3235
+ },
3236
+ {
3237
+ "epoch": 243.0,
3238
+ "eval_accuracy": 0.7538461538461538,
3239
+ "eval_loss": 1.4186941385269165,
3240
+ "eval_runtime": 2.3409,
3241
+ "eval_samples_per_second": 55.534,
3242
+ "eval_steps_per_second": 0.854,
3243
+ "step": 1701
3244
+ },
3245
+ {
3246
+ "epoch": 244.0,
3247
+ "eval_accuracy": 0.7615384615384615,
3248
+ "eval_loss": 1.3854211568832397,
3249
+ "eval_runtime": 2.288,
3250
+ "eval_samples_per_second": 56.817,
3251
+ "eval_steps_per_second": 0.874,
3252
+ "step": 1708
3253
+ },
3254
+ {
3255
+ "epoch": 244.29,
3256
+ "learning_rate": 1.645714285714286e-05,
3257
+ "loss": 0.3951,
3258
+ "step": 1710
3259
+ },
3260
+ {
3261
+ "epoch": 245.0,
3262
+ "eval_accuracy": 0.7615384615384615,
3263
+ "eval_loss": 1.3977891206741333,
3264
+ "eval_runtime": 2.2608,
3265
+ "eval_samples_per_second": 57.502,
3266
+ "eval_steps_per_second": 0.885,
3267
+ "step": 1715
3268
+ },
3269
+ {
3270
+ "epoch": 245.71,
3271
+ "learning_rate": 1.6342857142857146e-05,
3272
+ "loss": 0.3871,
3273
+ "step": 1720
3274
+ },
3275
+ {
3276
+ "epoch": 246.0,
3277
+ "eval_accuracy": 0.7538461538461538,
3278
+ "eval_loss": 1.3596566915512085,
3279
+ "eval_runtime": 2.3275,
3280
+ "eval_samples_per_second": 55.853,
3281
+ "eval_steps_per_second": 0.859,
3282
+ "step": 1722
3283
+ },
3284
+ {
3285
+ "epoch": 247.0,
3286
+ "eval_accuracy": 0.7692307692307693,
3287
+ "eval_loss": 1.346314549446106,
3288
+ "eval_runtime": 2.3219,
3289
+ "eval_samples_per_second": 55.989,
3290
+ "eval_steps_per_second": 0.861,
3291
+ "step": 1729
3292
+ },
3293
+ {
3294
+ "epoch": 247.14,
3295
+ "learning_rate": 1.622857142857143e-05,
3296
+ "loss": 0.3679,
3297
+ "step": 1730
3298
+ },
3299
+ {
3300
+ "epoch": 248.0,
3301
+ "eval_accuracy": 0.7615384615384615,
3302
+ "eval_loss": 1.3439607620239258,
3303
+ "eval_runtime": 2.249,
3304
+ "eval_samples_per_second": 57.803,
3305
+ "eval_steps_per_second": 0.889,
3306
+ "step": 1736
3307
+ },
3308
+ {
3309
+ "epoch": 248.57,
3310
+ "learning_rate": 1.6114285714285715e-05,
3311
+ "loss": 0.3788,
3312
+ "step": 1740
3313
+ },
3314
+ {
3315
+ "epoch": 249.0,
3316
+ "eval_accuracy": 0.7692307692307693,
3317
+ "eval_loss": 1.3646103143692017,
3318
+ "eval_runtime": 2.1853,
3319
+ "eval_samples_per_second": 59.489,
3320
+ "eval_steps_per_second": 0.915,
3321
+ "step": 1743
3322
+ },
3323
+ {
3324
+ "epoch": 250.0,
3325
+ "learning_rate": 1.6000000000000003e-05,
3326
+ "loss": 0.379,
3327
+ "step": 1750
3328
+ },
3329
+ {
3330
+ "epoch": 250.0,
3331
+ "eval_accuracy": 0.7538461538461538,
3332
+ "eval_loss": 1.3682621717453003,
3333
+ "eval_runtime": 2.2333,
3334
+ "eval_samples_per_second": 58.209,
3335
+ "eval_steps_per_second": 0.896,
3336
+ "step": 1750
3337
+ },
3338
+ {
3339
+ "epoch": 250.0,
3340
+ "step": 1750,
3341
+ "total_flos": 1.007847313514496e+19,
3342
+ "train_loss": 0.09994067628043038,
3343
+ "train_runtime": 2169.5998,
3344
+ "train_samples_per_second": 59.919,
3345
+ "train_steps_per_second": 0.807
3346
  }
3347
  ],
3348
  "logging_steps": 10,
3349
+ "max_steps": 1750,
3350
  "num_input_tokens_seen": 0,
3351
+ "num_train_epochs": 250,
3352
  "save_steps": 500,
3353
+ "total_flos": 1.007847313514496e+19,
3354
  "train_batch_size": 80,
3355
  "trial_name": null,
3356
  "trial_params": null