Fu-chiang commited on
Commit
58e1e97
1 Parent(s): 6290aa7

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/bit-50",
3
+ "architectures": [
4
+ "BitForImageClassification"
5
+ ],
6
+ "conv_layer": "std_conv",
7
+ "depths": [
8
+ 3,
9
+ 4,
10
+ 6,
11
+ 3
12
+ ],
13
+ "drop_path_rate": 0.0,
14
+ "embedding_dynamic_padding": false,
15
+ "embedding_size": 64,
16
+ "global_padding": null,
17
+ "hidden_act": "relu",
18
+ "hidden_sizes": [
19
+ 256,
20
+ 512,
21
+ 1024,
22
+ 2048
23
+ ],
24
+ "id2label": {
25
+ "0": "glaucoma",
26
+ "1": "normal"
27
+ },
28
+ "label2id": {
29
+ "glaucoma": 0,
30
+ "normal": 1
31
+ },
32
+ "layer_type": "preactivation",
33
+ "model_type": "bit",
34
+ "num_channels": 3,
35
+ "num_groups": 32,
36
+ "out_features": [
37
+ "stage4"
38
+ ],
39
+ "out_indices": [
40
+ 4
41
+ ],
42
+ "output_stride": 32,
43
+ "problem_type": "single_label_classification",
44
+ "stage_names": [
45
+ "stem",
46
+ "stage1",
47
+ "stage2",
48
+ "stage3",
49
+ "stage4"
50
+ ],
51
+ "torch_dtype": "float32",
52
+ "transformers_version": "4.31.0",
53
+ "width_factor": 1
54
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caa6a00ab903d697709af2442e051d8636ccdf44c907569aaad3e640d96f322a
3
+ size 188126085
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 448,
4
+ "width": 448
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "image_processor_type": "BitImageProcessor",
17
+ "image_std": [
18
+ 0.5,
19
+ 0.5,
20
+ 0.5
21
+ ],
22
+ "resample": 2,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 448
26
+ }
27
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:540bd946cbf8df39e5d8716dbb3953e50c0f14956c8db0ac7c0223dab7585df1
3
+ size 94069425
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62f00a8afa2437c04569329e0b1ef3cc347ac6d475d155806975dad22a1ac07a
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0e6e669f2e3e9ee0ef63d22b9d88fba05e247e5b55055948219b85580f27d4d
3
+ size 627
trainer_state.json ADDED
@@ -0,0 +1,2638 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9184013322231473,
3
+ "best_model_checkpoint": "bit-50-Glaucoma\\checkpoint-3718",
4
+ "epoch": 44.0,
5
+ "global_step": 3718,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.12,
12
+ "learning_rate": 1.1904761904761904e-06,
13
+ "loss": 0.8071,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.24,
18
+ "learning_rate": 2.3809523809523808e-06,
19
+ "loss": 0.7134,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.36,
24
+ "learning_rate": 3.5714285714285714e-06,
25
+ "loss": 0.647,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.47,
30
+ "learning_rate": 4.7619047619047615e-06,
31
+ "loss": 0.547,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.59,
36
+ "learning_rate": 5.9523809523809525e-06,
37
+ "loss": 0.519,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.71,
42
+ "learning_rate": 7.142857142857143e-06,
43
+ "loss": 0.4948,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.83,
48
+ "learning_rate": 8.333333333333334e-06,
49
+ "loss": 0.4692,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.95,
54
+ "learning_rate": 9.523809523809523e-06,
55
+ "loss": 0.4444,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.99,
60
+ "eval_accuracy": 0.813488759367194,
61
+ "eval_loss": 0.4316416382789612,
62
+ "eval_runtime": 16.626,
63
+ "eval_samples_per_second": 72.236,
64
+ "eval_steps_per_second": 2.286,
65
+ "step": 84
66
+ },
67
+ {
68
+ "epoch": 1.07,
69
+ "learning_rate": 1.0714285714285714e-05,
70
+ "loss": 0.4251,
71
+ "step": 90
72
+ },
73
+ {
74
+ "epoch": 1.18,
75
+ "learning_rate": 1.1904761904761905e-05,
76
+ "loss": 0.4638,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 1.3,
81
+ "learning_rate": 1.3095238095238096e-05,
82
+ "loss": 0.4005,
83
+ "step": 110
84
+ },
85
+ {
86
+ "epoch": 1.42,
87
+ "learning_rate": 1.4285714285714285e-05,
88
+ "loss": 0.4005,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 1.54,
93
+ "learning_rate": 1.5476190476190476e-05,
94
+ "loss": 0.4023,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 1.66,
99
+ "learning_rate": 1.6666666666666667e-05,
100
+ "loss": 0.3837,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 1.78,
105
+ "learning_rate": 1.785714285714286e-05,
106
+ "loss": 0.3927,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 1.89,
111
+ "learning_rate": 1.9047619047619046e-05,
112
+ "loss": 0.3947,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 2.0,
117
+ "eval_accuracy": 0.8592839300582847,
118
+ "eval_loss": 0.31854528188705444,
119
+ "eval_runtime": 15.8188,
120
+ "eval_samples_per_second": 75.922,
121
+ "eval_steps_per_second": 2.402,
122
+ "step": 169
123
+ },
124
+ {
125
+ "epoch": 2.01,
126
+ "learning_rate": 2.023809523809524e-05,
127
+ "loss": 0.3676,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 2.13,
132
+ "learning_rate": 2.1428571428571428e-05,
133
+ "loss": 0.3767,
134
+ "step": 180
135
+ },
136
+ {
137
+ "epoch": 2.25,
138
+ "learning_rate": 2.261904761904762e-05,
139
+ "loss": 0.385,
140
+ "step": 190
141
+ },
142
+ {
143
+ "epoch": 2.37,
144
+ "learning_rate": 2.380952380952381e-05,
145
+ "loss": 0.3561,
146
+ "step": 200
147
+ },
148
+ {
149
+ "epoch": 2.49,
150
+ "learning_rate": 2.5e-05,
151
+ "loss": 0.356,
152
+ "step": 210
153
+ },
154
+ {
155
+ "epoch": 2.6,
156
+ "learning_rate": 2.6190476190476192e-05,
157
+ "loss": 0.3683,
158
+ "step": 220
159
+ },
160
+ {
161
+ "epoch": 2.72,
162
+ "learning_rate": 2.7380952380952383e-05,
163
+ "loss": 0.3506,
164
+ "step": 230
165
+ },
166
+ {
167
+ "epoch": 2.84,
168
+ "learning_rate": 2.857142857142857e-05,
169
+ "loss": 0.3569,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 2.96,
174
+ "learning_rate": 2.9761904761904762e-05,
175
+ "loss": 0.3475,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 2.99,
180
+ "eval_accuracy": 0.8759367194004996,
181
+ "eval_loss": 0.31600221991539,
182
+ "eval_runtime": 16.0329,
183
+ "eval_samples_per_second": 74.908,
184
+ "eval_steps_per_second": 2.37,
185
+ "step": 253
186
+ },
187
+ {
188
+ "epoch": 3.08,
189
+ "learning_rate": 3.095238095238095e-05,
190
+ "loss": 0.3372,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 3.2,
195
+ "learning_rate": 3.2142857142857144e-05,
196
+ "loss": 0.3394,
197
+ "step": 270
198
+ },
199
+ {
200
+ "epoch": 3.31,
201
+ "learning_rate": 3.3333333333333335e-05,
202
+ "loss": 0.3077,
203
+ "step": 280
204
+ },
205
+ {
206
+ "epoch": 3.43,
207
+ "learning_rate": 3.4523809523809526e-05,
208
+ "loss": 0.3221,
209
+ "step": 290
210
+ },
211
+ {
212
+ "epoch": 3.55,
213
+ "learning_rate": 3.571428571428572e-05,
214
+ "loss": 0.3581,
215
+ "step": 300
216
+ },
217
+ {
218
+ "epoch": 3.67,
219
+ "learning_rate": 3.690476190476191e-05,
220
+ "loss": 0.3742,
221
+ "step": 310
222
+ },
223
+ {
224
+ "epoch": 3.79,
225
+ "learning_rate": 3.809523809523809e-05,
226
+ "loss": 0.377,
227
+ "step": 320
228
+ },
229
+ {
230
+ "epoch": 3.91,
231
+ "learning_rate": 3.928571428571429e-05,
232
+ "loss": 0.3796,
233
+ "step": 330
234
+ },
235
+ {
236
+ "epoch": 4.0,
237
+ "eval_accuracy": 0.8834304746044963,
238
+ "eval_loss": 0.29224836826324463,
239
+ "eval_runtime": 15.8421,
240
+ "eval_samples_per_second": 75.811,
241
+ "eval_steps_per_second": 2.399,
242
+ "step": 338
243
+ },
244
+ {
245
+ "epoch": 4.02,
246
+ "learning_rate": 4.047619047619048e-05,
247
+ "loss": 0.3687,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 4.14,
252
+ "learning_rate": 4.166666666666667e-05,
253
+ "loss": 0.3326,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 4.26,
258
+ "learning_rate": 4.2857142857142856e-05,
259
+ "loss": 0.3275,
260
+ "step": 360
261
+ },
262
+ {
263
+ "epoch": 4.38,
264
+ "learning_rate": 4.404761904761905e-05,
265
+ "loss": 0.3296,
266
+ "step": 370
267
+ },
268
+ {
269
+ "epoch": 4.5,
270
+ "learning_rate": 4.523809523809524e-05,
271
+ "loss": 0.3257,
272
+ "step": 380
273
+ },
274
+ {
275
+ "epoch": 4.62,
276
+ "learning_rate": 4.642857142857143e-05,
277
+ "loss": 0.3362,
278
+ "step": 390
279
+ },
280
+ {
281
+ "epoch": 4.73,
282
+ "learning_rate": 4.761904761904762e-05,
283
+ "loss": 0.3471,
284
+ "step": 400
285
+ },
286
+ {
287
+ "epoch": 4.85,
288
+ "learning_rate": 4.880952380952381e-05,
289
+ "loss": 0.3511,
290
+ "step": 410
291
+ },
292
+ {
293
+ "epoch": 4.97,
294
+ "learning_rate": 5e-05,
295
+ "loss": 0.3618,
296
+ "step": 420
297
+ },
298
+ {
299
+ "epoch": 4.99,
300
+ "eval_accuracy": 0.8859283930058285,
301
+ "eval_loss": 0.27965524792671204,
302
+ "eval_runtime": 15.8956,
303
+ "eval_samples_per_second": 75.555,
304
+ "eval_steps_per_second": 2.391,
305
+ "step": 422
306
+ },
307
+ {
308
+ "epoch": 5.09,
309
+ "learning_rate": 4.986772486772487e-05,
310
+ "loss": 0.3145,
311
+ "step": 430
312
+ },
313
+ {
314
+ "epoch": 5.21,
315
+ "learning_rate": 4.973544973544973e-05,
316
+ "loss": 0.3305,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 5.33,
321
+ "learning_rate": 4.960317460317461e-05,
322
+ "loss": 0.3104,
323
+ "step": 450
324
+ },
325
+ {
326
+ "epoch": 5.44,
327
+ "learning_rate": 4.9470899470899475e-05,
328
+ "loss": 0.3015,
329
+ "step": 460
330
+ },
331
+ {
332
+ "epoch": 5.56,
333
+ "learning_rate": 4.933862433862434e-05,
334
+ "loss": 0.3249,
335
+ "step": 470
336
+ },
337
+ {
338
+ "epoch": 5.68,
339
+ "learning_rate": 4.9206349206349204e-05,
340
+ "loss": 0.2987,
341
+ "step": 480
342
+ },
343
+ {
344
+ "epoch": 5.8,
345
+ "learning_rate": 4.9074074074074075e-05,
346
+ "loss": 0.3434,
347
+ "step": 490
348
+ },
349
+ {
350
+ "epoch": 5.92,
351
+ "learning_rate": 4.894179894179895e-05,
352
+ "loss": 0.3205,
353
+ "step": 500
354
+ },
355
+ {
356
+ "epoch": 6.0,
357
+ "eval_accuracy": 0.8409658617818485,
358
+ "eval_loss": 0.3533245027065277,
359
+ "eval_runtime": 15.8631,
360
+ "eval_samples_per_second": 75.71,
361
+ "eval_steps_per_second": 2.395,
362
+ "step": 507
363
+ },
364
+ {
365
+ "epoch": 6.04,
366
+ "learning_rate": 4.880952380952381e-05,
367
+ "loss": 0.3518,
368
+ "step": 510
369
+ },
370
+ {
371
+ "epoch": 6.15,
372
+ "learning_rate": 4.8677248677248676e-05,
373
+ "loss": 0.2728,
374
+ "step": 520
375
+ },
376
+ {
377
+ "epoch": 6.27,
378
+ "learning_rate": 4.854497354497355e-05,
379
+ "loss": 0.3404,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 6.39,
384
+ "learning_rate": 4.841269841269841e-05,
385
+ "loss": 0.3298,
386
+ "step": 540
387
+ },
388
+ {
389
+ "epoch": 6.51,
390
+ "learning_rate": 4.8280423280423284e-05,
391
+ "loss": 0.3178,
392
+ "step": 550
393
+ },
394
+ {
395
+ "epoch": 6.63,
396
+ "learning_rate": 4.814814814814815e-05,
397
+ "loss": 0.3299,
398
+ "step": 560
399
+ },
400
+ {
401
+ "epoch": 6.75,
402
+ "learning_rate": 4.801587301587302e-05,
403
+ "loss": 0.3369,
404
+ "step": 570
405
+ },
406
+ {
407
+ "epoch": 6.86,
408
+ "learning_rate": 4.7883597883597884e-05,
409
+ "loss": 0.2974,
410
+ "step": 580
411
+ },
412
+ {
413
+ "epoch": 6.98,
414
+ "learning_rate": 4.7751322751322756e-05,
415
+ "loss": 0.2743,
416
+ "step": 590
417
+ },
418
+ {
419
+ "epoch": 6.99,
420
+ "eval_accuracy": 0.8601165695253955,
421
+ "eval_loss": 0.2907889485359192,
422
+ "eval_runtime": 15.8108,
423
+ "eval_samples_per_second": 75.961,
424
+ "eval_steps_per_second": 2.403,
425
+ "step": 591
426
+ },
427
+ {
428
+ "epoch": 7.1,
429
+ "learning_rate": 4.761904761904762e-05,
430
+ "loss": 0.2966,
431
+ "step": 600
432
+ },
433
+ {
434
+ "epoch": 7.22,
435
+ "learning_rate": 4.748677248677249e-05,
436
+ "loss": 0.3083,
437
+ "step": 610
438
+ },
439
+ {
440
+ "epoch": 7.34,
441
+ "learning_rate": 4.7354497354497356e-05,
442
+ "loss": 0.299,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 7.46,
447
+ "learning_rate": 4.722222222222222e-05,
448
+ "loss": 0.2669,
449
+ "step": 630
450
+ },
451
+ {
452
+ "epoch": 7.57,
453
+ "learning_rate": 4.708994708994709e-05,
454
+ "loss": 0.2996,
455
+ "step": 640
456
+ },
457
+ {
458
+ "epoch": 7.69,
459
+ "learning_rate": 4.6957671957671964e-05,
460
+ "loss": 0.2686,
461
+ "step": 650
462
+ },
463
+ {
464
+ "epoch": 7.81,
465
+ "learning_rate": 4.682539682539683e-05,
466
+ "loss": 0.3172,
467
+ "step": 660
468
+ },
469
+ {
470
+ "epoch": 7.93,
471
+ "learning_rate": 4.669312169312169e-05,
472
+ "loss": 0.2765,
473
+ "step": 670
474
+ },
475
+ {
476
+ "epoch": 8.0,
477
+ "eval_accuracy": 0.8601165695253955,
478
+ "eval_loss": 0.2945786714553833,
479
+ "eval_runtime": 16.0316,
480
+ "eval_samples_per_second": 74.914,
481
+ "eval_steps_per_second": 2.37,
482
+ "step": 676
483
+ },
484
+ {
485
+ "epoch": 8.05,
486
+ "learning_rate": 4.656084656084656e-05,
487
+ "loss": 0.2692,
488
+ "step": 680
489
+ },
490
+ {
491
+ "epoch": 8.17,
492
+ "learning_rate": 4.642857142857143e-05,
493
+ "loss": 0.273,
494
+ "step": 690
495
+ },
496
+ {
497
+ "epoch": 8.28,
498
+ "learning_rate": 4.62962962962963e-05,
499
+ "loss": 0.252,
500
+ "step": 700
501
+ },
502
+ {
503
+ "epoch": 8.4,
504
+ "learning_rate": 4.6164021164021165e-05,
505
+ "loss": 0.2719,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 8.52,
510
+ "learning_rate": 4.603174603174603e-05,
511
+ "loss": 0.273,
512
+ "step": 720
513
+ },
514
+ {
515
+ "epoch": 8.64,
516
+ "learning_rate": 4.58994708994709e-05,
517
+ "loss": 0.2853,
518
+ "step": 730
519
+ },
520
+ {
521
+ "epoch": 8.76,
522
+ "learning_rate": 4.576719576719577e-05,
523
+ "loss": 0.2965,
524
+ "step": 740
525
+ },
526
+ {
527
+ "epoch": 8.88,
528
+ "learning_rate": 4.563492063492064e-05,
529
+ "loss": 0.2842,
530
+ "step": 750
531
+ },
532
+ {
533
+ "epoch": 8.99,
534
+ "learning_rate": 4.55026455026455e-05,
535
+ "loss": 0.2528,
536
+ "step": 760
537
+ },
538
+ {
539
+ "epoch": 8.99,
540
+ "eval_accuracy": 0.8992506244796004,
541
+ "eval_loss": 0.24736784398555756,
542
+ "eval_runtime": 15.802,
543
+ "eval_samples_per_second": 76.003,
544
+ "eval_steps_per_second": 2.405,
545
+ "step": 760
546
+ },
547
+ {
548
+ "epoch": 9.11,
549
+ "learning_rate": 4.5370370370370374e-05,
550
+ "loss": 0.2585,
551
+ "step": 770
552
+ },
553
+ {
554
+ "epoch": 9.23,
555
+ "learning_rate": 4.523809523809524e-05,
556
+ "loss": 0.2567,
557
+ "step": 780
558
+ },
559
+ {
560
+ "epoch": 9.35,
561
+ "learning_rate": 4.510582010582011e-05,
562
+ "loss": 0.2794,
563
+ "step": 790
564
+ },
565
+ {
566
+ "epoch": 9.47,
567
+ "learning_rate": 4.4973544973544974e-05,
568
+ "loss": 0.2698,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 9.59,
573
+ "learning_rate": 4.4841269841269846e-05,
574
+ "loss": 0.2472,
575
+ "step": 810
576
+ },
577
+ {
578
+ "epoch": 9.7,
579
+ "learning_rate": 4.470899470899471e-05,
580
+ "loss": 0.2537,
581
+ "step": 820
582
+ },
583
+ {
584
+ "epoch": 9.82,
585
+ "learning_rate": 4.4576719576719575e-05,
586
+ "loss": 0.2635,
587
+ "step": 830
588
+ },
589
+ {
590
+ "epoch": 9.94,
591
+ "learning_rate": 4.4444444444444447e-05,
592
+ "loss": 0.2678,
593
+ "step": 840
594
+ },
595
+ {
596
+ "epoch": 10.0,
597
+ "eval_accuracy": 0.9084096586178185,
598
+ "eval_loss": 0.2332490235567093,
599
+ "eval_runtime": 15.7703,
600
+ "eval_samples_per_second": 76.156,
601
+ "eval_steps_per_second": 2.41,
602
+ "step": 845
603
+ },
604
+ {
605
+ "epoch": 10.06,
606
+ "learning_rate": 4.431216931216932e-05,
607
+ "loss": 0.2777,
608
+ "step": 850
609
+ },
610
+ {
611
+ "epoch": 10.18,
612
+ "learning_rate": 4.417989417989418e-05,
613
+ "loss": 0.2543,
614
+ "step": 860
615
+ },
616
+ {
617
+ "epoch": 10.3,
618
+ "learning_rate": 4.404761904761905e-05,
619
+ "loss": 0.2434,
620
+ "step": 870
621
+ },
622
+ {
623
+ "epoch": 10.41,
624
+ "learning_rate": 4.391534391534391e-05,
625
+ "loss": 0.2629,
626
+ "step": 880
627
+ },
628
+ {
629
+ "epoch": 10.53,
630
+ "learning_rate": 4.378306878306879e-05,
631
+ "loss": 0.2453,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 10.65,
636
+ "learning_rate": 4.3650793650793655e-05,
637
+ "loss": 0.266,
638
+ "step": 900
639
+ },
640
+ {
641
+ "epoch": 10.77,
642
+ "learning_rate": 4.351851851851852e-05,
643
+ "loss": 0.2349,
644
+ "step": 910
645
+ },
646
+ {
647
+ "epoch": 10.89,
648
+ "learning_rate": 4.3386243386243384e-05,
649
+ "loss": 0.2314,
650
+ "step": 920
651
+ },
652
+ {
653
+ "epoch": 10.99,
654
+ "eval_accuracy": 0.8992506244796004,
655
+ "eval_loss": 0.2373119592666626,
656
+ "eval_runtime": 15.6414,
657
+ "eval_samples_per_second": 76.783,
658
+ "eval_steps_per_second": 2.429,
659
+ "step": 929
660
+ },
661
+ {
662
+ "epoch": 11.01,
663
+ "learning_rate": 4.3253968253968256e-05,
664
+ "loss": 0.2436,
665
+ "step": 930
666
+ },
667
+ {
668
+ "epoch": 11.12,
669
+ "learning_rate": 4.312169312169313e-05,
670
+ "loss": 0.2576,
671
+ "step": 940
672
+ },
673
+ {
674
+ "epoch": 11.24,
675
+ "learning_rate": 4.298941798941799e-05,
676
+ "loss": 0.2982,
677
+ "step": 950
678
+ },
679
+ {
680
+ "epoch": 11.36,
681
+ "learning_rate": 4.2857142857142856e-05,
682
+ "loss": 0.2399,
683
+ "step": 960
684
+ },
685
+ {
686
+ "epoch": 11.48,
687
+ "learning_rate": 4.272486772486773e-05,
688
+ "loss": 0.2442,
689
+ "step": 970
690
+ },
691
+ {
692
+ "epoch": 11.6,
693
+ "learning_rate": 4.259259259259259e-05,
694
+ "loss": 0.2629,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 11.72,
699
+ "learning_rate": 4.2460317460317464e-05,
700
+ "loss": 0.2484,
701
+ "step": 990
702
+ },
703
+ {
704
+ "epoch": 11.83,
705
+ "learning_rate": 4.232804232804233e-05,
706
+ "loss": 0.2462,
707
+ "step": 1000
708
+ },
709
+ {
710
+ "epoch": 11.95,
711
+ "learning_rate": 4.21957671957672e-05,
712
+ "loss": 0.244,
713
+ "step": 1010
714
+ },
715
+ {
716
+ "epoch": 12.0,
717
+ "eval_accuracy": 0.9034138218151541,
718
+ "eval_loss": 0.2284502238035202,
719
+ "eval_runtime": 15.8599,
720
+ "eval_samples_per_second": 75.726,
721
+ "eval_steps_per_second": 2.396,
722
+ "step": 1014
723
+ },
724
+ {
725
+ "epoch": 12.07,
726
+ "learning_rate": 4.2063492063492065e-05,
727
+ "loss": 0.2496,
728
+ "step": 1020
729
+ },
730
+ {
731
+ "epoch": 12.19,
732
+ "learning_rate": 4.193121693121693e-05,
733
+ "loss": 0.2235,
734
+ "step": 1030
735
+ },
736
+ {
737
+ "epoch": 12.31,
738
+ "learning_rate": 4.17989417989418e-05,
739
+ "loss": 0.2194,
740
+ "step": 1040
741
+ },
742
+ {
743
+ "epoch": 12.43,
744
+ "learning_rate": 4.166666666666667e-05,
745
+ "loss": 0.2358,
746
+ "step": 1050
747
+ },
748
+ {
749
+ "epoch": 12.54,
750
+ "learning_rate": 4.153439153439154e-05,
751
+ "loss": 0.2447,
752
+ "step": 1060
753
+ },
754
+ {
755
+ "epoch": 12.66,
756
+ "learning_rate": 4.14021164021164e-05,
757
+ "loss": 0.2537,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 12.78,
762
+ "learning_rate": 4.126984126984127e-05,
763
+ "loss": 0.2246,
764
+ "step": 1080
765
+ },
766
+ {
767
+ "epoch": 12.9,
768
+ "learning_rate": 4.1137566137566144e-05,
769
+ "loss": 0.2313,
770
+ "step": 1090
771
+ },
772
+ {
773
+ "epoch": 12.99,
774
+ "eval_accuracy": 0.9017485428809325,
775
+ "eval_loss": 0.2355087250471115,
776
+ "eval_runtime": 15.8512,
777
+ "eval_samples_per_second": 75.767,
778
+ "eval_steps_per_second": 2.397,
779
+ "step": 1098
780
+ },
781
+ {
782
+ "epoch": 13.02,
783
+ "learning_rate": 4.100529100529101e-05,
784
+ "loss": 0.2217,
785
+ "step": 1100
786
+ },
787
+ {
788
+ "epoch": 13.14,
789
+ "learning_rate": 4.0873015873015874e-05,
790
+ "loss": 0.196,
791
+ "step": 1110
792
+ },
793
+ {
794
+ "epoch": 13.25,
795
+ "learning_rate": 4.074074074074074e-05,
796
+ "loss": 0.2298,
797
+ "step": 1120
798
+ },
799
+ {
800
+ "epoch": 13.37,
801
+ "learning_rate": 4.060846560846561e-05,
802
+ "loss": 0.2352,
803
+ "step": 1130
804
+ },
805
+ {
806
+ "epoch": 13.49,
807
+ "learning_rate": 4.047619047619048e-05,
808
+ "loss": 0.2582,
809
+ "step": 1140
810
+ },
811
+ {
812
+ "epoch": 13.61,
813
+ "learning_rate": 4.0343915343915346e-05,
814
+ "loss": 0.2168,
815
+ "step": 1150
816
+ },
817
+ {
818
+ "epoch": 13.73,
819
+ "learning_rate": 4.021164021164021e-05,
820
+ "loss": 0.2278,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 13.85,
825
+ "learning_rate": 4.007936507936508e-05,
826
+ "loss": 0.2511,
827
+ "step": 1170
828
+ },
829
+ {
830
+ "epoch": 13.96,
831
+ "learning_rate": 3.9947089947089946e-05,
832
+ "loss": 0.2055,
833
+ "step": 1180
834
+ },
835
+ {
836
+ "epoch": 14.0,
837
+ "eval_accuracy": 0.8975853455453788,
838
+ "eval_loss": 0.2440473437309265,
839
+ "eval_runtime": 15.5821,
840
+ "eval_samples_per_second": 77.075,
841
+ "eval_steps_per_second": 2.439,
842
+ "step": 1183
843
+ },
844
+ {
845
+ "epoch": 14.08,
846
+ "learning_rate": 3.981481481481482e-05,
847
+ "loss": 0.2228,
848
+ "step": 1190
849
+ },
850
+ {
851
+ "epoch": 14.2,
852
+ "learning_rate": 3.968253968253968e-05,
853
+ "loss": 0.2118,
854
+ "step": 1200
855
+ },
856
+ {
857
+ "epoch": 14.32,
858
+ "learning_rate": 3.9550264550264554e-05,
859
+ "loss": 0.2281,
860
+ "step": 1210
861
+ },
862
+ {
863
+ "epoch": 14.44,
864
+ "learning_rate": 3.941798941798942e-05,
865
+ "loss": 0.2107,
866
+ "step": 1220
867
+ },
868
+ {
869
+ "epoch": 14.56,
870
+ "learning_rate": 3.928571428571429e-05,
871
+ "loss": 0.2133,
872
+ "step": 1230
873
+ },
874
+ {
875
+ "epoch": 14.67,
876
+ "learning_rate": 3.9153439153439155e-05,
877
+ "loss": 0.2271,
878
+ "step": 1240
879
+ },
880
+ {
881
+ "epoch": 14.79,
882
+ "learning_rate": 3.9021164021164026e-05,
883
+ "loss": 0.2252,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 14.91,
888
+ "learning_rate": 3.888888888888889e-05,
889
+ "loss": 0.2271,
890
+ "step": 1260
891
+ },
892
+ {
893
+ "epoch": 14.99,
894
+ "eval_accuracy": 0.8884263114071607,
895
+ "eval_loss": 0.2461342066526413,
896
+ "eval_runtime": 15.57,
897
+ "eval_samples_per_second": 77.136,
898
+ "eval_steps_per_second": 2.441,
899
+ "step": 1267
900
+ },
901
+ {
902
+ "epoch": 15.03,
903
+ "learning_rate": 3.8756613756613755e-05,
904
+ "loss": 0.223,
905
+ "step": 1270
906
+ },
907
+ {
908
+ "epoch": 15.15,
909
+ "learning_rate": 3.862433862433863e-05,
910
+ "loss": 0.1974,
911
+ "step": 1280
912
+ },
913
+ {
914
+ "epoch": 15.27,
915
+ "learning_rate": 3.84920634920635e-05,
916
+ "loss": 0.2514,
917
+ "step": 1290
918
+ },
919
+ {
920
+ "epoch": 15.38,
921
+ "learning_rate": 3.835978835978836e-05,
922
+ "loss": 0.2005,
923
+ "step": 1300
924
+ },
925
+ {
926
+ "epoch": 15.5,
927
+ "learning_rate": 3.822751322751323e-05,
928
+ "loss": 0.2014,
929
+ "step": 1310
930
+ },
931
+ {
932
+ "epoch": 15.62,
933
+ "learning_rate": 3.809523809523809e-05,
934
+ "loss": 0.1952,
935
+ "step": 1320
936
+ },
937
+ {
938
+ "epoch": 15.74,
939
+ "learning_rate": 3.7962962962962964e-05,
940
+ "loss": 0.2002,
941
+ "step": 1330
942
+ },
943
+ {
944
+ "epoch": 15.86,
945
+ "learning_rate": 3.7830687830687835e-05,
946
+ "loss": 0.2351,
947
+ "step": 1340
948
+ },
949
+ {
950
+ "epoch": 15.98,
951
+ "learning_rate": 3.76984126984127e-05,
952
+ "loss": 0.2106,
953
+ "step": 1350
954
+ },
955
+ {
956
+ "epoch": 16.0,
957
+ "eval_accuracy": 0.8784346378018318,
958
+ "eval_loss": 0.26777663826942444,
959
+ "eval_runtime": 15.6876,
960
+ "eval_samples_per_second": 76.557,
961
+ "eval_steps_per_second": 2.422,
962
+ "step": 1352
963
+ },
964
+ {
965
+ "epoch": 16.09,
966
+ "learning_rate": 3.7566137566137564e-05,
967
+ "loss": 0.2066,
968
+ "step": 1360
969
+ },
970
+ {
971
+ "epoch": 16.21,
972
+ "learning_rate": 3.7433862433862436e-05,
973
+ "loss": 0.2009,
974
+ "step": 1370
975
+ },
976
+ {
977
+ "epoch": 16.33,
978
+ "learning_rate": 3.730158730158731e-05,
979
+ "loss": 0.2162,
980
+ "step": 1380
981
+ },
982
+ {
983
+ "epoch": 16.45,
984
+ "learning_rate": 3.716931216931217e-05,
985
+ "loss": 0.2155,
986
+ "step": 1390
987
+ },
988
+ {
989
+ "epoch": 16.57,
990
+ "learning_rate": 3.7037037037037037e-05,
991
+ "loss": 0.1995,
992
+ "step": 1400
993
+ },
994
+ {
995
+ "epoch": 16.69,
996
+ "learning_rate": 3.690476190476191e-05,
997
+ "loss": 0.1992,
998
+ "step": 1410
999
+ },
1000
+ {
1001
+ "epoch": 16.8,
1002
+ "learning_rate": 3.677248677248677e-05,
1003
+ "loss": 0.2366,
1004
+ "step": 1420
1005
+ },
1006
+ {
1007
+ "epoch": 16.92,
1008
+ "learning_rate": 3.6640211640211644e-05,
1009
+ "loss": 0.2076,
1010
+ "step": 1430
1011
+ },
1012
+ {
1013
+ "epoch": 16.99,
1014
+ "eval_accuracy": 0.9150707743547044,
1015
+ "eval_loss": 0.22340074181556702,
1016
+ "eval_runtime": 16.0204,
1017
+ "eval_samples_per_second": 74.967,
1018
+ "eval_steps_per_second": 2.372,
1019
+ "step": 1436
1020
+ },
1021
+ {
1022
+ "epoch": 17.04,
1023
+ "learning_rate": 3.650793650793651e-05,
1024
+ "loss": 0.2054,
1025
+ "step": 1440
1026
+ },
1027
+ {
1028
+ "epoch": 17.16,
1029
+ "learning_rate": 3.637566137566138e-05,
1030
+ "loss": 0.1922,
1031
+ "step": 1450
1032
+ },
1033
+ {
1034
+ "epoch": 17.28,
1035
+ "learning_rate": 3.6243386243386245e-05,
1036
+ "loss": 0.1768,
1037
+ "step": 1460
1038
+ },
1039
+ {
1040
+ "epoch": 17.4,
1041
+ "learning_rate": 3.611111111111111e-05,
1042
+ "loss": 0.2221,
1043
+ "step": 1470
1044
+ },
1045
+ {
1046
+ "epoch": 17.51,
1047
+ "learning_rate": 3.597883597883598e-05,
1048
+ "loss": 0.1813,
1049
+ "step": 1480
1050
+ },
1051
+ {
1052
+ "epoch": 17.63,
1053
+ "learning_rate": 3.584656084656085e-05,
1054
+ "loss": 0.2035,
1055
+ "step": 1490
1056
+ },
1057
+ {
1058
+ "epoch": 17.75,
1059
+ "learning_rate": 3.571428571428572e-05,
1060
+ "loss": 0.2015,
1061
+ "step": 1500
1062
+ },
1063
+ {
1064
+ "epoch": 17.87,
1065
+ "learning_rate": 3.558201058201058e-05,
1066
+ "loss": 0.2246,
1067
+ "step": 1510
1068
+ },
1069
+ {
1070
+ "epoch": 17.99,
1071
+ "learning_rate": 3.5449735449735446e-05,
1072
+ "loss": 0.2074,
1073
+ "step": 1520
1074
+ },
1075
+ {
1076
+ "epoch": 18.0,
1077
+ "eval_accuracy": 0.8934221482098251,
1078
+ "eval_loss": 0.24407817423343658,
1079
+ "eval_runtime": 15.7214,
1080
+ "eval_samples_per_second": 76.393,
1081
+ "eval_steps_per_second": 2.417,
1082
+ "step": 1521
1083
+ },
1084
+ {
1085
+ "epoch": 18.11,
1086
+ "learning_rate": 3.5317460317460324e-05,
1087
+ "loss": 0.208,
1088
+ "step": 1530
1089
+ },
1090
+ {
1091
+ "epoch": 18.22,
1092
+ "learning_rate": 3.518518518518519e-05,
1093
+ "loss": 0.1872,
1094
+ "step": 1540
1095
+ },
1096
+ {
1097
+ "epoch": 18.34,
1098
+ "learning_rate": 3.5052910052910054e-05,
1099
+ "loss": 0.2111,
1100
+ "step": 1550
1101
+ },
1102
+ {
1103
+ "epoch": 18.46,
1104
+ "learning_rate": 3.492063492063492e-05,
1105
+ "loss": 0.1953,
1106
+ "step": 1560
1107
+ },
1108
+ {
1109
+ "epoch": 18.58,
1110
+ "learning_rate": 3.478835978835979e-05,
1111
+ "loss": 0.1802,
1112
+ "step": 1570
1113
+ },
1114
+ {
1115
+ "epoch": 18.7,
1116
+ "learning_rate": 3.465608465608466e-05,
1117
+ "loss": 0.2181,
1118
+ "step": 1580
1119
+ },
1120
+ {
1121
+ "epoch": 18.82,
1122
+ "learning_rate": 3.4523809523809526e-05,
1123
+ "loss": 0.2199,
1124
+ "step": 1590
1125
+ },
1126
+ {
1127
+ "epoch": 18.93,
1128
+ "learning_rate": 3.439153439153439e-05,
1129
+ "loss": 0.2043,
1130
+ "step": 1600
1131
+ },
1132
+ {
1133
+ "epoch": 18.99,
1134
+ "eval_accuracy": 0.9000832639467111,
1135
+ "eval_loss": 0.2535579800605774,
1136
+ "eval_runtime": 15.8014,
1137
+ "eval_samples_per_second": 76.006,
1138
+ "eval_steps_per_second": 2.405,
1139
+ "step": 1605
1140
+ },
1141
+ {
1142
+ "epoch": 19.05,
1143
+ "learning_rate": 3.425925925925926e-05,
1144
+ "loss": 0.1837,
1145
+ "step": 1610
1146
+ },
1147
+ {
1148
+ "epoch": 19.17,
1149
+ "learning_rate": 3.412698412698413e-05,
1150
+ "loss": 0.1705,
1151
+ "step": 1620
1152
+ },
1153
+ {
1154
+ "epoch": 19.29,
1155
+ "learning_rate": 3.3994708994709e-05,
1156
+ "loss": 0.1925,
1157
+ "step": 1630
1158
+ },
1159
+ {
1160
+ "epoch": 19.41,
1161
+ "learning_rate": 3.386243386243386e-05,
1162
+ "loss": 0.1857,
1163
+ "step": 1640
1164
+ },
1165
+ {
1166
+ "epoch": 19.53,
1167
+ "learning_rate": 3.3730158730158734e-05,
1168
+ "loss": 0.181,
1169
+ "step": 1650
1170
+ },
1171
+ {
1172
+ "epoch": 19.64,
1173
+ "learning_rate": 3.35978835978836e-05,
1174
+ "loss": 0.2066,
1175
+ "step": 1660
1176
+ },
1177
+ {
1178
+ "epoch": 19.76,
1179
+ "learning_rate": 3.3465608465608464e-05,
1180
+ "loss": 0.2078,
1181
+ "step": 1670
1182
+ },
1183
+ {
1184
+ "epoch": 19.88,
1185
+ "learning_rate": 3.3333333333333335e-05,
1186
+ "loss": 0.2028,
1187
+ "step": 1680
1188
+ },
1189
+ {
1190
+ "epoch": 20.0,
1191
+ "learning_rate": 3.3201058201058206e-05,
1192
+ "loss": 0.2058,
1193
+ "step": 1690
1194
+ },
1195
+ {
1196
+ "epoch": 20.0,
1197
+ "eval_accuracy": 0.8834304746044963,
1198
+ "eval_loss": 0.2481454610824585,
1199
+ "eval_runtime": 16.3173,
1200
+ "eval_samples_per_second": 73.603,
1201
+ "eval_steps_per_second": 2.329,
1202
+ "step": 1690
1203
+ },
1204
+ {
1205
+ "epoch": 20.12,
1206
+ "learning_rate": 3.306878306878307e-05,
1207
+ "loss": 0.1715,
1208
+ "step": 1700
1209
+ },
1210
+ {
1211
+ "epoch": 20.24,
1212
+ "learning_rate": 3.2936507936507936e-05,
1213
+ "loss": 0.1735,
1214
+ "step": 1710
1215
+ },
1216
+ {
1217
+ "epoch": 20.36,
1218
+ "learning_rate": 3.280423280423281e-05,
1219
+ "loss": 0.1703,
1220
+ "step": 1720
1221
+ },
1222
+ {
1223
+ "epoch": 20.47,
1224
+ "learning_rate": 3.267195767195768e-05,
1225
+ "loss": 0.1718,
1226
+ "step": 1730
1227
+ },
1228
+ {
1229
+ "epoch": 20.59,
1230
+ "learning_rate": 3.253968253968254e-05,
1231
+ "loss": 0.1817,
1232
+ "step": 1740
1233
+ },
1234
+ {
1235
+ "epoch": 20.71,
1236
+ "learning_rate": 3.240740740740741e-05,
1237
+ "loss": 0.1959,
1238
+ "step": 1750
1239
+ },
1240
+ {
1241
+ "epoch": 20.83,
1242
+ "learning_rate": 3.227513227513227e-05,
1243
+ "loss": 0.1753,
1244
+ "step": 1760
1245
+ },
1246
+ {
1247
+ "epoch": 20.95,
1248
+ "learning_rate": 3.2142857142857144e-05,
1249
+ "loss": 0.1575,
1250
+ "step": 1770
1251
+ },
1252
+ {
1253
+ "epoch": 20.99,
1254
+ "eval_accuracy": 0.9025811823480433,
1255
+ "eval_loss": 0.2368643879890442,
1256
+ "eval_runtime": 15.885,
1257
+ "eval_samples_per_second": 75.606,
1258
+ "eval_steps_per_second": 2.392,
1259
+ "step": 1774
1260
+ },
1261
+ {
1262
+ "epoch": 21.07,
1263
+ "learning_rate": 3.2010582010582015e-05,
1264
+ "loss": 0.1754,
1265
+ "step": 1780
1266
+ },
1267
+ {
1268
+ "epoch": 21.18,
1269
+ "learning_rate": 3.187830687830688e-05,
1270
+ "loss": 0.168,
1271
+ "step": 1790
1272
+ },
1273
+ {
1274
+ "epoch": 21.3,
1275
+ "learning_rate": 3.1746031746031745e-05,
1276
+ "loss": 0.1746,
1277
+ "step": 1800
1278
+ },
1279
+ {
1280
+ "epoch": 21.42,
1281
+ "learning_rate": 3.1613756613756616e-05,
1282
+ "loss": 0.1883,
1283
+ "step": 1810
1284
+ },
1285
+ {
1286
+ "epoch": 21.54,
1287
+ "learning_rate": 3.148148148148148e-05,
1288
+ "loss": 0.1732,
1289
+ "step": 1820
1290
+ },
1291
+ {
1292
+ "epoch": 21.66,
1293
+ "learning_rate": 3.134920634920635e-05,
1294
+ "loss": 0.147,
1295
+ "step": 1830
1296
+ },
1297
+ {
1298
+ "epoch": 21.78,
1299
+ "learning_rate": 3.121693121693122e-05,
1300
+ "loss": 0.1961,
1301
+ "step": 1840
1302
+ },
1303
+ {
1304
+ "epoch": 21.89,
1305
+ "learning_rate": 3.108465608465609e-05,
1306
+ "loss": 0.1889,
1307
+ "step": 1850
1308
+ },
1309
+ {
1310
+ "epoch": 22.0,
1311
+ "eval_accuracy": 0.9117402164862615,
1312
+ "eval_loss": 0.2264406383037567,
1313
+ "eval_runtime": 15.8707,
1314
+ "eval_samples_per_second": 75.674,
1315
+ "eval_steps_per_second": 2.394,
1316
+ "step": 1859
1317
+ },
1318
+ {
1319
+ "epoch": 22.01,
1320
+ "learning_rate": 3.095238095238095e-05,
1321
+ "loss": 0.1844,
1322
+ "step": 1860
1323
+ },
1324
+ {
1325
+ "epoch": 22.13,
1326
+ "learning_rate": 3.0820105820105824e-05,
1327
+ "loss": 0.175,
1328
+ "step": 1870
1329
+ },
1330
+ {
1331
+ "epoch": 22.25,
1332
+ "learning_rate": 3.068783068783069e-05,
1333
+ "loss": 0.1638,
1334
+ "step": 1880
1335
+ },
1336
+ {
1337
+ "epoch": 22.37,
1338
+ "learning_rate": 3.055555555555556e-05,
1339
+ "loss": 0.1828,
1340
+ "step": 1890
1341
+ },
1342
+ {
1343
+ "epoch": 22.49,
1344
+ "learning_rate": 3.0423280423280425e-05,
1345
+ "loss": 0.1869,
1346
+ "step": 1900
1347
+ },
1348
+ {
1349
+ "epoch": 22.6,
1350
+ "learning_rate": 3.0291005291005293e-05,
1351
+ "loss": 0.1354,
1352
+ "step": 1910
1353
+ },
1354
+ {
1355
+ "epoch": 22.72,
1356
+ "learning_rate": 3.0158730158730158e-05,
1357
+ "loss": 0.1544,
1358
+ "step": 1920
1359
+ },
1360
+ {
1361
+ "epoch": 22.84,
1362
+ "learning_rate": 3.002645502645503e-05,
1363
+ "loss": 0.1607,
1364
+ "step": 1930
1365
+ },
1366
+ {
1367
+ "epoch": 22.96,
1368
+ "learning_rate": 2.9894179894179897e-05,
1369
+ "loss": 0.1797,
1370
+ "step": 1940
1371
+ },
1372
+ {
1373
+ "epoch": 22.99,
1374
+ "eval_accuracy": 0.9084096586178185,
1375
+ "eval_loss": 0.23159563541412354,
1376
+ "eval_runtime": 15.8807,
1377
+ "eval_samples_per_second": 75.626,
1378
+ "eval_steps_per_second": 2.393,
1379
+ "step": 1943
1380
+ },
1381
+ {
1382
+ "epoch": 23.08,
1383
+ "learning_rate": 2.9761904761904762e-05,
1384
+ "loss": 0.164,
1385
+ "step": 1950
1386
+ },
1387
+ {
1388
+ "epoch": 23.2,
1389
+ "learning_rate": 2.962962962962963e-05,
1390
+ "loss": 0.1444,
1391
+ "step": 1960
1392
+ },
1393
+ {
1394
+ "epoch": 23.31,
1395
+ "learning_rate": 2.94973544973545e-05,
1396
+ "loss": 0.1518,
1397
+ "step": 1970
1398
+ },
1399
+ {
1400
+ "epoch": 23.43,
1401
+ "learning_rate": 2.9365079365079366e-05,
1402
+ "loss": 0.1498,
1403
+ "step": 1980
1404
+ },
1405
+ {
1406
+ "epoch": 23.55,
1407
+ "learning_rate": 2.9232804232804234e-05,
1408
+ "loss": 0.1624,
1409
+ "step": 1990
1410
+ },
1411
+ {
1412
+ "epoch": 23.67,
1413
+ "learning_rate": 2.91005291005291e-05,
1414
+ "loss": 0.1367,
1415
+ "step": 2000
1416
+ },
1417
+ {
1418
+ "epoch": 23.79,
1419
+ "learning_rate": 2.8968253968253974e-05,
1420
+ "loss": 0.1645,
1421
+ "step": 2010
1422
+ },
1423
+ {
1424
+ "epoch": 23.91,
1425
+ "learning_rate": 2.8835978835978838e-05,
1426
+ "loss": 0.1729,
1427
+ "step": 2020
1428
+ },
1429
+ {
1430
+ "epoch": 24.0,
1431
+ "eval_accuracy": 0.9042464612822648,
1432
+ "eval_loss": 0.24072496592998505,
1433
+ "eval_runtime": 15.9775,
1434
+ "eval_samples_per_second": 75.168,
1435
+ "eval_steps_per_second": 2.378,
1436
+ "step": 2028
1437
+ },
1438
+ {
1439
+ "epoch": 24.02,
1440
+ "learning_rate": 2.8703703703703706e-05,
1441
+ "loss": 0.1603,
1442
+ "step": 2030
1443
+ },
1444
+ {
1445
+ "epoch": 24.14,
1446
+ "learning_rate": 2.857142857142857e-05,
1447
+ "loss": 0.1448,
1448
+ "step": 2040
1449
+ },
1450
+ {
1451
+ "epoch": 24.26,
1452
+ "learning_rate": 2.8439153439153442e-05,
1453
+ "loss": 0.1389,
1454
+ "step": 2050
1455
+ },
1456
+ {
1457
+ "epoch": 24.38,
1458
+ "learning_rate": 2.830687830687831e-05,
1459
+ "loss": 0.1612,
1460
+ "step": 2060
1461
+ },
1462
+ {
1463
+ "epoch": 24.5,
1464
+ "learning_rate": 2.8174603174603175e-05,
1465
+ "loss": 0.1429,
1466
+ "step": 2070
1467
+ },
1468
+ {
1469
+ "epoch": 24.62,
1470
+ "learning_rate": 2.8042328042328043e-05,
1471
+ "loss": 0.1498,
1472
+ "step": 2080
1473
+ },
1474
+ {
1475
+ "epoch": 24.73,
1476
+ "learning_rate": 2.7910052910052914e-05,
1477
+ "loss": 0.1699,
1478
+ "step": 2090
1479
+ },
1480
+ {
1481
+ "epoch": 24.85,
1482
+ "learning_rate": 2.777777777777778e-05,
1483
+ "loss": 0.1409,
1484
+ "step": 2100
1485
+ },
1486
+ {
1487
+ "epoch": 24.97,
1488
+ "learning_rate": 2.7645502645502647e-05,
1489
+ "loss": 0.163,
1490
+ "step": 2110
1491
+ },
1492
+ {
1493
+ "epoch": 24.99,
1494
+ "eval_accuracy": 0.9009159034138218,
1495
+ "eval_loss": 0.27579963207244873,
1496
+ "eval_runtime": 15.7181,
1497
+ "eval_samples_per_second": 76.409,
1498
+ "eval_steps_per_second": 2.418,
1499
+ "step": 2112
1500
+ },
1501
+ {
1502
+ "epoch": 25.09,
1503
+ "learning_rate": 2.7513227513227512e-05,
1504
+ "loss": 0.1668,
1505
+ "step": 2120
1506
+ },
1507
+ {
1508
+ "epoch": 25.21,
1509
+ "learning_rate": 2.7380952380952383e-05,
1510
+ "loss": 0.1529,
1511
+ "step": 2130
1512
+ },
1513
+ {
1514
+ "epoch": 25.33,
1515
+ "learning_rate": 2.724867724867725e-05,
1516
+ "loss": 0.1581,
1517
+ "step": 2140
1518
+ },
1519
+ {
1520
+ "epoch": 25.44,
1521
+ "learning_rate": 2.7116402116402116e-05,
1522
+ "loss": 0.1408,
1523
+ "step": 2150
1524
+ },
1525
+ {
1526
+ "epoch": 25.56,
1527
+ "learning_rate": 2.6984126984126984e-05,
1528
+ "loss": 0.1463,
1529
+ "step": 2160
1530
+ },
1531
+ {
1532
+ "epoch": 25.68,
1533
+ "learning_rate": 2.6851851851851855e-05,
1534
+ "loss": 0.1431,
1535
+ "step": 2170
1536
+ },
1537
+ {
1538
+ "epoch": 25.8,
1539
+ "learning_rate": 2.6719576719576723e-05,
1540
+ "loss": 0.1397,
1541
+ "step": 2180
1542
+ },
1543
+ {
1544
+ "epoch": 25.92,
1545
+ "learning_rate": 2.6587301587301588e-05,
1546
+ "loss": 0.1609,
1547
+ "step": 2190
1548
+ },
1549
+ {
1550
+ "epoch": 26.0,
1551
+ "eval_accuracy": 0.8934221482098251,
1552
+ "eval_loss": 0.25153836607933044,
1553
+ "eval_runtime": 15.7973,
1554
+ "eval_samples_per_second": 76.026,
1555
+ "eval_steps_per_second": 2.405,
1556
+ "step": 2197
1557
+ },
1558
+ {
1559
+ "epoch": 26.04,
1560
+ "learning_rate": 2.6455026455026456e-05,
1561
+ "loss": 0.1503,
1562
+ "step": 2200
1563
+ },
1564
+ {
1565
+ "epoch": 26.15,
1566
+ "learning_rate": 2.6322751322751328e-05,
1567
+ "loss": 0.141,
1568
+ "step": 2210
1569
+ },
1570
+ {
1571
+ "epoch": 26.27,
1572
+ "learning_rate": 2.6190476190476192e-05,
1573
+ "loss": 0.1384,
1574
+ "step": 2220
1575
+ },
1576
+ {
1577
+ "epoch": 26.39,
1578
+ "learning_rate": 2.605820105820106e-05,
1579
+ "loss": 0.1505,
1580
+ "step": 2230
1581
+ },
1582
+ {
1583
+ "epoch": 26.51,
1584
+ "learning_rate": 2.5925925925925925e-05,
1585
+ "loss": 0.153,
1586
+ "step": 2240
1587
+ },
1588
+ {
1589
+ "epoch": 26.63,
1590
+ "learning_rate": 2.5793650793650796e-05,
1591
+ "loss": 0.1367,
1592
+ "step": 2250
1593
+ },
1594
+ {
1595
+ "epoch": 26.75,
1596
+ "learning_rate": 2.5661375661375664e-05,
1597
+ "loss": 0.1587,
1598
+ "step": 2260
1599
+ },
1600
+ {
1601
+ "epoch": 26.86,
1602
+ "learning_rate": 2.552910052910053e-05,
1603
+ "loss": 0.1493,
1604
+ "step": 2270
1605
+ },
1606
+ {
1607
+ "epoch": 26.98,
1608
+ "learning_rate": 2.5396825396825397e-05,
1609
+ "loss": 0.1345,
1610
+ "step": 2280
1611
+ },
1612
+ {
1613
+ "epoch": 26.99,
1614
+ "eval_accuracy": 0.8950874271440467,
1615
+ "eval_loss": 0.2521910071372986,
1616
+ "eval_runtime": 15.9043,
1617
+ "eval_samples_per_second": 75.514,
1618
+ "eval_steps_per_second": 2.389,
1619
+ "step": 2281
1620
+ },
1621
+ {
1622
+ "epoch": 27.1,
1623
+ "learning_rate": 2.526455026455027e-05,
1624
+ "loss": 0.1349,
1625
+ "step": 2290
1626
+ },
1627
+ {
1628
+ "epoch": 27.22,
1629
+ "learning_rate": 2.5132275132275137e-05,
1630
+ "loss": 0.1301,
1631
+ "step": 2300
1632
+ },
1633
+ {
1634
+ "epoch": 27.34,
1635
+ "learning_rate": 2.5e-05,
1636
+ "loss": 0.1382,
1637
+ "step": 2310
1638
+ },
1639
+ {
1640
+ "epoch": 27.46,
1641
+ "learning_rate": 2.4867724867724866e-05,
1642
+ "loss": 0.1336,
1643
+ "step": 2320
1644
+ },
1645
+ {
1646
+ "epoch": 27.57,
1647
+ "learning_rate": 2.4735449735449737e-05,
1648
+ "loss": 0.1308,
1649
+ "step": 2330
1650
+ },
1651
+ {
1652
+ "epoch": 27.69,
1653
+ "learning_rate": 2.4603174603174602e-05,
1654
+ "loss": 0.1285,
1655
+ "step": 2340
1656
+ },
1657
+ {
1658
+ "epoch": 27.81,
1659
+ "learning_rate": 2.4470899470899473e-05,
1660
+ "loss": 0.1424,
1661
+ "step": 2350
1662
+ },
1663
+ {
1664
+ "epoch": 27.93,
1665
+ "learning_rate": 2.4338624338624338e-05,
1666
+ "loss": 0.1567,
1667
+ "step": 2360
1668
+ },
1669
+ {
1670
+ "epoch": 28.0,
1671
+ "eval_accuracy": 0.9084096586178185,
1672
+ "eval_loss": 0.24210087954998016,
1673
+ "eval_runtime": 15.8265,
1674
+ "eval_samples_per_second": 75.885,
1675
+ "eval_steps_per_second": 2.401,
1676
+ "step": 2366
1677
+ },
1678
+ {
1679
+ "epoch": 28.05,
1680
+ "learning_rate": 2.4206349206349206e-05,
1681
+ "loss": 0.135,
1682
+ "step": 2370
1683
+ },
1684
+ {
1685
+ "epoch": 28.17,
1686
+ "learning_rate": 2.4074074074074074e-05,
1687
+ "loss": 0.1305,
1688
+ "step": 2380
1689
+ },
1690
+ {
1691
+ "epoch": 28.28,
1692
+ "learning_rate": 2.3941798941798942e-05,
1693
+ "loss": 0.1351,
1694
+ "step": 2390
1695
+ },
1696
+ {
1697
+ "epoch": 28.4,
1698
+ "learning_rate": 2.380952380952381e-05,
1699
+ "loss": 0.1351,
1700
+ "step": 2400
1701
+ },
1702
+ {
1703
+ "epoch": 28.52,
1704
+ "learning_rate": 2.3677248677248678e-05,
1705
+ "loss": 0.1272,
1706
+ "step": 2410
1707
+ },
1708
+ {
1709
+ "epoch": 28.64,
1710
+ "learning_rate": 2.3544973544973546e-05,
1711
+ "loss": 0.1522,
1712
+ "step": 2420
1713
+ },
1714
+ {
1715
+ "epoch": 28.76,
1716
+ "learning_rate": 2.3412698412698414e-05,
1717
+ "loss": 0.1295,
1718
+ "step": 2430
1719
+ },
1720
+ {
1721
+ "epoch": 28.88,
1722
+ "learning_rate": 2.328042328042328e-05,
1723
+ "loss": 0.141,
1724
+ "step": 2440
1725
+ },
1726
+ {
1727
+ "epoch": 28.99,
1728
+ "learning_rate": 2.314814814814815e-05,
1729
+ "loss": 0.1441,
1730
+ "step": 2450
1731
+ },
1732
+ {
1733
+ "epoch": 28.99,
1734
+ "eval_accuracy": 0.9167360532889259,
1735
+ "eval_loss": 0.2525836229324341,
1736
+ "eval_runtime": 16.1677,
1737
+ "eval_samples_per_second": 74.284,
1738
+ "eval_steps_per_second": 2.35,
1739
+ "step": 2450
1740
+ },
1741
+ {
1742
+ "epoch": 29.11,
1743
+ "learning_rate": 2.3015873015873015e-05,
1744
+ "loss": 0.1166,
1745
+ "step": 2460
1746
+ },
1747
+ {
1748
+ "epoch": 29.23,
1749
+ "learning_rate": 2.2883597883597886e-05,
1750
+ "loss": 0.1318,
1751
+ "step": 2470
1752
+ },
1753
+ {
1754
+ "epoch": 29.35,
1755
+ "learning_rate": 2.275132275132275e-05,
1756
+ "loss": 0.1171,
1757
+ "step": 2480
1758
+ },
1759
+ {
1760
+ "epoch": 29.47,
1761
+ "learning_rate": 2.261904761904762e-05,
1762
+ "loss": 0.1276,
1763
+ "step": 2490
1764
+ },
1765
+ {
1766
+ "epoch": 29.59,
1767
+ "learning_rate": 2.2486772486772487e-05,
1768
+ "loss": 0.1442,
1769
+ "step": 2500
1770
+ },
1771
+ {
1772
+ "epoch": 29.7,
1773
+ "learning_rate": 2.2354497354497355e-05,
1774
+ "loss": 0.1522,
1775
+ "step": 2510
1776
+ },
1777
+ {
1778
+ "epoch": 29.82,
1779
+ "learning_rate": 2.2222222222222223e-05,
1780
+ "loss": 0.1357,
1781
+ "step": 2520
1782
+ },
1783
+ {
1784
+ "epoch": 29.94,
1785
+ "learning_rate": 2.208994708994709e-05,
1786
+ "loss": 0.1219,
1787
+ "step": 2530
1788
+ },
1789
+ {
1790
+ "epoch": 30.0,
1791
+ "eval_accuracy": 0.9109075770191507,
1792
+ "eval_loss": 0.2636389136314392,
1793
+ "eval_runtime": 15.8974,
1794
+ "eval_samples_per_second": 75.547,
1795
+ "eval_steps_per_second": 2.39,
1796
+ "step": 2535
1797
+ },
1798
+ {
1799
+ "epoch": 30.06,
1800
+ "learning_rate": 2.1957671957671956e-05,
1801
+ "loss": 0.1387,
1802
+ "step": 2540
1803
+ },
1804
+ {
1805
+ "epoch": 30.18,
1806
+ "learning_rate": 2.1825396825396827e-05,
1807
+ "loss": 0.1182,
1808
+ "step": 2550
1809
+ },
1810
+ {
1811
+ "epoch": 30.3,
1812
+ "learning_rate": 2.1693121693121692e-05,
1813
+ "loss": 0.1103,
1814
+ "step": 2560
1815
+ },
1816
+ {
1817
+ "epoch": 30.41,
1818
+ "learning_rate": 2.1560846560846563e-05,
1819
+ "loss": 0.1385,
1820
+ "step": 2570
1821
+ },
1822
+ {
1823
+ "epoch": 30.53,
1824
+ "learning_rate": 2.1428571428571428e-05,
1825
+ "loss": 0.1279,
1826
+ "step": 2580
1827
+ },
1828
+ {
1829
+ "epoch": 30.65,
1830
+ "learning_rate": 2.1296296296296296e-05,
1831
+ "loss": 0.1133,
1832
+ "step": 2590
1833
+ },
1834
+ {
1835
+ "epoch": 30.77,
1836
+ "learning_rate": 2.1164021164021164e-05,
1837
+ "loss": 0.1475,
1838
+ "step": 2600
1839
+ },
1840
+ {
1841
+ "epoch": 30.89,
1842
+ "learning_rate": 2.1031746031746032e-05,
1843
+ "loss": 0.1137,
1844
+ "step": 2610
1845
+ },
1846
+ {
1847
+ "epoch": 30.99,
1848
+ "eval_accuracy": 0.9034138218151541,
1849
+ "eval_loss": 0.2889064848423004,
1850
+ "eval_runtime": 15.4125,
1851
+ "eval_samples_per_second": 77.924,
1852
+ "eval_steps_per_second": 2.466,
1853
+ "step": 2619
1854
+ },
1855
+ {
1856
+ "epoch": 31.01,
1857
+ "learning_rate": 2.08994708994709e-05,
1858
+ "loss": 0.1316,
1859
+ "step": 2620
1860
+ },
1861
+ {
1862
+ "epoch": 31.12,
1863
+ "learning_rate": 2.076719576719577e-05,
1864
+ "loss": 0.1245,
1865
+ "step": 2630
1866
+ },
1867
+ {
1868
+ "epoch": 31.24,
1869
+ "learning_rate": 2.0634920634920636e-05,
1870
+ "loss": 0.1104,
1871
+ "step": 2640
1872
+ },
1873
+ {
1874
+ "epoch": 31.36,
1875
+ "learning_rate": 2.0502645502645504e-05,
1876
+ "loss": 0.1201,
1877
+ "step": 2650
1878
+ },
1879
+ {
1880
+ "epoch": 31.48,
1881
+ "learning_rate": 2.037037037037037e-05,
1882
+ "loss": 0.1269,
1883
+ "step": 2660
1884
+ },
1885
+ {
1886
+ "epoch": 31.6,
1887
+ "learning_rate": 2.023809523809524e-05,
1888
+ "loss": 0.1034,
1889
+ "step": 2670
1890
+ },
1891
+ {
1892
+ "epoch": 31.72,
1893
+ "learning_rate": 2.0105820105820105e-05,
1894
+ "loss": 0.1039,
1895
+ "step": 2680
1896
+ },
1897
+ {
1898
+ "epoch": 31.83,
1899
+ "learning_rate": 1.9973544973544973e-05,
1900
+ "loss": 0.1297,
1901
+ "step": 2690
1902
+ },
1903
+ {
1904
+ "epoch": 31.95,
1905
+ "learning_rate": 1.984126984126984e-05,
1906
+ "loss": 0.1109,
1907
+ "step": 2700
1908
+ },
1909
+ {
1910
+ "epoch": 32.0,
1911
+ "eval_accuracy": 0.8917568692756037,
1912
+ "eval_loss": 0.3240314722061157,
1913
+ "eval_runtime": 15.6837,
1914
+ "eval_samples_per_second": 76.576,
1915
+ "eval_steps_per_second": 2.423,
1916
+ "step": 2704
1917
+ },
1918
+ {
1919
+ "epoch": 32.07,
1920
+ "learning_rate": 1.970899470899471e-05,
1921
+ "loss": 0.1219,
1922
+ "step": 2710
1923
+ },
1924
+ {
1925
+ "epoch": 32.19,
1926
+ "learning_rate": 1.9576719576719577e-05,
1927
+ "loss": 0.125,
1928
+ "step": 2720
1929
+ },
1930
+ {
1931
+ "epoch": 32.31,
1932
+ "learning_rate": 1.9444444444444445e-05,
1933
+ "loss": 0.1037,
1934
+ "step": 2730
1935
+ },
1936
+ {
1937
+ "epoch": 32.43,
1938
+ "learning_rate": 1.9312169312169313e-05,
1939
+ "loss": 0.0952,
1940
+ "step": 2740
1941
+ },
1942
+ {
1943
+ "epoch": 32.54,
1944
+ "learning_rate": 1.917989417989418e-05,
1945
+ "loss": 0.1125,
1946
+ "step": 2750
1947
+ },
1948
+ {
1949
+ "epoch": 32.66,
1950
+ "learning_rate": 1.9047619047619046e-05,
1951
+ "loss": 0.1036,
1952
+ "step": 2760
1953
+ },
1954
+ {
1955
+ "epoch": 32.78,
1956
+ "learning_rate": 1.8915343915343918e-05,
1957
+ "loss": 0.1192,
1958
+ "step": 2770
1959
+ },
1960
+ {
1961
+ "epoch": 32.9,
1962
+ "learning_rate": 1.8783068783068782e-05,
1963
+ "loss": 0.1254,
1964
+ "step": 2780
1965
+ },
1966
+ {
1967
+ "epoch": 32.99,
1968
+ "eval_accuracy": 0.9150707743547044,
1969
+ "eval_loss": 0.2847664952278137,
1970
+ "eval_runtime": 15.3587,
1971
+ "eval_samples_per_second": 78.196,
1972
+ "eval_steps_per_second": 2.474,
1973
+ "step": 2788
1974
+ },
1975
+ {
1976
+ "epoch": 33.02,
1977
+ "learning_rate": 1.8650793650793654e-05,
1978
+ "loss": 0.0989,
1979
+ "step": 2790
1980
+ },
1981
+ {
1982
+ "epoch": 33.14,
1983
+ "learning_rate": 1.8518518518518518e-05,
1984
+ "loss": 0.1006,
1985
+ "step": 2800
1986
+ },
1987
+ {
1988
+ "epoch": 33.25,
1989
+ "learning_rate": 1.8386243386243386e-05,
1990
+ "loss": 0.1213,
1991
+ "step": 2810
1992
+ },
1993
+ {
1994
+ "epoch": 33.37,
1995
+ "learning_rate": 1.8253968253968254e-05,
1996
+ "loss": 0.1153,
1997
+ "step": 2820
1998
+ },
1999
+ {
2000
+ "epoch": 33.49,
2001
+ "learning_rate": 1.8121693121693122e-05,
2002
+ "loss": 0.1199,
2003
+ "step": 2830
2004
+ },
2005
+ {
2006
+ "epoch": 33.61,
2007
+ "learning_rate": 1.798941798941799e-05,
2008
+ "loss": 0.1034,
2009
+ "step": 2840
2010
+ },
2011
+ {
2012
+ "epoch": 33.73,
2013
+ "learning_rate": 1.785714285714286e-05,
2014
+ "loss": 0.1013,
2015
+ "step": 2850
2016
+ },
2017
+ {
2018
+ "epoch": 33.85,
2019
+ "learning_rate": 1.7724867724867723e-05,
2020
+ "loss": 0.1197,
2021
+ "step": 2860
2022
+ },
2023
+ {
2024
+ "epoch": 33.96,
2025
+ "learning_rate": 1.7592592592592595e-05,
2026
+ "loss": 0.1425,
2027
+ "step": 2870
2028
+ },
2029
+ {
2030
+ "epoch": 34.0,
2031
+ "eval_accuracy": 0.9092422980849292,
2032
+ "eval_loss": 0.27417123317718506,
2033
+ "eval_runtime": 15.4475,
2034
+ "eval_samples_per_second": 77.747,
2035
+ "eval_steps_per_second": 2.46,
2036
+ "step": 2873
2037
+ },
2038
+ {
2039
+ "epoch": 34.08,
2040
+ "learning_rate": 1.746031746031746e-05,
2041
+ "loss": 0.0986,
2042
+ "step": 2880
2043
+ },
2044
+ {
2045
+ "epoch": 34.2,
2046
+ "learning_rate": 1.732804232804233e-05,
2047
+ "loss": 0.0942,
2048
+ "step": 2890
2049
+ },
2050
+ {
2051
+ "epoch": 34.32,
2052
+ "learning_rate": 1.7195767195767195e-05,
2053
+ "loss": 0.1186,
2054
+ "step": 2900
2055
+ },
2056
+ {
2057
+ "epoch": 34.44,
2058
+ "learning_rate": 1.7063492063492063e-05,
2059
+ "loss": 0.0965,
2060
+ "step": 2910
2061
+ },
2062
+ {
2063
+ "epoch": 34.56,
2064
+ "learning_rate": 1.693121693121693e-05,
2065
+ "loss": 0.1119,
2066
+ "step": 2920
2067
+ },
2068
+ {
2069
+ "epoch": 34.67,
2070
+ "learning_rate": 1.67989417989418e-05,
2071
+ "loss": 0.1238,
2072
+ "step": 2930
2073
+ },
2074
+ {
2075
+ "epoch": 34.79,
2076
+ "learning_rate": 1.6666666666666667e-05,
2077
+ "loss": 0.125,
2078
+ "step": 2940
2079
+ },
2080
+ {
2081
+ "epoch": 34.91,
2082
+ "learning_rate": 1.6534391534391536e-05,
2083
+ "loss": 0.1164,
2084
+ "step": 2950
2085
+ },
2086
+ {
2087
+ "epoch": 34.99,
2088
+ "eval_accuracy": 0.9150707743547044,
2089
+ "eval_loss": 0.3028131425380707,
2090
+ "eval_runtime": 15.5308,
2091
+ "eval_samples_per_second": 77.33,
2092
+ "eval_steps_per_second": 2.447,
2093
+ "step": 2957
2094
+ },
2095
+ {
2096
+ "epoch": 35.03,
2097
+ "learning_rate": 1.6402116402116404e-05,
2098
+ "loss": 0.0972,
2099
+ "step": 2960
2100
+ },
2101
+ {
2102
+ "epoch": 35.15,
2103
+ "learning_rate": 1.626984126984127e-05,
2104
+ "loss": 0.1096,
2105
+ "step": 2970
2106
+ },
2107
+ {
2108
+ "epoch": 35.27,
2109
+ "learning_rate": 1.6137566137566136e-05,
2110
+ "loss": 0.1163,
2111
+ "step": 2980
2112
+ },
2113
+ {
2114
+ "epoch": 35.38,
2115
+ "learning_rate": 1.6005291005291008e-05,
2116
+ "loss": 0.1034,
2117
+ "step": 2990
2118
+ },
2119
+ {
2120
+ "epoch": 35.5,
2121
+ "learning_rate": 1.5873015873015872e-05,
2122
+ "loss": 0.0962,
2123
+ "step": 3000
2124
+ },
2125
+ {
2126
+ "epoch": 35.62,
2127
+ "learning_rate": 1.574074074074074e-05,
2128
+ "loss": 0.0955,
2129
+ "step": 3010
2130
+ },
2131
+ {
2132
+ "epoch": 35.74,
2133
+ "learning_rate": 1.560846560846561e-05,
2134
+ "loss": 0.1082,
2135
+ "step": 3020
2136
+ },
2137
+ {
2138
+ "epoch": 35.86,
2139
+ "learning_rate": 1.5476190476190476e-05,
2140
+ "loss": 0.0875,
2141
+ "step": 3030
2142
+ },
2143
+ {
2144
+ "epoch": 35.98,
2145
+ "learning_rate": 1.5343915343915344e-05,
2146
+ "loss": 0.0781,
2147
+ "step": 3040
2148
+ },
2149
+ {
2150
+ "epoch": 36.0,
2151
+ "eval_accuracy": 0.9059117402164862,
2152
+ "eval_loss": 0.33245834708213806,
2153
+ "eval_runtime": 15.6522,
2154
+ "eval_samples_per_second": 76.73,
2155
+ "eval_steps_per_second": 2.428,
2156
+ "step": 3042
2157
+ },
2158
+ {
2159
+ "epoch": 36.09,
2160
+ "learning_rate": 1.5211640211640213e-05,
2161
+ "loss": 0.1148,
2162
+ "step": 3050
2163
+ },
2164
+ {
2165
+ "epoch": 36.21,
2166
+ "learning_rate": 1.5079365079365079e-05,
2167
+ "loss": 0.0983,
2168
+ "step": 3060
2169
+ },
2170
+ {
2171
+ "epoch": 36.33,
2172
+ "learning_rate": 1.4947089947089949e-05,
2173
+ "loss": 0.1018,
2174
+ "step": 3070
2175
+ },
2176
+ {
2177
+ "epoch": 36.45,
2178
+ "learning_rate": 1.4814814814814815e-05,
2179
+ "loss": 0.1093,
2180
+ "step": 3080
2181
+ },
2182
+ {
2183
+ "epoch": 36.57,
2184
+ "learning_rate": 1.4682539682539683e-05,
2185
+ "loss": 0.1056,
2186
+ "step": 3090
2187
+ },
2188
+ {
2189
+ "epoch": 36.69,
2190
+ "learning_rate": 1.455026455026455e-05,
2191
+ "loss": 0.0843,
2192
+ "step": 3100
2193
+ },
2194
+ {
2195
+ "epoch": 36.8,
2196
+ "learning_rate": 1.4417989417989419e-05,
2197
+ "loss": 0.0945,
2198
+ "step": 3110
2199
+ },
2200
+ {
2201
+ "epoch": 36.92,
2202
+ "learning_rate": 1.4285714285714285e-05,
2203
+ "loss": 0.1004,
2204
+ "step": 3120
2205
+ },
2206
+ {
2207
+ "epoch": 36.99,
2208
+ "eval_accuracy": 0.9017485428809325,
2209
+ "eval_loss": 0.314680814743042,
2210
+ "eval_runtime": 15.403,
2211
+ "eval_samples_per_second": 77.972,
2212
+ "eval_steps_per_second": 2.467,
2213
+ "step": 3126
2214
+ },
2215
+ {
2216
+ "epoch": 37.04,
2217
+ "learning_rate": 1.4153439153439155e-05,
2218
+ "loss": 0.0903,
2219
+ "step": 3130
2220
+ },
2221
+ {
2222
+ "epoch": 37.16,
2223
+ "learning_rate": 1.4021164021164022e-05,
2224
+ "loss": 0.0932,
2225
+ "step": 3140
2226
+ },
2227
+ {
2228
+ "epoch": 37.28,
2229
+ "learning_rate": 1.388888888888889e-05,
2230
+ "loss": 0.0949,
2231
+ "step": 3150
2232
+ },
2233
+ {
2234
+ "epoch": 37.4,
2235
+ "learning_rate": 1.3756613756613756e-05,
2236
+ "loss": 0.0923,
2237
+ "step": 3160
2238
+ },
2239
+ {
2240
+ "epoch": 37.51,
2241
+ "learning_rate": 1.3624338624338626e-05,
2242
+ "loss": 0.0837,
2243
+ "step": 3170
2244
+ },
2245
+ {
2246
+ "epoch": 37.63,
2247
+ "learning_rate": 1.3492063492063492e-05,
2248
+ "loss": 0.1104,
2249
+ "step": 3180
2250
+ },
2251
+ {
2252
+ "epoch": 37.75,
2253
+ "learning_rate": 1.3359788359788362e-05,
2254
+ "loss": 0.1168,
2255
+ "step": 3190
2256
+ },
2257
+ {
2258
+ "epoch": 37.87,
2259
+ "learning_rate": 1.3227513227513228e-05,
2260
+ "loss": 0.098,
2261
+ "step": 3200
2262
+ },
2263
+ {
2264
+ "epoch": 37.99,
2265
+ "learning_rate": 1.3095238095238096e-05,
2266
+ "loss": 0.0955,
2267
+ "step": 3210
2268
+ },
2269
+ {
2270
+ "epoch": 38.0,
2271
+ "eval_accuracy": 0.9117402164862615,
2272
+ "eval_loss": 0.31323495507240295,
2273
+ "eval_runtime": 15.436,
2274
+ "eval_samples_per_second": 77.805,
2275
+ "eval_steps_per_second": 2.462,
2276
+ "step": 3211
2277
+ },
2278
+ {
2279
+ "epoch": 38.11,
2280
+ "learning_rate": 1.2962962962962962e-05,
2281
+ "loss": 0.1021,
2282
+ "step": 3220
2283
+ },
2284
+ {
2285
+ "epoch": 38.22,
2286
+ "learning_rate": 1.2830687830687832e-05,
2287
+ "loss": 0.082,
2288
+ "step": 3230
2289
+ },
2290
+ {
2291
+ "epoch": 38.34,
2292
+ "learning_rate": 1.2698412698412699e-05,
2293
+ "loss": 0.1104,
2294
+ "step": 3240
2295
+ },
2296
+ {
2297
+ "epoch": 38.46,
2298
+ "learning_rate": 1.2566137566137568e-05,
2299
+ "loss": 0.0954,
2300
+ "step": 3250
2301
+ },
2302
+ {
2303
+ "epoch": 38.58,
2304
+ "learning_rate": 1.2433862433862433e-05,
2305
+ "loss": 0.084,
2306
+ "step": 3260
2307
+ },
2308
+ {
2309
+ "epoch": 38.7,
2310
+ "learning_rate": 1.2301587301587301e-05,
2311
+ "loss": 0.0762,
2312
+ "step": 3270
2313
+ },
2314
+ {
2315
+ "epoch": 38.82,
2316
+ "learning_rate": 1.2169312169312169e-05,
2317
+ "loss": 0.0789,
2318
+ "step": 3280
2319
+ },
2320
+ {
2321
+ "epoch": 38.93,
2322
+ "learning_rate": 1.2037037037037037e-05,
2323
+ "loss": 0.0942,
2324
+ "step": 3290
2325
+ },
2326
+ {
2327
+ "epoch": 38.99,
2328
+ "eval_accuracy": 0.9142381348875936,
2329
+ "eval_loss": 0.3243829607963562,
2330
+ "eval_runtime": 16.3904,
2331
+ "eval_samples_per_second": 73.275,
2332
+ "eval_steps_per_second": 2.318,
2333
+ "step": 3295
2334
+ },
2335
+ {
2336
+ "epoch": 39.05,
2337
+ "learning_rate": 1.1904761904761905e-05,
2338
+ "loss": 0.0888,
2339
+ "step": 3300
2340
+ },
2341
+ {
2342
+ "epoch": 39.17,
2343
+ "learning_rate": 1.1772486772486773e-05,
2344
+ "loss": 0.0895,
2345
+ "step": 3310
2346
+ },
2347
+ {
2348
+ "epoch": 39.29,
2349
+ "learning_rate": 1.164021164021164e-05,
2350
+ "loss": 0.0973,
2351
+ "step": 3320
2352
+ },
2353
+ {
2354
+ "epoch": 39.41,
2355
+ "learning_rate": 1.1507936507936508e-05,
2356
+ "loss": 0.0648,
2357
+ "step": 3330
2358
+ },
2359
+ {
2360
+ "epoch": 39.53,
2361
+ "learning_rate": 1.1375661375661376e-05,
2362
+ "loss": 0.11,
2363
+ "step": 3340
2364
+ },
2365
+ {
2366
+ "epoch": 39.64,
2367
+ "learning_rate": 1.1243386243386244e-05,
2368
+ "loss": 0.0698,
2369
+ "step": 3350
2370
+ },
2371
+ {
2372
+ "epoch": 39.76,
2373
+ "learning_rate": 1.1111111111111112e-05,
2374
+ "loss": 0.0988,
2375
+ "step": 3360
2376
+ },
2377
+ {
2378
+ "epoch": 39.88,
2379
+ "learning_rate": 1.0978835978835978e-05,
2380
+ "loss": 0.0803,
2381
+ "step": 3370
2382
+ },
2383
+ {
2384
+ "epoch": 40.0,
2385
+ "learning_rate": 1.0846560846560846e-05,
2386
+ "loss": 0.1077,
2387
+ "step": 3380
2388
+ },
2389
+ {
2390
+ "epoch": 40.0,
2391
+ "eval_accuracy": 0.9134054954204829,
2392
+ "eval_loss": 0.32237449288368225,
2393
+ "eval_runtime": 15.4633,
2394
+ "eval_samples_per_second": 77.668,
2395
+ "eval_steps_per_second": 2.457,
2396
+ "step": 3380
2397
+ },
2398
+ {
2399
+ "epoch": 40.12,
2400
+ "learning_rate": 1.0714285714285714e-05,
2401
+ "loss": 0.063,
2402
+ "step": 3390
2403
+ },
2404
+ {
2405
+ "epoch": 40.24,
2406
+ "learning_rate": 1.0582010582010582e-05,
2407
+ "loss": 0.077,
2408
+ "step": 3400
2409
+ },
2410
+ {
2411
+ "epoch": 40.36,
2412
+ "learning_rate": 1.044973544973545e-05,
2413
+ "loss": 0.0976,
2414
+ "step": 3410
2415
+ },
2416
+ {
2417
+ "epoch": 40.47,
2418
+ "learning_rate": 1.0317460317460318e-05,
2419
+ "loss": 0.1123,
2420
+ "step": 3420
2421
+ },
2422
+ {
2423
+ "epoch": 40.59,
2424
+ "learning_rate": 1.0185185185185185e-05,
2425
+ "loss": 0.0968,
2426
+ "step": 3430
2427
+ },
2428
+ {
2429
+ "epoch": 40.71,
2430
+ "learning_rate": 1.0052910052910053e-05,
2431
+ "loss": 0.0808,
2432
+ "step": 3440
2433
+ },
2434
+ {
2435
+ "epoch": 40.83,
2436
+ "learning_rate": 9.92063492063492e-06,
2437
+ "loss": 0.0861,
2438
+ "step": 3450
2439
+ },
2440
+ {
2441
+ "epoch": 40.95,
2442
+ "learning_rate": 9.788359788359789e-06,
2443
+ "loss": 0.079,
2444
+ "step": 3460
2445
+ },
2446
+ {
2447
+ "epoch": 40.99,
2448
+ "eval_accuracy": 0.9134054954204829,
2449
+ "eval_loss": 0.33289557695388794,
2450
+ "eval_runtime": 15.7987,
2451
+ "eval_samples_per_second": 76.019,
2452
+ "eval_steps_per_second": 2.405,
2453
+ "step": 3464
2454
+ },
2455
+ {
2456
+ "epoch": 41.07,
2457
+ "learning_rate": 9.656084656084657e-06,
2458
+ "loss": 0.0968,
2459
+ "step": 3470
2460
+ },
2461
+ {
2462
+ "epoch": 41.18,
2463
+ "learning_rate": 9.523809523809523e-06,
2464
+ "loss": 0.0785,
2465
+ "step": 3480
2466
+ },
2467
+ {
2468
+ "epoch": 41.3,
2469
+ "learning_rate": 9.391534391534391e-06,
2470
+ "loss": 0.0711,
2471
+ "step": 3490
2472
+ },
2473
+ {
2474
+ "epoch": 41.42,
2475
+ "learning_rate": 9.259259259259259e-06,
2476
+ "loss": 0.0785,
2477
+ "step": 3500
2478
+ },
2479
+ {
2480
+ "epoch": 41.54,
2481
+ "learning_rate": 9.126984126984127e-06,
2482
+ "loss": 0.0888,
2483
+ "step": 3510
2484
+ },
2485
+ {
2486
+ "epoch": 41.66,
2487
+ "learning_rate": 8.994708994708995e-06,
2488
+ "loss": 0.0727,
2489
+ "step": 3520
2490
+ },
2491
+ {
2492
+ "epoch": 41.78,
2493
+ "learning_rate": 8.862433862433862e-06,
2494
+ "loss": 0.0806,
2495
+ "step": 3530
2496
+ },
2497
+ {
2498
+ "epoch": 41.89,
2499
+ "learning_rate": 8.73015873015873e-06,
2500
+ "loss": 0.0826,
2501
+ "step": 3540
2502
+ },
2503
+ {
2504
+ "epoch": 42.0,
2505
+ "eval_accuracy": 0.9109075770191507,
2506
+ "eval_loss": 0.33648884296417236,
2507
+ "eval_runtime": 15.7707,
2508
+ "eval_samples_per_second": 76.154,
2509
+ "eval_steps_per_second": 2.41,
2510
+ "step": 3549
2511
+ },
2512
+ {
2513
+ "epoch": 42.01,
2514
+ "learning_rate": 8.597883597883598e-06,
2515
+ "loss": 0.0678,
2516
+ "step": 3550
2517
+ },
2518
+ {
2519
+ "epoch": 42.13,
2520
+ "learning_rate": 8.465608465608466e-06,
2521
+ "loss": 0.0653,
2522
+ "step": 3560
2523
+ },
2524
+ {
2525
+ "epoch": 42.25,
2526
+ "learning_rate": 8.333333333333334e-06,
2527
+ "loss": 0.0788,
2528
+ "step": 3570
2529
+ },
2530
+ {
2531
+ "epoch": 42.37,
2532
+ "learning_rate": 8.201058201058202e-06,
2533
+ "loss": 0.0798,
2534
+ "step": 3580
2535
+ },
2536
+ {
2537
+ "epoch": 42.49,
2538
+ "learning_rate": 8.068783068783068e-06,
2539
+ "loss": 0.0832,
2540
+ "step": 3590
2541
+ },
2542
+ {
2543
+ "epoch": 42.6,
2544
+ "learning_rate": 7.936507936507936e-06,
2545
+ "loss": 0.0819,
2546
+ "step": 3600
2547
+ },
2548
+ {
2549
+ "epoch": 42.72,
2550
+ "learning_rate": 7.804232804232804e-06,
2551
+ "loss": 0.0813,
2552
+ "step": 3610
2553
+ },
2554
+ {
2555
+ "epoch": 42.84,
2556
+ "learning_rate": 7.671957671957672e-06,
2557
+ "loss": 0.0738,
2558
+ "step": 3620
2559
+ },
2560
+ {
2561
+ "epoch": 42.96,
2562
+ "learning_rate": 7.5396825396825394e-06,
2563
+ "loss": 0.07,
2564
+ "step": 3630
2565
+ },
2566
+ {
2567
+ "epoch": 42.99,
2568
+ "eval_accuracy": 0.9100749375520399,
2569
+ "eval_loss": 0.3693588972091675,
2570
+ "eval_runtime": 15.7316,
2571
+ "eval_samples_per_second": 76.343,
2572
+ "eval_steps_per_second": 2.416,
2573
+ "step": 3633
2574
+ },
2575
+ {
2576
+ "epoch": 43.08,
2577
+ "learning_rate": 7.4074074074074075e-06,
2578
+ "loss": 0.0717,
2579
+ "step": 3640
2580
+ },
2581
+ {
2582
+ "epoch": 43.2,
2583
+ "learning_rate": 7.275132275132275e-06,
2584
+ "loss": 0.0835,
2585
+ "step": 3650
2586
+ },
2587
+ {
2588
+ "epoch": 43.31,
2589
+ "learning_rate": 7.142857142857143e-06,
2590
+ "loss": 0.0765,
2591
+ "step": 3660
2592
+ },
2593
+ {
2594
+ "epoch": 43.43,
2595
+ "learning_rate": 7.010582010582011e-06,
2596
+ "loss": 0.0721,
2597
+ "step": 3670
2598
+ },
2599
+ {
2600
+ "epoch": 43.55,
2601
+ "learning_rate": 6.878306878306878e-06,
2602
+ "loss": 0.0733,
2603
+ "step": 3680
2604
+ },
2605
+ {
2606
+ "epoch": 43.67,
2607
+ "learning_rate": 6.746031746031746e-06,
2608
+ "loss": 0.0709,
2609
+ "step": 3690
2610
+ },
2611
+ {
2612
+ "epoch": 43.79,
2613
+ "learning_rate": 6.613756613756614e-06,
2614
+ "loss": 0.09,
2615
+ "step": 3700
2616
+ },
2617
+ {
2618
+ "epoch": 43.91,
2619
+ "learning_rate": 6.481481481481481e-06,
2620
+ "loss": 0.0698,
2621
+ "step": 3710
2622
+ },
2623
+ {
2624
+ "epoch": 44.0,
2625
+ "eval_accuracy": 0.9184013322231473,
2626
+ "eval_loss": 0.3492058217525482,
2627
+ "eval_runtime": 15.97,
2628
+ "eval_samples_per_second": 75.204,
2629
+ "eval_steps_per_second": 2.379,
2630
+ "step": 3718
2631
+ }
2632
+ ],
2633
+ "max_steps": 4200,
2634
+ "num_train_epochs": 50,
2635
+ "total_flos": 4.035107025911808e+19,
2636
+ "trial_name": null,
2637
+ "trial_params": null
2638
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd714c320d57b3046e0a922a49d3fe30947df6baa9b6bfa29227dc7db6d4fc76
3
+ size 4027