satwikapaul commited on
Commit
8a58b99
1 Parent(s): 60b4d95

Upload folder using huggingface_hub

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "total_flos": 2.4159665336144486e+18,
4
+ "train_loss": 0.33013623780929124,
5
+ "train_runtime": 2443.4946,
6
+ "train_samples_per_second": 12.756,
7
+ "train_steps_per_second": 0.798
8
+ }
checkpoint-1920/config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Abstract art",
13
+ "1": "African Art",
14
+ "10": "Gothic art",
15
+ "11": "Graffiti",
16
+ "12": "Impressionism and Post-Impressionism",
17
+ "13": "Islamic Art",
18
+ "14": "Japanese Art",
19
+ "15": "Late Baroque (Rococo) art",
20
+ "16": "Latin American muralism",
21
+ "17": "Medieval art",
22
+ "18": "Minimalism paintings",
23
+ "19": "Polynesian Art",
24
+ "2": "Amazonian Art",
25
+ "20": "Pop art",
26
+ "21": "Realism paintings",
27
+ "22": "Renaissance paintings",
28
+ "23": "Surrealist paintings",
29
+ "24": "cubism",
30
+ "25": "expressionism",
31
+ "26": "romanticism",
32
+ "3": "Ancient Egyptian and Nubian Art",
33
+ "4": "Ancient and Traditional Chinese Art",
34
+ "5": "Anime art",
35
+ "6": "Art Nouveau paintings",
36
+ "7": "Baroque paintings",
37
+ "8": "Cave Art",
38
+ "9": "Constructivsm art"
39
+ },
40
+ "image_size": 224,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 3072,
43
+ "label2id": {
44
+ "Abstract art": "0",
45
+ "African Art": "1",
46
+ "Amazonian Art": "2",
47
+ "Ancient Egyptian and Nubian Art": "3",
48
+ "Ancient and Traditional Chinese Art": "4",
49
+ "Anime art": "5",
50
+ "Art Nouveau paintings": "6",
51
+ "Baroque paintings": "7",
52
+ "Cave Art": "8",
53
+ "Constructivsm art": "9",
54
+ "Gothic art": "10",
55
+ "Graffiti": "11",
56
+ "Impressionism and Post-Impressionism": "12",
57
+ "Islamic Art": "13",
58
+ "Japanese Art": "14",
59
+ "Late Baroque (Rococo) art": "15",
60
+ "Latin American muralism": "16",
61
+ "Medieval art": "17",
62
+ "Minimalism paintings": "18",
63
+ "Polynesian Art": "19",
64
+ "Pop art": "20",
65
+ "Realism paintings": "21",
66
+ "Renaissance paintings": "22",
67
+ "Surrealist paintings": "23",
68
+ "cubism": "24",
69
+ "expressionism": "25",
70
+ "romanticism": "26"
71
+ },
72
+ "layer_norm_eps": 1e-12,
73
+ "model_type": "vit",
74
+ "num_attention_heads": 12,
75
+ "num_channels": 3,
76
+ "num_hidden_layers": 12,
77
+ "patch_size": 16,
78
+ "problem_type": "single_label_classification",
79
+ "qkv_bias": true,
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.31.0"
82
+ }
checkpoint-1920/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b23c267504a7ff3ce0af07e4e64c6b4f45bfcabf82bf34f9c2eb68f078977b
3
+ size 686672645
checkpoint-1920/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-1920/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:517c0564adb06ba50178c4067b5a273bca8cb12013cc83bb5701b10316efe010
3
+ size 343345581
checkpoint-1920/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e44ede8cd8335b79b0464bce31df7de663add08496dad5293a2b1567a875127f
3
+ size 14575
checkpoint-1920/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3eb63d25b55ecdcf4bc54ff77519cc428b5d3efcd500f150845208274d80bc1
3
+ size 627
checkpoint-1920/trainer_state.json ADDED
@@ -0,0 +1,1600 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.1361786127090454,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-480",
4
+ "epoch": 14.76923076923077,
5
+ "global_step": 1920,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.00019897435897435898,
13
+ "loss": 3.2519,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.15,
18
+ "learning_rate": 0.00019794871794871796,
19
+ "loss": 3.1095,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 0.00019692307692307696,
25
+ "loss": 2.9002,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 0.0001958974358974359,
31
+ "loss": 2.7917,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.31,
36
+ "eval_accuracy": 0.35233160621761656,
37
+ "eval_loss": 2.6724953651428223,
38
+ "eval_runtime": 20.1054,
39
+ "eval_samples_per_second": 57.596,
40
+ "eval_steps_per_second": 7.212,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.38,
45
+ "learning_rate": 0.00019487179487179487,
46
+ "loss": 2.6291,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 0.46,
51
+ "learning_rate": 0.00019384615384615385,
52
+ "loss": 2.492,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 0.54,
57
+ "learning_rate": 0.00019282051282051282,
58
+ "loss": 2.4302,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.62,
63
+ "learning_rate": 0.00019179487179487182,
64
+ "loss": 2.3207,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.62,
69
+ "eval_accuracy": 0.4481865284974093,
70
+ "eval_loss": 2.263643980026245,
71
+ "eval_runtime": 20.6875,
72
+ "eval_samples_per_second": 55.976,
73
+ "eval_steps_per_second": 7.009,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.69,
78
+ "learning_rate": 0.0001907692307692308,
79
+ "loss": 2.1778,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.77,
84
+ "learning_rate": 0.00018974358974358974,
85
+ "loss": 2.1215,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 0.85,
90
+ "learning_rate": 0.0001887179487179487,
91
+ "loss": 2.0333,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 0.92,
96
+ "learning_rate": 0.0001876923076923077,
97
+ "loss": 1.9258,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 0.92,
102
+ "eval_accuracy": 0.5535405872193437,
103
+ "eval_loss": 1.8725202083587646,
104
+ "eval_runtime": 19.974,
105
+ "eval_samples_per_second": 57.975,
106
+ "eval_steps_per_second": 7.259,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "learning_rate": 0.0001866666666666667,
112
+ "loss": 1.981,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 1.08,
117
+ "learning_rate": 0.00018564102564102566,
118
+ "loss": 1.5576,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 1.15,
123
+ "learning_rate": 0.00018461538461538463,
124
+ "loss": 1.4327,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 1.23,
129
+ "learning_rate": 0.00018358974358974358,
130
+ "loss": 1.4339,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.23,
135
+ "eval_accuracy": 0.6260794473229706,
136
+ "eval_loss": 1.5738734006881714,
137
+ "eval_runtime": 20.2011,
138
+ "eval_samples_per_second": 57.323,
139
+ "eval_steps_per_second": 7.178,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 1.31,
144
+ "learning_rate": 0.00018256410256410258,
145
+ "loss": 1.2945,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 1.38,
150
+ "learning_rate": 0.00018153846153846155,
151
+ "loss": 1.3482,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 1.46,
156
+ "learning_rate": 0.00018051282051282052,
157
+ "loss": 1.2537,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 1.54,
162
+ "learning_rate": 0.0001794871794871795,
163
+ "loss": 1.3328,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 1.54,
168
+ "eval_accuracy": 0.6226252158894646,
169
+ "eval_loss": 1.484718918800354,
170
+ "eval_runtime": 20.1617,
171
+ "eval_samples_per_second": 57.436,
172
+ "eval_steps_per_second": 7.192,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 1.62,
177
+ "learning_rate": 0.00017846153846153847,
178
+ "loss": 1.1274,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 1.69,
183
+ "learning_rate": 0.00017743589743589744,
184
+ "loss": 1.169,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 1.77,
189
+ "learning_rate": 0.00017641025641025642,
190
+ "loss": 1.1404,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 1.85,
195
+ "learning_rate": 0.0001753846153846154,
196
+ "loss": 1.0914,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 1.85,
201
+ "eval_accuracy": 0.6442141623488774,
202
+ "eval_loss": 1.3511847257614136,
203
+ "eval_runtime": 19.8926,
204
+ "eval_samples_per_second": 58.213,
205
+ "eval_steps_per_second": 7.289,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 1.92,
210
+ "learning_rate": 0.00017435897435897436,
211
+ "loss": 1.1632,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 2.0,
216
+ "learning_rate": 0.00017333333333333334,
217
+ "loss": 1.029,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 2.08,
222
+ "learning_rate": 0.00017230769230769234,
223
+ "loss": 0.7509,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 2.15,
228
+ "learning_rate": 0.00017128205128205128,
229
+ "loss": 0.6594,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 2.15,
234
+ "eval_accuracy": 0.6450777202072538,
235
+ "eval_loss": 1.2636480331420898,
236
+ "eval_runtime": 20.434,
237
+ "eval_samples_per_second": 56.67,
238
+ "eval_steps_per_second": 7.096,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 2.23,
243
+ "learning_rate": 0.00017025641025641026,
244
+ "loss": 0.557,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 2.31,
249
+ "learning_rate": 0.00016923076923076923,
250
+ "loss": 0.5061,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 2.38,
255
+ "learning_rate": 0.00016820512820512823,
256
+ "loss": 0.53,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 2.46,
261
+ "learning_rate": 0.0001671794871794872,
262
+ "loss": 0.5651,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 2.46,
267
+ "eval_accuracy": 0.6649395509499136,
268
+ "eval_loss": 1.2330708503723145,
269
+ "eval_runtime": 20.6893,
270
+ "eval_samples_per_second": 55.971,
271
+ "eval_steps_per_second": 7.008,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 2.54,
276
+ "learning_rate": 0.00016615384615384617,
277
+ "loss": 0.5417,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 2.62,
282
+ "learning_rate": 0.00016512820512820512,
283
+ "loss": 0.5584,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 2.69,
288
+ "learning_rate": 0.0001641025641025641,
289
+ "loss": 0.6261,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 2.77,
294
+ "learning_rate": 0.0001630769230769231,
295
+ "loss": 0.6847,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 2.77,
300
+ "eval_accuracy": 0.6381692573402418,
301
+ "eval_loss": 1.3150428533554077,
302
+ "eval_runtime": 20.8049,
303
+ "eval_samples_per_second": 55.66,
304
+ "eval_steps_per_second": 6.97,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 2.85,
309
+ "learning_rate": 0.00016205128205128207,
310
+ "loss": 0.6079,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 2.92,
315
+ "learning_rate": 0.00016102564102564104,
316
+ "loss": 0.6087,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 3.0,
321
+ "learning_rate": 0.00016,
322
+ "loss": 0.6049,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 3.08,
327
+ "learning_rate": 0.00015897435897435896,
328
+ "loss": 0.2965,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 3.08,
333
+ "eval_accuracy": 0.6735751295336787,
334
+ "eval_loss": 1.1951243877410889,
335
+ "eval_runtime": 20.3022,
336
+ "eval_samples_per_second": 57.038,
337
+ "eval_steps_per_second": 7.142,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 3.15,
342
+ "learning_rate": 0.00015794871794871796,
343
+ "loss": 0.3409,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 3.23,
348
+ "learning_rate": 0.00015692307692307693,
349
+ "loss": 0.2707,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 3.31,
354
+ "learning_rate": 0.0001558974358974359,
355
+ "loss": 0.2439,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 3.38,
360
+ "learning_rate": 0.00015487179487179488,
361
+ "loss": 0.3052,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 3.38,
366
+ "eval_accuracy": 0.5915371329879102,
367
+ "eval_loss": 1.4183249473571777,
368
+ "eval_runtime": 19.9664,
369
+ "eval_samples_per_second": 57.997,
370
+ "eval_steps_per_second": 7.262,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 3.46,
375
+ "learning_rate": 0.00015384615384615385,
376
+ "loss": 0.2819,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 3.54,
381
+ "learning_rate": 0.00015282051282051282,
382
+ "loss": 0.2755,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 3.62,
387
+ "learning_rate": 0.0001517948717948718,
388
+ "loss": 0.2574,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 3.69,
393
+ "learning_rate": 0.00015076923076923077,
394
+ "loss": 0.2592,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 3.69,
399
+ "eval_accuracy": 0.697754749568221,
400
+ "eval_loss": 1.1361786127090454,
401
+ "eval_runtime": 19.9742,
402
+ "eval_samples_per_second": 57.975,
403
+ "eval_steps_per_second": 7.259,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 3.77,
408
+ "learning_rate": 0.00014974358974358974,
409
+ "loss": 0.2762,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 3.85,
414
+ "learning_rate": 0.00014871794871794872,
415
+ "loss": 0.2489,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 3.92,
420
+ "learning_rate": 0.00014769230769230772,
421
+ "loss": 0.2119,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 4.0,
426
+ "learning_rate": 0.00014666666666666666,
427
+ "loss": 0.3058,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 4.0,
432
+ "eval_accuracy": 0.6303972366148531,
433
+ "eval_loss": 1.4736298322677612,
434
+ "eval_runtime": 20.1619,
435
+ "eval_samples_per_second": 57.435,
436
+ "eval_steps_per_second": 7.192,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 4.08,
441
+ "learning_rate": 0.00014564102564102564,
442
+ "loss": 0.2272,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 4.15,
447
+ "learning_rate": 0.0001446153846153846,
448
+ "loss": 0.1024,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 4.23,
453
+ "learning_rate": 0.0001435897435897436,
454
+ "loss": 0.1202,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 4.31,
459
+ "learning_rate": 0.00014256410256410258,
460
+ "loss": 0.1412,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 4.31,
465
+ "eval_accuracy": 0.6528497409326425,
466
+ "eval_loss": 1.2578930854797363,
467
+ "eval_runtime": 20.8549,
468
+ "eval_samples_per_second": 55.527,
469
+ "eval_steps_per_second": 6.953,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 4.38,
474
+ "learning_rate": 0.00014153846153846156,
475
+ "loss": 0.1405,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 4.46,
480
+ "learning_rate": 0.0001405128205128205,
481
+ "loss": 0.1154,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 4.54,
486
+ "learning_rate": 0.00013948717948717947,
487
+ "loss": 0.1481,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 4.62,
492
+ "learning_rate": 0.00013846153846153847,
493
+ "loss": 0.1141,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 4.62,
498
+ "eval_accuracy": 0.6675302245250432,
499
+ "eval_loss": 1.2793298959732056,
500
+ "eval_runtime": 20.7805,
501
+ "eval_samples_per_second": 55.725,
502
+ "eval_steps_per_second": 6.978,
503
+ "step": 600
504
+ },
505
+ {
506
+ "epoch": 4.69,
507
+ "learning_rate": 0.00013743589743589745,
508
+ "loss": 0.1616,
509
+ "step": 610
510
+ },
511
+ {
512
+ "epoch": 4.77,
513
+ "learning_rate": 0.00013641025641025642,
514
+ "loss": 0.1792,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 4.85,
519
+ "learning_rate": 0.0001353846153846154,
520
+ "loss": 0.0885,
521
+ "step": 630
522
+ },
523
+ {
524
+ "epoch": 4.92,
525
+ "learning_rate": 0.00013435897435897437,
526
+ "loss": 0.1553,
527
+ "step": 640
528
+ },
529
+ {
530
+ "epoch": 4.92,
531
+ "eval_accuracy": 0.6588946459412781,
532
+ "eval_loss": 1.3182828426361084,
533
+ "eval_runtime": 20.1017,
534
+ "eval_samples_per_second": 57.607,
535
+ "eval_steps_per_second": 7.213,
536
+ "step": 640
537
+ },
538
+ {
539
+ "epoch": 5.0,
540
+ "learning_rate": 0.00013333333333333334,
541
+ "loss": 0.1181,
542
+ "step": 650
543
+ },
544
+ {
545
+ "epoch": 5.08,
546
+ "learning_rate": 0.0001323076923076923,
547
+ "loss": 0.1392,
548
+ "step": 660
549
+ },
550
+ {
551
+ "epoch": 5.15,
552
+ "learning_rate": 0.00013128205128205129,
553
+ "loss": 0.0673,
554
+ "step": 670
555
+ },
556
+ {
557
+ "epoch": 5.23,
558
+ "learning_rate": 0.00013025641025641026,
559
+ "loss": 0.059,
560
+ "step": 680
561
+ },
562
+ {
563
+ "epoch": 5.23,
564
+ "eval_accuracy": 0.6882556131260794,
565
+ "eval_loss": 1.2099286317825317,
566
+ "eval_runtime": 21.3732,
567
+ "eval_samples_per_second": 54.18,
568
+ "eval_steps_per_second": 6.784,
569
+ "step": 680
570
+ },
571
+ {
572
+ "epoch": 5.31,
573
+ "learning_rate": 0.00012923076923076923,
574
+ "loss": 0.0786,
575
+ "step": 690
576
+ },
577
+ {
578
+ "epoch": 5.38,
579
+ "learning_rate": 0.00012820512820512823,
580
+ "loss": 0.0537,
581
+ "step": 700
582
+ },
583
+ {
584
+ "epoch": 5.46,
585
+ "learning_rate": 0.00012717948717948718,
586
+ "loss": 0.0639,
587
+ "step": 710
588
+ },
589
+ {
590
+ "epoch": 5.54,
591
+ "learning_rate": 0.00012615384615384615,
592
+ "loss": 0.0914,
593
+ "step": 720
594
+ },
595
+ {
596
+ "epoch": 5.54,
597
+ "eval_accuracy": 0.6718480138169257,
598
+ "eval_loss": 1.298492193222046,
599
+ "eval_runtime": 20.1222,
600
+ "eval_samples_per_second": 57.548,
601
+ "eval_steps_per_second": 7.206,
602
+ "step": 720
603
+ },
604
+ {
605
+ "epoch": 5.62,
606
+ "learning_rate": 0.00012512820512820512,
607
+ "loss": 0.1004,
608
+ "step": 730
609
+ },
610
+ {
611
+ "epoch": 5.69,
612
+ "learning_rate": 0.00012410256410256412,
613
+ "loss": 0.0592,
614
+ "step": 740
615
+ },
616
+ {
617
+ "epoch": 5.77,
618
+ "learning_rate": 0.0001230769230769231,
619
+ "loss": 0.0688,
620
+ "step": 750
621
+ },
622
+ {
623
+ "epoch": 5.85,
624
+ "learning_rate": 0.00012205128205128207,
625
+ "loss": 0.0546,
626
+ "step": 760
627
+ },
628
+ {
629
+ "epoch": 5.85,
630
+ "eval_accuracy": 0.690846286701209,
631
+ "eval_loss": 1.282209873199463,
632
+ "eval_runtime": 20.42,
633
+ "eval_samples_per_second": 56.709,
634
+ "eval_steps_per_second": 7.101,
635
+ "step": 760
636
+ },
637
+ {
638
+ "epoch": 5.92,
639
+ "learning_rate": 0.00012102564102564103,
640
+ "loss": 0.0285,
641
+ "step": 770
642
+ },
643
+ {
644
+ "epoch": 6.0,
645
+ "learning_rate": 0.00012,
646
+ "loss": 0.0318,
647
+ "step": 780
648
+ },
649
+ {
650
+ "epoch": 6.08,
651
+ "learning_rate": 0.00011897435897435898,
652
+ "loss": 0.0249,
653
+ "step": 790
654
+ },
655
+ {
656
+ "epoch": 6.15,
657
+ "learning_rate": 0.00011794871794871796,
658
+ "loss": 0.0306,
659
+ "step": 800
660
+ },
661
+ {
662
+ "epoch": 6.15,
663
+ "eval_accuracy": 0.6735751295336787,
664
+ "eval_loss": 1.3418058156967163,
665
+ "eval_runtime": 20.9104,
666
+ "eval_samples_per_second": 55.379,
667
+ "eval_steps_per_second": 6.934,
668
+ "step": 800
669
+ },
670
+ {
671
+ "epoch": 6.23,
672
+ "learning_rate": 0.00011692307692307694,
673
+ "loss": 0.0689,
674
+ "step": 810
675
+ },
676
+ {
677
+ "epoch": 6.31,
678
+ "learning_rate": 0.00011589743589743591,
679
+ "loss": 0.0617,
680
+ "step": 820
681
+ },
682
+ {
683
+ "epoch": 6.38,
684
+ "learning_rate": 0.00011487179487179487,
685
+ "loss": 0.0717,
686
+ "step": 830
687
+ },
688
+ {
689
+ "epoch": 6.46,
690
+ "learning_rate": 0.00011384615384615384,
691
+ "loss": 0.0207,
692
+ "step": 840
693
+ },
694
+ {
695
+ "epoch": 6.46,
696
+ "eval_accuracy": 0.697754749568221,
697
+ "eval_loss": 1.3155086040496826,
698
+ "eval_runtime": 20.9506,
699
+ "eval_samples_per_second": 55.273,
700
+ "eval_steps_per_second": 6.921,
701
+ "step": 840
702
+ },
703
+ {
704
+ "epoch": 6.54,
705
+ "learning_rate": 0.00011282051282051283,
706
+ "loss": 0.0192,
707
+ "step": 850
708
+ },
709
+ {
710
+ "epoch": 6.62,
711
+ "learning_rate": 0.0001117948717948718,
712
+ "loss": 0.0448,
713
+ "step": 860
714
+ },
715
+ {
716
+ "epoch": 6.69,
717
+ "learning_rate": 0.00011076923076923077,
718
+ "loss": 0.0213,
719
+ "step": 870
720
+ },
721
+ {
722
+ "epoch": 6.77,
723
+ "learning_rate": 0.00010974358974358976,
724
+ "loss": 0.0505,
725
+ "step": 880
726
+ },
727
+ {
728
+ "epoch": 6.77,
729
+ "eval_accuracy": 0.6614853195164075,
730
+ "eval_loss": 1.4209389686584473,
731
+ "eval_runtime": 20.0879,
732
+ "eval_samples_per_second": 57.647,
733
+ "eval_steps_per_second": 7.218,
734
+ "step": 880
735
+ },
736
+ {
737
+ "epoch": 6.85,
738
+ "learning_rate": 0.00010871794871794872,
739
+ "loss": 0.0356,
740
+ "step": 890
741
+ },
742
+ {
743
+ "epoch": 6.92,
744
+ "learning_rate": 0.0001076923076923077,
745
+ "loss": 0.022,
746
+ "step": 900
747
+ },
748
+ {
749
+ "epoch": 7.0,
750
+ "learning_rate": 0.00010666666666666667,
751
+ "loss": 0.102,
752
+ "step": 910
753
+ },
754
+ {
755
+ "epoch": 7.08,
756
+ "learning_rate": 0.00010564102564102565,
757
+ "loss": 0.016,
758
+ "step": 920
759
+ },
760
+ {
761
+ "epoch": 7.08,
762
+ "eval_accuracy": 0.697754749568221,
763
+ "eval_loss": 1.3212494850158691,
764
+ "eval_runtime": 20.3129,
765
+ "eval_samples_per_second": 57.008,
766
+ "eval_steps_per_second": 7.138,
767
+ "step": 920
768
+ },
769
+ {
770
+ "epoch": 7.15,
771
+ "learning_rate": 0.00010461538461538463,
772
+ "loss": 0.038,
773
+ "step": 930
774
+ },
775
+ {
776
+ "epoch": 7.23,
777
+ "learning_rate": 0.0001035897435897436,
778
+ "loss": 0.0161,
779
+ "step": 940
780
+ },
781
+ {
782
+ "epoch": 7.31,
783
+ "learning_rate": 0.00010256410256410256,
784
+ "loss": 0.0266,
785
+ "step": 950
786
+ },
787
+ {
788
+ "epoch": 7.38,
789
+ "learning_rate": 0.00010153846153846153,
790
+ "loss": 0.0268,
791
+ "step": 960
792
+ },
793
+ {
794
+ "epoch": 7.38,
795
+ "eval_accuracy": 0.7081174438687392,
796
+ "eval_loss": 1.2981479167938232,
797
+ "eval_runtime": 20.6143,
798
+ "eval_samples_per_second": 56.175,
799
+ "eval_steps_per_second": 7.034,
800
+ "step": 960
801
+ },
802
+ {
803
+ "epoch": 7.46,
804
+ "learning_rate": 0.00010051282051282052,
805
+ "loss": 0.0235,
806
+ "step": 970
807
+ },
808
+ {
809
+ "epoch": 7.54,
810
+ "learning_rate": 9.948717948717949e-05,
811
+ "loss": 0.0126,
812
+ "step": 980
813
+ },
814
+ {
815
+ "epoch": 7.62,
816
+ "learning_rate": 9.846153846153848e-05,
817
+ "loss": 0.0143,
818
+ "step": 990
819
+ },
820
+ {
821
+ "epoch": 7.69,
822
+ "learning_rate": 9.743589743589744e-05,
823
+ "loss": 0.0299,
824
+ "step": 1000
825
+ },
826
+ {
827
+ "epoch": 7.69,
828
+ "eval_accuracy": 0.6925734024179621,
829
+ "eval_loss": 1.3568987846374512,
830
+ "eval_runtime": 20.4554,
831
+ "eval_samples_per_second": 56.611,
832
+ "eval_steps_per_second": 7.089,
833
+ "step": 1000
834
+ },
835
+ {
836
+ "epoch": 7.77,
837
+ "learning_rate": 9.641025641025641e-05,
838
+ "loss": 0.0122,
839
+ "step": 1010
840
+ },
841
+ {
842
+ "epoch": 7.85,
843
+ "learning_rate": 9.53846153846154e-05,
844
+ "loss": 0.0203,
845
+ "step": 1020
846
+ },
847
+ {
848
+ "epoch": 7.92,
849
+ "learning_rate": 9.435897435897436e-05,
850
+ "loss": 0.0105,
851
+ "step": 1030
852
+ },
853
+ {
854
+ "epoch": 8.0,
855
+ "learning_rate": 9.333333333333334e-05,
856
+ "loss": 0.0395,
857
+ "step": 1040
858
+ },
859
+ {
860
+ "epoch": 8.0,
861
+ "eval_accuracy": 0.697754749568221,
862
+ "eval_loss": 1.315879225730896,
863
+ "eval_runtime": 20.1743,
864
+ "eval_samples_per_second": 57.4,
865
+ "eval_steps_per_second": 7.187,
866
+ "step": 1040
867
+ },
868
+ {
869
+ "epoch": 8.08,
870
+ "learning_rate": 9.230769230769232e-05,
871
+ "loss": 0.0102,
872
+ "step": 1050
873
+ },
874
+ {
875
+ "epoch": 8.15,
876
+ "learning_rate": 9.128205128205129e-05,
877
+ "loss": 0.0103,
878
+ "step": 1060
879
+ },
880
+ {
881
+ "epoch": 8.23,
882
+ "learning_rate": 9.025641025641026e-05,
883
+ "loss": 0.0106,
884
+ "step": 1070
885
+ },
886
+ {
887
+ "epoch": 8.31,
888
+ "learning_rate": 8.923076923076924e-05,
889
+ "loss": 0.0106,
890
+ "step": 1080
891
+ },
892
+ {
893
+ "epoch": 8.31,
894
+ "eval_accuracy": 0.7046632124352331,
895
+ "eval_loss": 1.3094946146011353,
896
+ "eval_runtime": 20.1865,
897
+ "eval_samples_per_second": 57.365,
898
+ "eval_steps_per_second": 7.183,
899
+ "step": 1080
900
+ },
901
+ {
902
+ "epoch": 8.38,
903
+ "learning_rate": 8.820512820512821e-05,
904
+ "loss": 0.0364,
905
+ "step": 1090
906
+ },
907
+ {
908
+ "epoch": 8.46,
909
+ "learning_rate": 8.717948717948718e-05,
910
+ "loss": 0.0098,
911
+ "step": 1100
912
+ },
913
+ {
914
+ "epoch": 8.54,
915
+ "learning_rate": 8.615384615384617e-05,
916
+ "loss": 0.0462,
917
+ "step": 1110
918
+ },
919
+ {
920
+ "epoch": 8.62,
921
+ "learning_rate": 8.512820512820513e-05,
922
+ "loss": 0.0093,
923
+ "step": 1120
924
+ },
925
+ {
926
+ "epoch": 8.62,
927
+ "eval_accuracy": 0.7124352331606217,
928
+ "eval_loss": 1.3362736701965332,
929
+ "eval_runtime": 20.3723,
930
+ "eval_samples_per_second": 56.842,
931
+ "eval_steps_per_second": 7.117,
932
+ "step": 1120
933
+ },
934
+ {
935
+ "epoch": 8.69,
936
+ "learning_rate": 8.410256410256411e-05,
937
+ "loss": 0.0097,
938
+ "step": 1130
939
+ },
940
+ {
941
+ "epoch": 8.77,
942
+ "learning_rate": 8.307692307692309e-05,
943
+ "loss": 0.0087,
944
+ "step": 1140
945
+ },
946
+ {
947
+ "epoch": 8.85,
948
+ "learning_rate": 8.205128205128205e-05,
949
+ "loss": 0.0087,
950
+ "step": 1150
951
+ },
952
+ {
953
+ "epoch": 8.92,
954
+ "learning_rate": 8.102564102564103e-05,
955
+ "loss": 0.0364,
956
+ "step": 1160
957
+ },
958
+ {
959
+ "epoch": 8.92,
960
+ "eval_accuracy": 0.7089810017271158,
961
+ "eval_loss": 1.340015172958374,
962
+ "eval_runtime": 21.051,
963
+ "eval_samples_per_second": 55.009,
964
+ "eval_steps_per_second": 6.888,
965
+ "step": 1160
966
+ },
967
+ {
968
+ "epoch": 9.0,
969
+ "learning_rate": 8e-05,
970
+ "loss": 0.0087,
971
+ "step": 1170
972
+ },
973
+ {
974
+ "epoch": 9.08,
975
+ "learning_rate": 7.897435897435898e-05,
976
+ "loss": 0.0101,
977
+ "step": 1180
978
+ },
979
+ {
980
+ "epoch": 9.15,
981
+ "learning_rate": 7.794871794871795e-05,
982
+ "loss": 0.0174,
983
+ "step": 1190
984
+ },
985
+ {
986
+ "epoch": 9.23,
987
+ "learning_rate": 7.692307692307693e-05,
988
+ "loss": 0.0324,
989
+ "step": 1200
990
+ },
991
+ {
992
+ "epoch": 9.23,
993
+ "eval_accuracy": 0.7098445595854922,
994
+ "eval_loss": 1.3313075304031372,
995
+ "eval_runtime": 21.981,
996
+ "eval_samples_per_second": 52.682,
997
+ "eval_steps_per_second": 6.597,
998
+ "step": 1200
999
+ },
1000
+ {
1001
+ "epoch": 9.31,
1002
+ "learning_rate": 7.58974358974359e-05,
1003
+ "loss": 0.0076,
1004
+ "step": 1210
1005
+ },
1006
+ {
1007
+ "epoch": 9.38,
1008
+ "learning_rate": 7.487179487179487e-05,
1009
+ "loss": 0.0287,
1010
+ "step": 1220
1011
+ },
1012
+ {
1013
+ "epoch": 9.46,
1014
+ "learning_rate": 7.384615384615386e-05,
1015
+ "loss": 0.0083,
1016
+ "step": 1230
1017
+ },
1018
+ {
1019
+ "epoch": 9.54,
1020
+ "learning_rate": 7.282051282051282e-05,
1021
+ "loss": 0.0076,
1022
+ "step": 1240
1023
+ },
1024
+ {
1025
+ "epoch": 9.54,
1026
+ "eval_accuracy": 0.7072538860103627,
1027
+ "eval_loss": 1.3641475439071655,
1028
+ "eval_runtime": 20.2849,
1029
+ "eval_samples_per_second": 57.087,
1030
+ "eval_steps_per_second": 7.148,
1031
+ "step": 1240
1032
+ },
1033
+ {
1034
+ "epoch": 9.62,
1035
+ "learning_rate": 7.17948717948718e-05,
1036
+ "loss": 0.0076,
1037
+ "step": 1250
1038
+ },
1039
+ {
1040
+ "epoch": 9.69,
1041
+ "learning_rate": 7.076923076923078e-05,
1042
+ "loss": 0.0075,
1043
+ "step": 1260
1044
+ },
1045
+ {
1046
+ "epoch": 9.77,
1047
+ "learning_rate": 6.974358974358974e-05,
1048
+ "loss": 0.0078,
1049
+ "step": 1270
1050
+ },
1051
+ {
1052
+ "epoch": 9.85,
1053
+ "learning_rate": 6.871794871794872e-05,
1054
+ "loss": 0.0076,
1055
+ "step": 1280
1056
+ },
1057
+ {
1058
+ "epoch": 9.85,
1059
+ "eval_accuracy": 0.7098445595854922,
1060
+ "eval_loss": 1.357639193534851,
1061
+ "eval_runtime": 20.3563,
1062
+ "eval_samples_per_second": 56.887,
1063
+ "eval_steps_per_second": 7.123,
1064
+ "step": 1280
1065
+ },
1066
+ {
1067
+ "epoch": 9.92,
1068
+ "learning_rate": 6.76923076923077e-05,
1069
+ "loss": 0.007,
1070
+ "step": 1290
1071
+ },
1072
+ {
1073
+ "epoch": 10.0,
1074
+ "learning_rate": 6.666666666666667e-05,
1075
+ "loss": 0.0075,
1076
+ "step": 1300
1077
+ },
1078
+ {
1079
+ "epoch": 10.08,
1080
+ "learning_rate": 6.564102564102564e-05,
1081
+ "loss": 0.0126,
1082
+ "step": 1310
1083
+ },
1084
+ {
1085
+ "epoch": 10.15,
1086
+ "learning_rate": 6.461538461538462e-05,
1087
+ "loss": 0.021,
1088
+ "step": 1320
1089
+ },
1090
+ {
1091
+ "epoch": 10.15,
1092
+ "eval_accuracy": 0.7124352331606217,
1093
+ "eval_loss": 1.3460208177566528,
1094
+ "eval_runtime": 20.5844,
1095
+ "eval_samples_per_second": 56.256,
1096
+ "eval_steps_per_second": 7.044,
1097
+ "step": 1320
1098
+ },
1099
+ {
1100
+ "epoch": 10.23,
1101
+ "learning_rate": 6.358974358974359e-05,
1102
+ "loss": 0.0068,
1103
+ "step": 1330
1104
+ },
1105
+ {
1106
+ "epoch": 10.31,
1107
+ "learning_rate": 6.256410256410256e-05,
1108
+ "loss": 0.0068,
1109
+ "step": 1340
1110
+ },
1111
+ {
1112
+ "epoch": 10.38,
1113
+ "learning_rate": 6.153846153846155e-05,
1114
+ "loss": 0.0066,
1115
+ "step": 1350
1116
+ },
1117
+ {
1118
+ "epoch": 10.46,
1119
+ "learning_rate": 6.0512820512820515e-05,
1120
+ "loss": 0.0064,
1121
+ "step": 1360
1122
+ },
1123
+ {
1124
+ "epoch": 10.46,
1125
+ "eval_accuracy": 0.7184801381692574,
1126
+ "eval_loss": 1.3521218299865723,
1127
+ "eval_runtime": 21.5706,
1128
+ "eval_samples_per_second": 53.684,
1129
+ "eval_steps_per_second": 6.722,
1130
+ "step": 1360
1131
+ },
1132
+ {
1133
+ "epoch": 10.54,
1134
+ "learning_rate": 5.948717948717949e-05,
1135
+ "loss": 0.0061,
1136
+ "step": 1370
1137
+ },
1138
+ {
1139
+ "epoch": 10.62,
1140
+ "learning_rate": 5.846153846153847e-05,
1141
+ "loss": 0.0062,
1142
+ "step": 1380
1143
+ },
1144
+ {
1145
+ "epoch": 10.69,
1146
+ "learning_rate": 5.7435897435897434e-05,
1147
+ "loss": 0.0364,
1148
+ "step": 1390
1149
+ },
1150
+ {
1151
+ "epoch": 10.77,
1152
+ "learning_rate": 5.6410256410256414e-05,
1153
+ "loss": 0.0069,
1154
+ "step": 1400
1155
+ },
1156
+ {
1157
+ "epoch": 10.77,
1158
+ "eval_accuracy": 0.7141623488773747,
1159
+ "eval_loss": 1.3537815809249878,
1160
+ "eval_runtime": 20.0789,
1161
+ "eval_samples_per_second": 57.672,
1162
+ "eval_steps_per_second": 7.221,
1163
+ "step": 1400
1164
+ },
1165
+ {
1166
+ "epoch": 10.85,
1167
+ "learning_rate": 5.538461538461539e-05,
1168
+ "loss": 0.0069,
1169
+ "step": 1410
1170
+ },
1171
+ {
1172
+ "epoch": 10.92,
1173
+ "learning_rate": 5.435897435897436e-05,
1174
+ "loss": 0.0068,
1175
+ "step": 1420
1176
+ },
1177
+ {
1178
+ "epoch": 11.0,
1179
+ "learning_rate": 5.333333333333333e-05,
1180
+ "loss": 0.0063,
1181
+ "step": 1430
1182
+ },
1183
+ {
1184
+ "epoch": 11.08,
1185
+ "learning_rate": 5.230769230769231e-05,
1186
+ "loss": 0.0101,
1187
+ "step": 1440
1188
+ },
1189
+ {
1190
+ "epoch": 11.08,
1191
+ "eval_accuracy": 0.7150259067357513,
1192
+ "eval_loss": 1.3637244701385498,
1193
+ "eval_runtime": 20.418,
1194
+ "eval_samples_per_second": 56.715,
1195
+ "eval_steps_per_second": 7.102,
1196
+ "step": 1440
1197
+ },
1198
+ {
1199
+ "epoch": 11.15,
1200
+ "learning_rate": 5.128205128205128e-05,
1201
+ "loss": 0.0094,
1202
+ "step": 1450
1203
+ },
1204
+ {
1205
+ "epoch": 11.23,
1206
+ "learning_rate": 5.025641025641026e-05,
1207
+ "loss": 0.0152,
1208
+ "step": 1460
1209
+ },
1210
+ {
1211
+ "epoch": 11.31,
1212
+ "learning_rate": 4.923076923076924e-05,
1213
+ "loss": 0.006,
1214
+ "step": 1470
1215
+ },
1216
+ {
1217
+ "epoch": 11.38,
1218
+ "learning_rate": 4.8205128205128205e-05,
1219
+ "loss": 0.0115,
1220
+ "step": 1480
1221
+ },
1222
+ {
1223
+ "epoch": 11.38,
1224
+ "eval_accuracy": 0.7081174438687392,
1225
+ "eval_loss": 1.3893637657165527,
1226
+ "eval_runtime": 21.0579,
1227
+ "eval_samples_per_second": 54.991,
1228
+ "eval_steps_per_second": 6.886,
1229
+ "step": 1480
1230
+ },
1231
+ {
1232
+ "epoch": 11.46,
1233
+ "learning_rate": 4.717948717948718e-05,
1234
+ "loss": 0.0062,
1235
+ "step": 1490
1236
+ },
1237
+ {
1238
+ "epoch": 11.54,
1239
+ "learning_rate": 4.615384615384616e-05,
1240
+ "loss": 0.0058,
1241
+ "step": 1500
1242
+ },
1243
+ {
1244
+ "epoch": 11.62,
1245
+ "learning_rate": 4.512820512820513e-05,
1246
+ "loss": 0.0058,
1247
+ "step": 1510
1248
+ },
1249
+ {
1250
+ "epoch": 11.69,
1251
+ "learning_rate": 4.4102564102564104e-05,
1252
+ "loss": 0.0062,
1253
+ "step": 1520
1254
+ },
1255
+ {
1256
+ "epoch": 11.69,
1257
+ "eval_accuracy": 0.7072538860103627,
1258
+ "eval_loss": 1.3909190893173218,
1259
+ "eval_runtime": 20.5699,
1260
+ "eval_samples_per_second": 56.296,
1261
+ "eval_steps_per_second": 7.049,
1262
+ "step": 1520
1263
+ },
1264
+ {
1265
+ "epoch": 11.77,
1266
+ "learning_rate": 4.3076923076923084e-05,
1267
+ "loss": 0.0171,
1268
+ "step": 1530
1269
+ },
1270
+ {
1271
+ "epoch": 11.85,
1272
+ "learning_rate": 4.205128205128206e-05,
1273
+ "loss": 0.0159,
1274
+ "step": 1540
1275
+ },
1276
+ {
1277
+ "epoch": 11.92,
1278
+ "learning_rate": 4.1025641025641023e-05,
1279
+ "loss": 0.0054,
1280
+ "step": 1550
1281
+ },
1282
+ {
1283
+ "epoch": 12.0,
1284
+ "learning_rate": 4e-05,
1285
+ "loss": 0.0055,
1286
+ "step": 1560
1287
+ },
1288
+ {
1289
+ "epoch": 12.0,
1290
+ "eval_accuracy": 0.7124352331606217,
1291
+ "eval_loss": 1.3742793798446655,
1292
+ "eval_runtime": 20.0602,
1293
+ "eval_samples_per_second": 57.726,
1294
+ "eval_steps_per_second": 7.228,
1295
+ "step": 1560
1296
+ },
1297
+ {
1298
+ "epoch": 12.08,
1299
+ "learning_rate": 3.8974358974358976e-05,
1300
+ "loss": 0.0056,
1301
+ "step": 1570
1302
+ },
1303
+ {
1304
+ "epoch": 12.15,
1305
+ "learning_rate": 3.794871794871795e-05,
1306
+ "loss": 0.0234,
1307
+ "step": 1580
1308
+ },
1309
+ {
1310
+ "epoch": 12.23,
1311
+ "learning_rate": 3.692307692307693e-05,
1312
+ "loss": 0.0053,
1313
+ "step": 1590
1314
+ },
1315
+ {
1316
+ "epoch": 12.31,
1317
+ "learning_rate": 3.58974358974359e-05,
1318
+ "loss": 0.0054,
1319
+ "step": 1600
1320
+ },
1321
+ {
1322
+ "epoch": 12.31,
1323
+ "eval_accuracy": 0.7158894645941278,
1324
+ "eval_loss": 1.3830854892730713,
1325
+ "eval_runtime": 20.2514,
1326
+ "eval_samples_per_second": 57.181,
1327
+ "eval_steps_per_second": 7.16,
1328
+ "step": 1600
1329
+ },
1330
+ {
1331
+ "epoch": 12.38,
1332
+ "learning_rate": 3.487179487179487e-05,
1333
+ "loss": 0.0052,
1334
+ "step": 1610
1335
+ },
1336
+ {
1337
+ "epoch": 12.46,
1338
+ "learning_rate": 3.384615384615385e-05,
1339
+ "loss": 0.0052,
1340
+ "step": 1620
1341
+ },
1342
+ {
1343
+ "epoch": 12.54,
1344
+ "learning_rate": 3.282051282051282e-05,
1345
+ "loss": 0.0056,
1346
+ "step": 1630
1347
+ },
1348
+ {
1349
+ "epoch": 12.62,
1350
+ "learning_rate": 3.1794871794871795e-05,
1351
+ "loss": 0.0054,
1352
+ "step": 1640
1353
+ },
1354
+ {
1355
+ "epoch": 12.62,
1356
+ "eval_accuracy": 0.7167530224525043,
1357
+ "eval_loss": 1.3866362571716309,
1358
+ "eval_runtime": 21.0166,
1359
+ "eval_samples_per_second": 55.099,
1360
+ "eval_steps_per_second": 6.899,
1361
+ "step": 1640
1362
+ },
1363
+ {
1364
+ "epoch": 12.69,
1365
+ "learning_rate": 3.0769230769230774e-05,
1366
+ "loss": 0.0052,
1367
+ "step": 1650
1368
+ },
1369
+ {
1370
+ "epoch": 12.77,
1371
+ "learning_rate": 2.9743589743589744e-05,
1372
+ "loss": 0.0052,
1373
+ "step": 1660
1374
+ },
1375
+ {
1376
+ "epoch": 12.85,
1377
+ "learning_rate": 2.8717948717948717e-05,
1378
+ "loss": 0.0058,
1379
+ "step": 1670
1380
+ },
1381
+ {
1382
+ "epoch": 12.92,
1383
+ "learning_rate": 2.7692307692307694e-05,
1384
+ "loss": 0.0147,
1385
+ "step": 1680
1386
+ },
1387
+ {
1388
+ "epoch": 12.92,
1389
+ "eval_accuracy": 0.7150259067357513,
1390
+ "eval_loss": 1.391139030456543,
1391
+ "eval_runtime": 20.4472,
1392
+ "eval_samples_per_second": 56.634,
1393
+ "eval_steps_per_second": 7.091,
1394
+ "step": 1680
1395
+ },
1396
+ {
1397
+ "epoch": 13.0,
1398
+ "learning_rate": 2.6666666666666667e-05,
1399
+ "loss": 0.0051,
1400
+ "step": 1690
1401
+ },
1402
+ {
1403
+ "epoch": 13.08,
1404
+ "learning_rate": 2.564102564102564e-05,
1405
+ "loss": 0.0144,
1406
+ "step": 1700
1407
+ },
1408
+ {
1409
+ "epoch": 13.15,
1410
+ "learning_rate": 2.461538461538462e-05,
1411
+ "loss": 0.0052,
1412
+ "step": 1710
1413
+ },
1414
+ {
1415
+ "epoch": 13.23,
1416
+ "learning_rate": 2.358974358974359e-05,
1417
+ "loss": 0.0058,
1418
+ "step": 1720
1419
+ },
1420
+ {
1421
+ "epoch": 13.23,
1422
+ "eval_accuracy": 0.7141623488773747,
1423
+ "eval_loss": 1.3986831903457642,
1424
+ "eval_runtime": 20.1538,
1425
+ "eval_samples_per_second": 57.458,
1426
+ "eval_steps_per_second": 7.195,
1427
+ "step": 1720
1428
+ },
1429
+ {
1430
+ "epoch": 13.31,
1431
+ "learning_rate": 2.2564102564102566e-05,
1432
+ "loss": 0.0047,
1433
+ "step": 1730
1434
+ },
1435
+ {
1436
+ "epoch": 13.38,
1437
+ "learning_rate": 2.1538461538461542e-05,
1438
+ "loss": 0.0053,
1439
+ "step": 1740
1440
+ },
1441
+ {
1442
+ "epoch": 13.46,
1443
+ "learning_rate": 2.0512820512820512e-05,
1444
+ "loss": 0.005,
1445
+ "step": 1750
1446
+ },
1447
+ {
1448
+ "epoch": 13.54,
1449
+ "learning_rate": 1.9487179487179488e-05,
1450
+ "loss": 0.0096,
1451
+ "step": 1760
1452
+ },
1453
+ {
1454
+ "epoch": 13.54,
1455
+ "eval_accuracy": 0.7132987910189983,
1456
+ "eval_loss": 1.3994832038879395,
1457
+ "eval_runtime": 20.378,
1458
+ "eval_samples_per_second": 56.826,
1459
+ "eval_steps_per_second": 7.116,
1460
+ "step": 1760
1461
+ },
1462
+ {
1463
+ "epoch": 13.62,
1464
+ "learning_rate": 1.8461538461538465e-05,
1465
+ "loss": 0.0055,
1466
+ "step": 1770
1467
+ },
1468
+ {
1469
+ "epoch": 13.69,
1470
+ "learning_rate": 1.7435897435897434e-05,
1471
+ "loss": 0.0112,
1472
+ "step": 1780
1473
+ },
1474
+ {
1475
+ "epoch": 13.77,
1476
+ "learning_rate": 1.641025641025641e-05,
1477
+ "loss": 0.0047,
1478
+ "step": 1790
1479
+ },
1480
+ {
1481
+ "epoch": 13.85,
1482
+ "learning_rate": 1.5384615384615387e-05,
1483
+ "loss": 0.0048,
1484
+ "step": 1800
1485
+ },
1486
+ {
1487
+ "epoch": 13.85,
1488
+ "eval_accuracy": 0.7115716753022453,
1489
+ "eval_loss": 1.401125192642212,
1490
+ "eval_runtime": 20.4927,
1491
+ "eval_samples_per_second": 56.508,
1492
+ "eval_steps_per_second": 7.076,
1493
+ "step": 1800
1494
+ },
1495
+ {
1496
+ "epoch": 13.92,
1497
+ "learning_rate": 1.4358974358974359e-05,
1498
+ "loss": 0.0053,
1499
+ "step": 1810
1500
+ },
1501
+ {
1502
+ "epoch": 14.0,
1503
+ "learning_rate": 1.3333333333333333e-05,
1504
+ "loss": 0.0047,
1505
+ "step": 1820
1506
+ },
1507
+ {
1508
+ "epoch": 14.08,
1509
+ "learning_rate": 1.230769230769231e-05,
1510
+ "loss": 0.0146,
1511
+ "step": 1830
1512
+ },
1513
+ {
1514
+ "epoch": 14.15,
1515
+ "learning_rate": 1.1282051282051283e-05,
1516
+ "loss": 0.0054,
1517
+ "step": 1840
1518
+ },
1519
+ {
1520
+ "epoch": 14.15,
1521
+ "eval_accuracy": 0.7115716753022453,
1522
+ "eval_loss": 1.4026780128479004,
1523
+ "eval_runtime": 20.8793,
1524
+ "eval_samples_per_second": 55.462,
1525
+ "eval_steps_per_second": 6.945,
1526
+ "step": 1840
1527
+ },
1528
+ {
1529
+ "epoch": 14.23,
1530
+ "learning_rate": 1.0256410256410256e-05,
1531
+ "loss": 0.0046,
1532
+ "step": 1850
1533
+ },
1534
+ {
1535
+ "epoch": 14.31,
1536
+ "learning_rate": 9.230769230769232e-06,
1537
+ "loss": 0.0046,
1538
+ "step": 1860
1539
+ },
1540
+ {
1541
+ "epoch": 14.38,
1542
+ "learning_rate": 8.205128205128205e-06,
1543
+ "loss": 0.005,
1544
+ "step": 1870
1545
+ },
1546
+ {
1547
+ "epoch": 14.46,
1548
+ "learning_rate": 7.179487179487179e-06,
1549
+ "loss": 0.0048,
1550
+ "step": 1880
1551
+ },
1552
+ {
1553
+ "epoch": 14.46,
1554
+ "eval_accuracy": 0.7115716753022453,
1555
+ "eval_loss": 1.403483510017395,
1556
+ "eval_runtime": 21.9216,
1557
+ "eval_samples_per_second": 52.825,
1558
+ "eval_steps_per_second": 6.614,
1559
+ "step": 1880
1560
+ },
1561
+ {
1562
+ "epoch": 14.54,
1563
+ "learning_rate": 6.153846153846155e-06,
1564
+ "loss": 0.0089,
1565
+ "step": 1890
1566
+ },
1567
+ {
1568
+ "epoch": 14.62,
1569
+ "learning_rate": 5.128205128205128e-06,
1570
+ "loss": 0.0047,
1571
+ "step": 1900
1572
+ },
1573
+ {
1574
+ "epoch": 14.69,
1575
+ "learning_rate": 4.102564102564103e-06,
1576
+ "loss": 0.0048,
1577
+ "step": 1910
1578
+ },
1579
+ {
1580
+ "epoch": 14.77,
1581
+ "learning_rate": 3.0769230769230774e-06,
1582
+ "loss": 0.0047,
1583
+ "step": 1920
1584
+ },
1585
+ {
1586
+ "epoch": 14.77,
1587
+ "eval_accuracy": 0.7124352331606217,
1588
+ "eval_loss": 1.4049346446990967,
1589
+ "eval_runtime": 20.2662,
1590
+ "eval_samples_per_second": 57.139,
1591
+ "eval_steps_per_second": 7.155,
1592
+ "step": 1920
1593
+ }
1594
+ ],
1595
+ "max_steps": 1950,
1596
+ "num_train_epochs": 15,
1597
+ "total_flos": 2.3789170628711793e+18,
1598
+ "trial_name": null,
1599
+ "trial_params": null
1600
+ }
checkpoint-1920/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a67954eb3bb115d5fc916b53e59475d474112dfa86034717eadf5ee507e440
3
+ size 3963
checkpoint-480/config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Abstract art",
13
+ "1": "African Art",
14
+ "10": "Gothic art",
15
+ "11": "Graffiti",
16
+ "12": "Impressionism and Post-Impressionism",
17
+ "13": "Islamic Art",
18
+ "14": "Japanese Art",
19
+ "15": "Late Baroque (Rococo) art",
20
+ "16": "Latin American muralism",
21
+ "17": "Medieval art",
22
+ "18": "Minimalism paintings",
23
+ "19": "Polynesian Art",
24
+ "2": "Amazonian Art",
25
+ "20": "Pop art",
26
+ "21": "Realism paintings",
27
+ "22": "Renaissance paintings",
28
+ "23": "Surrealist paintings",
29
+ "24": "cubism",
30
+ "25": "expressionism",
31
+ "26": "romanticism",
32
+ "3": "Ancient Egyptian and Nubian Art",
33
+ "4": "Ancient and Traditional Chinese Art",
34
+ "5": "Anime art",
35
+ "6": "Art Nouveau paintings",
36
+ "7": "Baroque paintings",
37
+ "8": "Cave Art",
38
+ "9": "Constructivsm art"
39
+ },
40
+ "image_size": 224,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 3072,
43
+ "label2id": {
44
+ "Abstract art": "0",
45
+ "African Art": "1",
46
+ "Amazonian Art": "2",
47
+ "Ancient Egyptian and Nubian Art": "3",
48
+ "Ancient and Traditional Chinese Art": "4",
49
+ "Anime art": "5",
50
+ "Art Nouveau paintings": "6",
51
+ "Baroque paintings": "7",
52
+ "Cave Art": "8",
53
+ "Constructivsm art": "9",
54
+ "Gothic art": "10",
55
+ "Graffiti": "11",
56
+ "Impressionism and Post-Impressionism": "12",
57
+ "Islamic Art": "13",
58
+ "Japanese Art": "14",
59
+ "Late Baroque (Rococo) art": "15",
60
+ "Latin American muralism": "16",
61
+ "Medieval art": "17",
62
+ "Minimalism paintings": "18",
63
+ "Polynesian Art": "19",
64
+ "Pop art": "20",
65
+ "Realism paintings": "21",
66
+ "Renaissance paintings": "22",
67
+ "Surrealist paintings": "23",
68
+ "cubism": "24",
69
+ "expressionism": "25",
70
+ "romanticism": "26"
71
+ },
72
+ "layer_norm_eps": 1e-12,
73
+ "model_type": "vit",
74
+ "num_attention_heads": 12,
75
+ "num_channels": 3,
76
+ "num_hidden_layers": 12,
77
+ "patch_size": 16,
78
+ "problem_type": "single_label_classification",
79
+ "qkv_bias": true,
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.31.0"
82
+ }
checkpoint-480/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39866bc41496272bc97e4eb5968b35d4f278ba322a32506a61fce312ffb9ad31
3
+ size 686672645
checkpoint-480/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-480/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d75c61e9648e429aee5951f10433b6ff7c7100220be70b8d072c24daa7f73ea
3
+ size 343345581
checkpoint-480/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e0f30c7754637247f97149558697a4932915f961818f7360cfc62855116ca0
3
+ size 14575
checkpoint-480/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce08fd87a10ccfe0d9420fd259547ccd82600e52d1f525c9099c20257bd49dca
3
+ size 627
checkpoint-480/trainer_state.json ADDED
@@ -0,0 +1,412 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.1361786127090454,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-480",
4
+ "epoch": 3.6923076923076925,
5
+ "global_step": 480,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.00019897435897435898,
13
+ "loss": 3.2519,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.15,
18
+ "learning_rate": 0.00019794871794871796,
19
+ "loss": 3.1095,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 0.00019692307692307696,
25
+ "loss": 2.9002,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 0.0001958974358974359,
31
+ "loss": 2.7917,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.31,
36
+ "eval_accuracy": 0.35233160621761656,
37
+ "eval_loss": 2.6724953651428223,
38
+ "eval_runtime": 20.1054,
39
+ "eval_samples_per_second": 57.596,
40
+ "eval_steps_per_second": 7.212,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.38,
45
+ "learning_rate": 0.00019487179487179487,
46
+ "loss": 2.6291,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 0.46,
51
+ "learning_rate": 0.00019384615384615385,
52
+ "loss": 2.492,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 0.54,
57
+ "learning_rate": 0.00019282051282051282,
58
+ "loss": 2.4302,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.62,
63
+ "learning_rate": 0.00019179487179487182,
64
+ "loss": 2.3207,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.62,
69
+ "eval_accuracy": 0.4481865284974093,
70
+ "eval_loss": 2.263643980026245,
71
+ "eval_runtime": 20.6875,
72
+ "eval_samples_per_second": 55.976,
73
+ "eval_steps_per_second": 7.009,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.69,
78
+ "learning_rate": 0.0001907692307692308,
79
+ "loss": 2.1778,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.77,
84
+ "learning_rate": 0.00018974358974358974,
85
+ "loss": 2.1215,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 0.85,
90
+ "learning_rate": 0.0001887179487179487,
91
+ "loss": 2.0333,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 0.92,
96
+ "learning_rate": 0.0001876923076923077,
97
+ "loss": 1.9258,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 0.92,
102
+ "eval_accuracy": 0.5535405872193437,
103
+ "eval_loss": 1.8725202083587646,
104
+ "eval_runtime": 19.974,
105
+ "eval_samples_per_second": 57.975,
106
+ "eval_steps_per_second": 7.259,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "learning_rate": 0.0001866666666666667,
112
+ "loss": 1.981,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 1.08,
117
+ "learning_rate": 0.00018564102564102566,
118
+ "loss": 1.5576,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 1.15,
123
+ "learning_rate": 0.00018461538461538463,
124
+ "loss": 1.4327,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 1.23,
129
+ "learning_rate": 0.00018358974358974358,
130
+ "loss": 1.4339,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.23,
135
+ "eval_accuracy": 0.6260794473229706,
136
+ "eval_loss": 1.5738734006881714,
137
+ "eval_runtime": 20.2011,
138
+ "eval_samples_per_second": 57.323,
139
+ "eval_steps_per_second": 7.178,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 1.31,
144
+ "learning_rate": 0.00018256410256410258,
145
+ "loss": 1.2945,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 1.38,
150
+ "learning_rate": 0.00018153846153846155,
151
+ "loss": 1.3482,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 1.46,
156
+ "learning_rate": 0.00018051282051282052,
157
+ "loss": 1.2537,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 1.54,
162
+ "learning_rate": 0.0001794871794871795,
163
+ "loss": 1.3328,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 1.54,
168
+ "eval_accuracy": 0.6226252158894646,
169
+ "eval_loss": 1.484718918800354,
170
+ "eval_runtime": 20.1617,
171
+ "eval_samples_per_second": 57.436,
172
+ "eval_steps_per_second": 7.192,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 1.62,
177
+ "learning_rate": 0.00017846153846153847,
178
+ "loss": 1.1274,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 1.69,
183
+ "learning_rate": 0.00017743589743589744,
184
+ "loss": 1.169,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 1.77,
189
+ "learning_rate": 0.00017641025641025642,
190
+ "loss": 1.1404,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 1.85,
195
+ "learning_rate": 0.0001753846153846154,
196
+ "loss": 1.0914,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 1.85,
201
+ "eval_accuracy": 0.6442141623488774,
202
+ "eval_loss": 1.3511847257614136,
203
+ "eval_runtime": 19.8926,
204
+ "eval_samples_per_second": 58.213,
205
+ "eval_steps_per_second": 7.289,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 1.92,
210
+ "learning_rate": 0.00017435897435897436,
211
+ "loss": 1.1632,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 2.0,
216
+ "learning_rate": 0.00017333333333333334,
217
+ "loss": 1.029,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 2.08,
222
+ "learning_rate": 0.00017230769230769234,
223
+ "loss": 0.7509,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 2.15,
228
+ "learning_rate": 0.00017128205128205128,
229
+ "loss": 0.6594,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 2.15,
234
+ "eval_accuracy": 0.6450777202072538,
235
+ "eval_loss": 1.2636480331420898,
236
+ "eval_runtime": 20.434,
237
+ "eval_samples_per_second": 56.67,
238
+ "eval_steps_per_second": 7.096,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 2.23,
243
+ "learning_rate": 0.00017025641025641026,
244
+ "loss": 0.557,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 2.31,
249
+ "learning_rate": 0.00016923076923076923,
250
+ "loss": 0.5061,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 2.38,
255
+ "learning_rate": 0.00016820512820512823,
256
+ "loss": 0.53,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 2.46,
261
+ "learning_rate": 0.0001671794871794872,
262
+ "loss": 0.5651,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 2.46,
267
+ "eval_accuracy": 0.6649395509499136,
268
+ "eval_loss": 1.2330708503723145,
269
+ "eval_runtime": 20.6893,
270
+ "eval_samples_per_second": 55.971,
271
+ "eval_steps_per_second": 7.008,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 2.54,
276
+ "learning_rate": 0.00016615384615384617,
277
+ "loss": 0.5417,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 2.62,
282
+ "learning_rate": 0.00016512820512820512,
283
+ "loss": 0.5584,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 2.69,
288
+ "learning_rate": 0.0001641025641025641,
289
+ "loss": 0.6261,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 2.77,
294
+ "learning_rate": 0.0001630769230769231,
295
+ "loss": 0.6847,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 2.77,
300
+ "eval_accuracy": 0.6381692573402418,
301
+ "eval_loss": 1.3150428533554077,
302
+ "eval_runtime": 20.8049,
303
+ "eval_samples_per_second": 55.66,
304
+ "eval_steps_per_second": 6.97,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 2.85,
309
+ "learning_rate": 0.00016205128205128207,
310
+ "loss": 0.6079,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 2.92,
315
+ "learning_rate": 0.00016102564102564104,
316
+ "loss": 0.6087,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 3.0,
321
+ "learning_rate": 0.00016,
322
+ "loss": 0.6049,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 3.08,
327
+ "learning_rate": 0.00015897435897435896,
328
+ "loss": 0.2965,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 3.08,
333
+ "eval_accuracy": 0.6735751295336787,
334
+ "eval_loss": 1.1951243877410889,
335
+ "eval_runtime": 20.3022,
336
+ "eval_samples_per_second": 57.038,
337
+ "eval_steps_per_second": 7.142,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 3.15,
342
+ "learning_rate": 0.00015794871794871796,
343
+ "loss": 0.3409,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 3.23,
348
+ "learning_rate": 0.00015692307692307693,
349
+ "loss": 0.2707,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 3.31,
354
+ "learning_rate": 0.0001558974358974359,
355
+ "loss": 0.2439,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 3.38,
360
+ "learning_rate": 0.00015487179487179488,
361
+ "loss": 0.3052,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 3.38,
366
+ "eval_accuracy": 0.5915371329879102,
367
+ "eval_loss": 1.4183249473571777,
368
+ "eval_runtime": 19.9664,
369
+ "eval_samples_per_second": 57.997,
370
+ "eval_steps_per_second": 7.262,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 3.46,
375
+ "learning_rate": 0.00015384615384615385,
376
+ "loss": 0.2819,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 3.54,
381
+ "learning_rate": 0.00015282051282051282,
382
+ "loss": 0.2755,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 3.62,
387
+ "learning_rate": 0.0001517948717948718,
388
+ "loss": 0.2574,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 3.69,
393
+ "learning_rate": 0.00015076923076923077,
394
+ "loss": 0.2592,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 3.69,
399
+ "eval_accuracy": 0.697754749568221,
400
+ "eval_loss": 1.1361786127090454,
401
+ "eval_runtime": 19.9742,
402
+ "eval_samples_per_second": 57.975,
403
+ "eval_steps_per_second": 7.259,
404
+ "step": 480
405
+ }
406
+ ],
407
+ "max_steps": 1950,
408
+ "num_train_epochs": 15,
409
+ "total_flos": 5.948067750708142e+17,
410
+ "trial_name": null,
411
+ "trial_params": null
412
+ }
checkpoint-480/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a67954eb3bb115d5fc916b53e59475d474112dfa86034717eadf5ee507e440
3
+ size 3963
config.json ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Abstract art",
13
+ "1": "African Art",
14
+ "10": "Gothic art",
15
+ "11": "Graffiti",
16
+ "12": "Impressionism and Post-Impressionism",
17
+ "13": "Islamic Art",
18
+ "14": "Japanese Art",
19
+ "15": "Late Baroque (Rococo) art",
20
+ "16": "Latin American muralism",
21
+ "17": "Medieval art",
22
+ "18": "Minimalism paintings",
23
+ "19": "Polynesian Art",
24
+ "2": "Amazonian Art",
25
+ "20": "Pop art",
26
+ "21": "Realism paintings",
27
+ "22": "Renaissance paintings",
28
+ "23": "Surrealist paintings",
29
+ "24": "cubism",
30
+ "25": "expressionism",
31
+ "26": "romanticism",
32
+ "3": "Ancient Egyptian and Nubian Art",
33
+ "4": "Ancient and Traditional Chinese Art",
34
+ "5": "Anime art",
35
+ "6": "Art Nouveau paintings",
36
+ "7": "Baroque paintings",
37
+ "8": "Cave Art",
38
+ "9": "Constructivsm art"
39
+ },
40
+ "image_size": 224,
41
+ "initializer_range": 0.02,
42
+ "intermediate_size": 3072,
43
+ "label2id": {
44
+ "Abstract art": "0",
45
+ "African Art": "1",
46
+ "Amazonian Art": "2",
47
+ "Ancient Egyptian and Nubian Art": "3",
48
+ "Ancient and Traditional Chinese Art": "4",
49
+ "Anime art": "5",
50
+ "Art Nouveau paintings": "6",
51
+ "Baroque paintings": "7",
52
+ "Cave Art": "8",
53
+ "Constructivsm art": "9",
54
+ "Gothic art": "10",
55
+ "Graffiti": "11",
56
+ "Impressionism and Post-Impressionism": "12",
57
+ "Islamic Art": "13",
58
+ "Japanese Art": "14",
59
+ "Late Baroque (Rococo) art": "15",
60
+ "Latin American muralism": "16",
61
+ "Medieval art": "17",
62
+ "Minimalism paintings": "18",
63
+ "Polynesian Art": "19",
64
+ "Pop art": "20",
65
+ "Realism paintings": "21",
66
+ "Renaissance paintings": "22",
67
+ "Surrealist paintings": "23",
68
+ "cubism": "24",
69
+ "expressionism": "25",
70
+ "romanticism": "26"
71
+ },
72
+ "layer_norm_eps": 1e-12,
73
+ "model_type": "vit",
74
+ "num_attention_heads": 12,
75
+ "num_channels": 3,
76
+ "num_hidden_layers": 12,
77
+ "patch_size": 16,
78
+ "problem_type": "single_label_classification",
79
+ "qkv_bias": true,
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.31.0"
82
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTFeatureExtractor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d75c61e9648e429aee5951f10433b6ff7c7100220be70b8d072c24daa7f73ea
3
+ size 343345581
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "total_flos": 2.4159665336144486e+18,
4
+ "train_loss": 0.33013623780929124,
5
+ "train_runtime": 2443.4946,
6
+ "train_samples_per_second": 12.756,
7
+ "train_steps_per_second": 0.798
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1627 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.1361786127090454,
3
+ "best_model_checkpoint": "./vit-base-beans/checkpoint-480",
4
+ "epoch": 15.0,
5
+ "global_step": 1950,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 0.00019897435897435898,
13
+ "loss": 3.2519,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.15,
18
+ "learning_rate": 0.00019794871794871796,
19
+ "loss": 3.1095,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.23,
24
+ "learning_rate": 0.00019692307692307696,
25
+ "loss": 2.9002,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.31,
30
+ "learning_rate": 0.0001958974358974359,
31
+ "loss": 2.7917,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.31,
36
+ "eval_accuracy": 0.35233160621761656,
37
+ "eval_loss": 2.6724953651428223,
38
+ "eval_runtime": 20.1054,
39
+ "eval_samples_per_second": 57.596,
40
+ "eval_steps_per_second": 7.212,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.38,
45
+ "learning_rate": 0.00019487179487179487,
46
+ "loss": 2.6291,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 0.46,
51
+ "learning_rate": 0.00019384615384615385,
52
+ "loss": 2.492,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 0.54,
57
+ "learning_rate": 0.00019282051282051282,
58
+ "loss": 2.4302,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.62,
63
+ "learning_rate": 0.00019179487179487182,
64
+ "loss": 2.3207,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.62,
69
+ "eval_accuracy": 0.4481865284974093,
70
+ "eval_loss": 2.263643980026245,
71
+ "eval_runtime": 20.6875,
72
+ "eval_samples_per_second": 55.976,
73
+ "eval_steps_per_second": 7.009,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 0.69,
78
+ "learning_rate": 0.0001907692307692308,
79
+ "loss": 2.1778,
80
+ "step": 90
81
+ },
82
+ {
83
+ "epoch": 0.77,
84
+ "learning_rate": 0.00018974358974358974,
85
+ "loss": 2.1215,
86
+ "step": 100
87
+ },
88
+ {
89
+ "epoch": 0.85,
90
+ "learning_rate": 0.0001887179487179487,
91
+ "loss": 2.0333,
92
+ "step": 110
93
+ },
94
+ {
95
+ "epoch": 0.92,
96
+ "learning_rate": 0.0001876923076923077,
97
+ "loss": 1.9258,
98
+ "step": 120
99
+ },
100
+ {
101
+ "epoch": 0.92,
102
+ "eval_accuracy": 0.5535405872193437,
103
+ "eval_loss": 1.8725202083587646,
104
+ "eval_runtime": 19.974,
105
+ "eval_samples_per_second": 57.975,
106
+ "eval_steps_per_second": 7.259,
107
+ "step": 120
108
+ },
109
+ {
110
+ "epoch": 1.0,
111
+ "learning_rate": 0.0001866666666666667,
112
+ "loss": 1.981,
113
+ "step": 130
114
+ },
115
+ {
116
+ "epoch": 1.08,
117
+ "learning_rate": 0.00018564102564102566,
118
+ "loss": 1.5576,
119
+ "step": 140
120
+ },
121
+ {
122
+ "epoch": 1.15,
123
+ "learning_rate": 0.00018461538461538463,
124
+ "loss": 1.4327,
125
+ "step": 150
126
+ },
127
+ {
128
+ "epoch": 1.23,
129
+ "learning_rate": 0.00018358974358974358,
130
+ "loss": 1.4339,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 1.23,
135
+ "eval_accuracy": 0.6260794473229706,
136
+ "eval_loss": 1.5738734006881714,
137
+ "eval_runtime": 20.2011,
138
+ "eval_samples_per_second": 57.323,
139
+ "eval_steps_per_second": 7.178,
140
+ "step": 160
141
+ },
142
+ {
143
+ "epoch": 1.31,
144
+ "learning_rate": 0.00018256410256410258,
145
+ "loss": 1.2945,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 1.38,
150
+ "learning_rate": 0.00018153846153846155,
151
+ "loss": 1.3482,
152
+ "step": 180
153
+ },
154
+ {
155
+ "epoch": 1.46,
156
+ "learning_rate": 0.00018051282051282052,
157
+ "loss": 1.2537,
158
+ "step": 190
159
+ },
160
+ {
161
+ "epoch": 1.54,
162
+ "learning_rate": 0.0001794871794871795,
163
+ "loss": 1.3328,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 1.54,
168
+ "eval_accuracy": 0.6226252158894646,
169
+ "eval_loss": 1.484718918800354,
170
+ "eval_runtime": 20.1617,
171
+ "eval_samples_per_second": 57.436,
172
+ "eval_steps_per_second": 7.192,
173
+ "step": 200
174
+ },
175
+ {
176
+ "epoch": 1.62,
177
+ "learning_rate": 0.00017846153846153847,
178
+ "loss": 1.1274,
179
+ "step": 210
180
+ },
181
+ {
182
+ "epoch": 1.69,
183
+ "learning_rate": 0.00017743589743589744,
184
+ "loss": 1.169,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 1.77,
189
+ "learning_rate": 0.00017641025641025642,
190
+ "loss": 1.1404,
191
+ "step": 230
192
+ },
193
+ {
194
+ "epoch": 1.85,
195
+ "learning_rate": 0.0001753846153846154,
196
+ "loss": 1.0914,
197
+ "step": 240
198
+ },
199
+ {
200
+ "epoch": 1.85,
201
+ "eval_accuracy": 0.6442141623488774,
202
+ "eval_loss": 1.3511847257614136,
203
+ "eval_runtime": 19.8926,
204
+ "eval_samples_per_second": 58.213,
205
+ "eval_steps_per_second": 7.289,
206
+ "step": 240
207
+ },
208
+ {
209
+ "epoch": 1.92,
210
+ "learning_rate": 0.00017435897435897436,
211
+ "loss": 1.1632,
212
+ "step": 250
213
+ },
214
+ {
215
+ "epoch": 2.0,
216
+ "learning_rate": 0.00017333333333333334,
217
+ "loss": 1.029,
218
+ "step": 260
219
+ },
220
+ {
221
+ "epoch": 2.08,
222
+ "learning_rate": 0.00017230769230769234,
223
+ "loss": 0.7509,
224
+ "step": 270
225
+ },
226
+ {
227
+ "epoch": 2.15,
228
+ "learning_rate": 0.00017128205128205128,
229
+ "loss": 0.6594,
230
+ "step": 280
231
+ },
232
+ {
233
+ "epoch": 2.15,
234
+ "eval_accuracy": 0.6450777202072538,
235
+ "eval_loss": 1.2636480331420898,
236
+ "eval_runtime": 20.434,
237
+ "eval_samples_per_second": 56.67,
238
+ "eval_steps_per_second": 7.096,
239
+ "step": 280
240
+ },
241
+ {
242
+ "epoch": 2.23,
243
+ "learning_rate": 0.00017025641025641026,
244
+ "loss": 0.557,
245
+ "step": 290
246
+ },
247
+ {
248
+ "epoch": 2.31,
249
+ "learning_rate": 0.00016923076923076923,
250
+ "loss": 0.5061,
251
+ "step": 300
252
+ },
253
+ {
254
+ "epoch": 2.38,
255
+ "learning_rate": 0.00016820512820512823,
256
+ "loss": 0.53,
257
+ "step": 310
258
+ },
259
+ {
260
+ "epoch": 2.46,
261
+ "learning_rate": 0.0001671794871794872,
262
+ "loss": 0.5651,
263
+ "step": 320
264
+ },
265
+ {
266
+ "epoch": 2.46,
267
+ "eval_accuracy": 0.6649395509499136,
268
+ "eval_loss": 1.2330708503723145,
269
+ "eval_runtime": 20.6893,
270
+ "eval_samples_per_second": 55.971,
271
+ "eval_steps_per_second": 7.008,
272
+ "step": 320
273
+ },
274
+ {
275
+ "epoch": 2.54,
276
+ "learning_rate": 0.00016615384615384617,
277
+ "loss": 0.5417,
278
+ "step": 330
279
+ },
280
+ {
281
+ "epoch": 2.62,
282
+ "learning_rate": 0.00016512820512820512,
283
+ "loss": 0.5584,
284
+ "step": 340
285
+ },
286
+ {
287
+ "epoch": 2.69,
288
+ "learning_rate": 0.0001641025641025641,
289
+ "loss": 0.6261,
290
+ "step": 350
291
+ },
292
+ {
293
+ "epoch": 2.77,
294
+ "learning_rate": 0.0001630769230769231,
295
+ "loss": 0.6847,
296
+ "step": 360
297
+ },
298
+ {
299
+ "epoch": 2.77,
300
+ "eval_accuracy": 0.6381692573402418,
301
+ "eval_loss": 1.3150428533554077,
302
+ "eval_runtime": 20.8049,
303
+ "eval_samples_per_second": 55.66,
304
+ "eval_steps_per_second": 6.97,
305
+ "step": 360
306
+ },
307
+ {
308
+ "epoch": 2.85,
309
+ "learning_rate": 0.00016205128205128207,
310
+ "loss": 0.6079,
311
+ "step": 370
312
+ },
313
+ {
314
+ "epoch": 2.92,
315
+ "learning_rate": 0.00016102564102564104,
316
+ "loss": 0.6087,
317
+ "step": 380
318
+ },
319
+ {
320
+ "epoch": 3.0,
321
+ "learning_rate": 0.00016,
322
+ "loss": 0.6049,
323
+ "step": 390
324
+ },
325
+ {
326
+ "epoch": 3.08,
327
+ "learning_rate": 0.00015897435897435896,
328
+ "loss": 0.2965,
329
+ "step": 400
330
+ },
331
+ {
332
+ "epoch": 3.08,
333
+ "eval_accuracy": 0.6735751295336787,
334
+ "eval_loss": 1.1951243877410889,
335
+ "eval_runtime": 20.3022,
336
+ "eval_samples_per_second": 57.038,
337
+ "eval_steps_per_second": 7.142,
338
+ "step": 400
339
+ },
340
+ {
341
+ "epoch": 3.15,
342
+ "learning_rate": 0.00015794871794871796,
343
+ "loss": 0.3409,
344
+ "step": 410
345
+ },
346
+ {
347
+ "epoch": 3.23,
348
+ "learning_rate": 0.00015692307692307693,
349
+ "loss": 0.2707,
350
+ "step": 420
351
+ },
352
+ {
353
+ "epoch": 3.31,
354
+ "learning_rate": 0.0001558974358974359,
355
+ "loss": 0.2439,
356
+ "step": 430
357
+ },
358
+ {
359
+ "epoch": 3.38,
360
+ "learning_rate": 0.00015487179487179488,
361
+ "loss": 0.3052,
362
+ "step": 440
363
+ },
364
+ {
365
+ "epoch": 3.38,
366
+ "eval_accuracy": 0.5915371329879102,
367
+ "eval_loss": 1.4183249473571777,
368
+ "eval_runtime": 19.9664,
369
+ "eval_samples_per_second": 57.997,
370
+ "eval_steps_per_second": 7.262,
371
+ "step": 440
372
+ },
373
+ {
374
+ "epoch": 3.46,
375
+ "learning_rate": 0.00015384615384615385,
376
+ "loss": 0.2819,
377
+ "step": 450
378
+ },
379
+ {
380
+ "epoch": 3.54,
381
+ "learning_rate": 0.00015282051282051282,
382
+ "loss": 0.2755,
383
+ "step": 460
384
+ },
385
+ {
386
+ "epoch": 3.62,
387
+ "learning_rate": 0.0001517948717948718,
388
+ "loss": 0.2574,
389
+ "step": 470
390
+ },
391
+ {
392
+ "epoch": 3.69,
393
+ "learning_rate": 0.00015076923076923077,
394
+ "loss": 0.2592,
395
+ "step": 480
396
+ },
397
+ {
398
+ "epoch": 3.69,
399
+ "eval_accuracy": 0.697754749568221,
400
+ "eval_loss": 1.1361786127090454,
401
+ "eval_runtime": 19.9742,
402
+ "eval_samples_per_second": 57.975,
403
+ "eval_steps_per_second": 7.259,
404
+ "step": 480
405
+ },
406
+ {
407
+ "epoch": 3.77,
408
+ "learning_rate": 0.00014974358974358974,
409
+ "loss": 0.2762,
410
+ "step": 490
411
+ },
412
+ {
413
+ "epoch": 3.85,
414
+ "learning_rate": 0.00014871794871794872,
415
+ "loss": 0.2489,
416
+ "step": 500
417
+ },
418
+ {
419
+ "epoch": 3.92,
420
+ "learning_rate": 0.00014769230769230772,
421
+ "loss": 0.2119,
422
+ "step": 510
423
+ },
424
+ {
425
+ "epoch": 4.0,
426
+ "learning_rate": 0.00014666666666666666,
427
+ "loss": 0.3058,
428
+ "step": 520
429
+ },
430
+ {
431
+ "epoch": 4.0,
432
+ "eval_accuracy": 0.6303972366148531,
433
+ "eval_loss": 1.4736298322677612,
434
+ "eval_runtime": 20.1619,
435
+ "eval_samples_per_second": 57.435,
436
+ "eval_steps_per_second": 7.192,
437
+ "step": 520
438
+ },
439
+ {
440
+ "epoch": 4.08,
441
+ "learning_rate": 0.00014564102564102564,
442
+ "loss": 0.2272,
443
+ "step": 530
444
+ },
445
+ {
446
+ "epoch": 4.15,
447
+ "learning_rate": 0.0001446153846153846,
448
+ "loss": 0.1024,
449
+ "step": 540
450
+ },
451
+ {
452
+ "epoch": 4.23,
453
+ "learning_rate": 0.0001435897435897436,
454
+ "loss": 0.1202,
455
+ "step": 550
456
+ },
457
+ {
458
+ "epoch": 4.31,
459
+ "learning_rate": 0.00014256410256410258,
460
+ "loss": 0.1412,
461
+ "step": 560
462
+ },
463
+ {
464
+ "epoch": 4.31,
465
+ "eval_accuracy": 0.6528497409326425,
466
+ "eval_loss": 1.2578930854797363,
467
+ "eval_runtime": 20.8549,
468
+ "eval_samples_per_second": 55.527,
469
+ "eval_steps_per_second": 6.953,
470
+ "step": 560
471
+ },
472
+ {
473
+ "epoch": 4.38,
474
+ "learning_rate": 0.00014153846153846156,
475
+ "loss": 0.1405,
476
+ "step": 570
477
+ },
478
+ {
479
+ "epoch": 4.46,
480
+ "learning_rate": 0.0001405128205128205,
481
+ "loss": 0.1154,
482
+ "step": 580
483
+ },
484
+ {
485
+ "epoch": 4.54,
486
+ "learning_rate": 0.00013948717948717947,
487
+ "loss": 0.1481,
488
+ "step": 590
489
+ },
490
+ {
491
+ "epoch": 4.62,
492
+ "learning_rate": 0.00013846153846153847,
493
+ "loss": 0.1141,
494
+ "step": 600
495
+ },
496
+ {
497
+ "epoch": 4.62,
498
+ "eval_accuracy": 0.6675302245250432,
499
+ "eval_loss": 1.2793298959732056,
500
+ "eval_runtime": 20.7805,
501
+ "eval_samples_per_second": 55.725,
502
+ "eval_steps_per_second": 6.978,
503
+ "step": 600
504
+ },
505
+ {
506
+ "epoch": 4.69,
507
+ "learning_rate": 0.00013743589743589745,
508
+ "loss": 0.1616,
509
+ "step": 610
510
+ },
511
+ {
512
+ "epoch": 4.77,
513
+ "learning_rate": 0.00013641025641025642,
514
+ "loss": 0.1792,
515
+ "step": 620
516
+ },
517
+ {
518
+ "epoch": 4.85,
519
+ "learning_rate": 0.0001353846153846154,
520
+ "loss": 0.0885,
521
+ "step": 630
522
+ },
523
+ {
524
+ "epoch": 4.92,
525
+ "learning_rate": 0.00013435897435897437,
526
+ "loss": 0.1553,
527
+ "step": 640
528
+ },
529
+ {
530
+ "epoch": 4.92,
531
+ "eval_accuracy": 0.6588946459412781,
532
+ "eval_loss": 1.3182828426361084,
533
+ "eval_runtime": 20.1017,
534
+ "eval_samples_per_second": 57.607,
535
+ "eval_steps_per_second": 7.213,
536
+ "step": 640
537
+ },
538
+ {
539
+ "epoch": 5.0,
540
+ "learning_rate": 0.00013333333333333334,
541
+ "loss": 0.1181,
542
+ "step": 650
543
+ },
544
+ {
545
+ "epoch": 5.08,
546
+ "learning_rate": 0.0001323076923076923,
547
+ "loss": 0.1392,
548
+ "step": 660
549
+ },
550
+ {
551
+ "epoch": 5.15,
552
+ "learning_rate": 0.00013128205128205129,
553
+ "loss": 0.0673,
554
+ "step": 670
555
+ },
556
+ {
557
+ "epoch": 5.23,
558
+ "learning_rate": 0.00013025641025641026,
559
+ "loss": 0.059,
560
+ "step": 680
561
+ },
562
+ {
563
+ "epoch": 5.23,
564
+ "eval_accuracy": 0.6882556131260794,
565
+ "eval_loss": 1.2099286317825317,
566
+ "eval_runtime": 21.3732,
567
+ "eval_samples_per_second": 54.18,
568
+ "eval_steps_per_second": 6.784,
569
+ "step": 680
570
+ },
571
+ {
572
+ "epoch": 5.31,
573
+ "learning_rate": 0.00012923076923076923,
574
+ "loss": 0.0786,
575
+ "step": 690
576
+ },
577
+ {
578
+ "epoch": 5.38,
579
+ "learning_rate": 0.00012820512820512823,
580
+ "loss": 0.0537,
581
+ "step": 700
582
+ },
583
+ {
584
+ "epoch": 5.46,
585
+ "learning_rate": 0.00012717948717948718,
586
+ "loss": 0.0639,
587
+ "step": 710
588
+ },
589
+ {
590
+ "epoch": 5.54,
591
+ "learning_rate": 0.00012615384615384615,
592
+ "loss": 0.0914,
593
+ "step": 720
594
+ },
595
+ {
596
+ "epoch": 5.54,
597
+ "eval_accuracy": 0.6718480138169257,
598
+ "eval_loss": 1.298492193222046,
599
+ "eval_runtime": 20.1222,
600
+ "eval_samples_per_second": 57.548,
601
+ "eval_steps_per_second": 7.206,
602
+ "step": 720
603
+ },
604
+ {
605
+ "epoch": 5.62,
606
+ "learning_rate": 0.00012512820512820512,
607
+ "loss": 0.1004,
608
+ "step": 730
609
+ },
610
+ {
611
+ "epoch": 5.69,
612
+ "learning_rate": 0.00012410256410256412,
613
+ "loss": 0.0592,
614
+ "step": 740
615
+ },
616
+ {
617
+ "epoch": 5.77,
618
+ "learning_rate": 0.0001230769230769231,
619
+ "loss": 0.0688,
620
+ "step": 750
621
+ },
622
+ {
623
+ "epoch": 5.85,
624
+ "learning_rate": 0.00012205128205128207,
625
+ "loss": 0.0546,
626
+ "step": 760
627
+ },
628
+ {
629
+ "epoch": 5.85,
630
+ "eval_accuracy": 0.690846286701209,
631
+ "eval_loss": 1.282209873199463,
632
+ "eval_runtime": 20.42,
633
+ "eval_samples_per_second": 56.709,
634
+ "eval_steps_per_second": 7.101,
635
+ "step": 760
636
+ },
637
+ {
638
+ "epoch": 5.92,
639
+ "learning_rate": 0.00012102564102564103,
640
+ "loss": 0.0285,
641
+ "step": 770
642
+ },
643
+ {
644
+ "epoch": 6.0,
645
+ "learning_rate": 0.00012,
646
+ "loss": 0.0318,
647
+ "step": 780
648
+ },
649
+ {
650
+ "epoch": 6.08,
651
+ "learning_rate": 0.00011897435897435898,
652
+ "loss": 0.0249,
653
+ "step": 790
654
+ },
655
+ {
656
+ "epoch": 6.15,
657
+ "learning_rate": 0.00011794871794871796,
658
+ "loss": 0.0306,
659
+ "step": 800
660
+ },
661
+ {
662
+ "epoch": 6.15,
663
+ "eval_accuracy": 0.6735751295336787,
664
+ "eval_loss": 1.3418058156967163,
665
+ "eval_runtime": 20.9104,
666
+ "eval_samples_per_second": 55.379,
667
+ "eval_steps_per_second": 6.934,
668
+ "step": 800
669
+ },
670
+ {
671
+ "epoch": 6.23,
672
+ "learning_rate": 0.00011692307692307694,
673
+ "loss": 0.0689,
674
+ "step": 810
675
+ },
676
+ {
677
+ "epoch": 6.31,
678
+ "learning_rate": 0.00011589743589743591,
679
+ "loss": 0.0617,
680
+ "step": 820
681
+ },
682
+ {
683
+ "epoch": 6.38,
684
+ "learning_rate": 0.00011487179487179487,
685
+ "loss": 0.0717,
686
+ "step": 830
687
+ },
688
+ {
689
+ "epoch": 6.46,
690
+ "learning_rate": 0.00011384615384615384,
691
+ "loss": 0.0207,
692
+ "step": 840
693
+ },
694
+ {
695
+ "epoch": 6.46,
696
+ "eval_accuracy": 0.697754749568221,
697
+ "eval_loss": 1.3155086040496826,
698
+ "eval_runtime": 20.9506,
699
+ "eval_samples_per_second": 55.273,
700
+ "eval_steps_per_second": 6.921,
701
+ "step": 840
702
+ },
703
+ {
704
+ "epoch": 6.54,
705
+ "learning_rate": 0.00011282051282051283,
706
+ "loss": 0.0192,
707
+ "step": 850
708
+ },
709
+ {
710
+ "epoch": 6.62,
711
+ "learning_rate": 0.0001117948717948718,
712
+ "loss": 0.0448,
713
+ "step": 860
714
+ },
715
+ {
716
+ "epoch": 6.69,
717
+ "learning_rate": 0.00011076923076923077,
718
+ "loss": 0.0213,
719
+ "step": 870
720
+ },
721
+ {
722
+ "epoch": 6.77,
723
+ "learning_rate": 0.00010974358974358976,
724
+ "loss": 0.0505,
725
+ "step": 880
726
+ },
727
+ {
728
+ "epoch": 6.77,
729
+ "eval_accuracy": 0.6614853195164075,
730
+ "eval_loss": 1.4209389686584473,
731
+ "eval_runtime": 20.0879,
732
+ "eval_samples_per_second": 57.647,
733
+ "eval_steps_per_second": 7.218,
734
+ "step": 880
735
+ },
736
+ {
737
+ "epoch": 6.85,
738
+ "learning_rate": 0.00010871794871794872,
739
+ "loss": 0.0356,
740
+ "step": 890
741
+ },
742
+ {
743
+ "epoch": 6.92,
744
+ "learning_rate": 0.0001076923076923077,
745
+ "loss": 0.022,
746
+ "step": 900
747
+ },
748
+ {
749
+ "epoch": 7.0,
750
+ "learning_rate": 0.00010666666666666667,
751
+ "loss": 0.102,
752
+ "step": 910
753
+ },
754
+ {
755
+ "epoch": 7.08,
756
+ "learning_rate": 0.00010564102564102565,
757
+ "loss": 0.016,
758
+ "step": 920
759
+ },
760
+ {
761
+ "epoch": 7.08,
762
+ "eval_accuracy": 0.697754749568221,
763
+ "eval_loss": 1.3212494850158691,
764
+ "eval_runtime": 20.3129,
765
+ "eval_samples_per_second": 57.008,
766
+ "eval_steps_per_second": 7.138,
767
+ "step": 920
768
+ },
769
+ {
770
+ "epoch": 7.15,
771
+ "learning_rate": 0.00010461538461538463,
772
+ "loss": 0.038,
773
+ "step": 930
774
+ },
775
+ {
776
+ "epoch": 7.23,
777
+ "learning_rate": 0.0001035897435897436,
778
+ "loss": 0.0161,
779
+ "step": 940
780
+ },
781
+ {
782
+ "epoch": 7.31,
783
+ "learning_rate": 0.00010256410256410256,
784
+ "loss": 0.0266,
785
+ "step": 950
786
+ },
787
+ {
788
+ "epoch": 7.38,
789
+ "learning_rate": 0.00010153846153846153,
790
+ "loss": 0.0268,
791
+ "step": 960
792
+ },
793
+ {
794
+ "epoch": 7.38,
795
+ "eval_accuracy": 0.7081174438687392,
796
+ "eval_loss": 1.2981479167938232,
797
+ "eval_runtime": 20.6143,
798
+ "eval_samples_per_second": 56.175,
799
+ "eval_steps_per_second": 7.034,
800
+ "step": 960
801
+ },
802
+ {
803
+ "epoch": 7.46,
804
+ "learning_rate": 0.00010051282051282052,
805
+ "loss": 0.0235,
806
+ "step": 970
807
+ },
808
+ {
809
+ "epoch": 7.54,
810
+ "learning_rate": 9.948717948717949e-05,
811
+ "loss": 0.0126,
812
+ "step": 980
813
+ },
814
+ {
815
+ "epoch": 7.62,
816
+ "learning_rate": 9.846153846153848e-05,
817
+ "loss": 0.0143,
818
+ "step": 990
819
+ },
820
+ {
821
+ "epoch": 7.69,
822
+ "learning_rate": 9.743589743589744e-05,
823
+ "loss": 0.0299,
824
+ "step": 1000
825
+ },
826
+ {
827
+ "epoch": 7.69,
828
+ "eval_accuracy": 0.6925734024179621,
829
+ "eval_loss": 1.3568987846374512,
830
+ "eval_runtime": 20.4554,
831
+ "eval_samples_per_second": 56.611,
832
+ "eval_steps_per_second": 7.089,
833
+ "step": 1000
834
+ },
835
+ {
836
+ "epoch": 7.77,
837
+ "learning_rate": 9.641025641025641e-05,
838
+ "loss": 0.0122,
839
+ "step": 1010
840
+ },
841
+ {
842
+ "epoch": 7.85,
843
+ "learning_rate": 9.53846153846154e-05,
844
+ "loss": 0.0203,
845
+ "step": 1020
846
+ },
847
+ {
848
+ "epoch": 7.92,
849
+ "learning_rate": 9.435897435897436e-05,
850
+ "loss": 0.0105,
851
+ "step": 1030
852
+ },
853
+ {
854
+ "epoch": 8.0,
855
+ "learning_rate": 9.333333333333334e-05,
856
+ "loss": 0.0395,
857
+ "step": 1040
858
+ },
859
+ {
860
+ "epoch": 8.0,
861
+ "eval_accuracy": 0.697754749568221,
862
+ "eval_loss": 1.315879225730896,
863
+ "eval_runtime": 20.1743,
864
+ "eval_samples_per_second": 57.4,
865
+ "eval_steps_per_second": 7.187,
866
+ "step": 1040
867
+ },
868
+ {
869
+ "epoch": 8.08,
870
+ "learning_rate": 9.230769230769232e-05,
871
+ "loss": 0.0102,
872
+ "step": 1050
873
+ },
874
+ {
875
+ "epoch": 8.15,
876
+ "learning_rate": 9.128205128205129e-05,
877
+ "loss": 0.0103,
878
+ "step": 1060
879
+ },
880
+ {
881
+ "epoch": 8.23,
882
+ "learning_rate": 9.025641025641026e-05,
883
+ "loss": 0.0106,
884
+ "step": 1070
885
+ },
886
+ {
887
+ "epoch": 8.31,
888
+ "learning_rate": 8.923076923076924e-05,
889
+ "loss": 0.0106,
890
+ "step": 1080
891
+ },
892
+ {
893
+ "epoch": 8.31,
894
+ "eval_accuracy": 0.7046632124352331,
895
+ "eval_loss": 1.3094946146011353,
896
+ "eval_runtime": 20.1865,
897
+ "eval_samples_per_second": 57.365,
898
+ "eval_steps_per_second": 7.183,
899
+ "step": 1080
900
+ },
901
+ {
902
+ "epoch": 8.38,
903
+ "learning_rate": 8.820512820512821e-05,
904
+ "loss": 0.0364,
905
+ "step": 1090
906
+ },
907
+ {
908
+ "epoch": 8.46,
909
+ "learning_rate": 8.717948717948718e-05,
910
+ "loss": 0.0098,
911
+ "step": 1100
912
+ },
913
+ {
914
+ "epoch": 8.54,
915
+ "learning_rate": 8.615384615384617e-05,
916
+ "loss": 0.0462,
917
+ "step": 1110
918
+ },
919
+ {
920
+ "epoch": 8.62,
921
+ "learning_rate": 8.512820512820513e-05,
922
+ "loss": 0.0093,
923
+ "step": 1120
924
+ },
925
+ {
926
+ "epoch": 8.62,
927
+ "eval_accuracy": 0.7124352331606217,
928
+ "eval_loss": 1.3362736701965332,
929
+ "eval_runtime": 20.3723,
930
+ "eval_samples_per_second": 56.842,
931
+ "eval_steps_per_second": 7.117,
932
+ "step": 1120
933
+ },
934
+ {
935
+ "epoch": 8.69,
936
+ "learning_rate": 8.410256410256411e-05,
937
+ "loss": 0.0097,
938
+ "step": 1130
939
+ },
940
+ {
941
+ "epoch": 8.77,
942
+ "learning_rate": 8.307692307692309e-05,
943
+ "loss": 0.0087,
944
+ "step": 1140
945
+ },
946
+ {
947
+ "epoch": 8.85,
948
+ "learning_rate": 8.205128205128205e-05,
949
+ "loss": 0.0087,
950
+ "step": 1150
951
+ },
952
+ {
953
+ "epoch": 8.92,
954
+ "learning_rate": 8.102564102564103e-05,
955
+ "loss": 0.0364,
956
+ "step": 1160
957
+ },
958
+ {
959
+ "epoch": 8.92,
960
+ "eval_accuracy": 0.7089810017271158,
961
+ "eval_loss": 1.340015172958374,
962
+ "eval_runtime": 21.051,
963
+ "eval_samples_per_second": 55.009,
964
+ "eval_steps_per_second": 6.888,
965
+ "step": 1160
966
+ },
967
+ {
968
+ "epoch": 9.0,
969
+ "learning_rate": 8e-05,
970
+ "loss": 0.0087,
971
+ "step": 1170
972
+ },
973
+ {
974
+ "epoch": 9.08,
975
+ "learning_rate": 7.897435897435898e-05,
976
+ "loss": 0.0101,
977
+ "step": 1180
978
+ },
979
+ {
980
+ "epoch": 9.15,
981
+ "learning_rate": 7.794871794871795e-05,
982
+ "loss": 0.0174,
983
+ "step": 1190
984
+ },
985
+ {
986
+ "epoch": 9.23,
987
+ "learning_rate": 7.692307692307693e-05,
988
+ "loss": 0.0324,
989
+ "step": 1200
990
+ },
991
+ {
992
+ "epoch": 9.23,
993
+ "eval_accuracy": 0.7098445595854922,
994
+ "eval_loss": 1.3313075304031372,
995
+ "eval_runtime": 21.981,
996
+ "eval_samples_per_second": 52.682,
997
+ "eval_steps_per_second": 6.597,
998
+ "step": 1200
999
+ },
1000
+ {
1001
+ "epoch": 9.31,
1002
+ "learning_rate": 7.58974358974359e-05,
1003
+ "loss": 0.0076,
1004
+ "step": 1210
1005
+ },
1006
+ {
1007
+ "epoch": 9.38,
1008
+ "learning_rate": 7.487179487179487e-05,
1009
+ "loss": 0.0287,
1010
+ "step": 1220
1011
+ },
1012
+ {
1013
+ "epoch": 9.46,
1014
+ "learning_rate": 7.384615384615386e-05,
1015
+ "loss": 0.0083,
1016
+ "step": 1230
1017
+ },
1018
+ {
1019
+ "epoch": 9.54,
1020
+ "learning_rate": 7.282051282051282e-05,
1021
+ "loss": 0.0076,
1022
+ "step": 1240
1023
+ },
1024
+ {
1025
+ "epoch": 9.54,
1026
+ "eval_accuracy": 0.7072538860103627,
1027
+ "eval_loss": 1.3641475439071655,
1028
+ "eval_runtime": 20.2849,
1029
+ "eval_samples_per_second": 57.087,
1030
+ "eval_steps_per_second": 7.148,
1031
+ "step": 1240
1032
+ },
1033
+ {
1034
+ "epoch": 9.62,
1035
+ "learning_rate": 7.17948717948718e-05,
1036
+ "loss": 0.0076,
1037
+ "step": 1250
1038
+ },
1039
+ {
1040
+ "epoch": 9.69,
1041
+ "learning_rate": 7.076923076923078e-05,
1042
+ "loss": 0.0075,
1043
+ "step": 1260
1044
+ },
1045
+ {
1046
+ "epoch": 9.77,
1047
+ "learning_rate": 6.974358974358974e-05,
1048
+ "loss": 0.0078,
1049
+ "step": 1270
1050
+ },
1051
+ {
1052
+ "epoch": 9.85,
1053
+ "learning_rate": 6.871794871794872e-05,
1054
+ "loss": 0.0076,
1055
+ "step": 1280
1056
+ },
1057
+ {
1058
+ "epoch": 9.85,
1059
+ "eval_accuracy": 0.7098445595854922,
1060
+ "eval_loss": 1.357639193534851,
1061
+ "eval_runtime": 20.3563,
1062
+ "eval_samples_per_second": 56.887,
1063
+ "eval_steps_per_second": 7.123,
1064
+ "step": 1280
1065
+ },
1066
+ {
1067
+ "epoch": 9.92,
1068
+ "learning_rate": 6.76923076923077e-05,
1069
+ "loss": 0.007,
1070
+ "step": 1290
1071
+ },
1072
+ {
1073
+ "epoch": 10.0,
1074
+ "learning_rate": 6.666666666666667e-05,
1075
+ "loss": 0.0075,
1076
+ "step": 1300
1077
+ },
1078
+ {
1079
+ "epoch": 10.08,
1080
+ "learning_rate": 6.564102564102564e-05,
1081
+ "loss": 0.0126,
1082
+ "step": 1310
1083
+ },
1084
+ {
1085
+ "epoch": 10.15,
1086
+ "learning_rate": 6.461538461538462e-05,
1087
+ "loss": 0.021,
1088
+ "step": 1320
1089
+ },
1090
+ {
1091
+ "epoch": 10.15,
1092
+ "eval_accuracy": 0.7124352331606217,
1093
+ "eval_loss": 1.3460208177566528,
1094
+ "eval_runtime": 20.5844,
1095
+ "eval_samples_per_second": 56.256,
1096
+ "eval_steps_per_second": 7.044,
1097
+ "step": 1320
1098
+ },
1099
+ {
1100
+ "epoch": 10.23,
1101
+ "learning_rate": 6.358974358974359e-05,
1102
+ "loss": 0.0068,
1103
+ "step": 1330
1104
+ },
1105
+ {
1106
+ "epoch": 10.31,
1107
+ "learning_rate": 6.256410256410256e-05,
1108
+ "loss": 0.0068,
1109
+ "step": 1340
1110
+ },
1111
+ {
1112
+ "epoch": 10.38,
1113
+ "learning_rate": 6.153846153846155e-05,
1114
+ "loss": 0.0066,
1115
+ "step": 1350
1116
+ },
1117
+ {
1118
+ "epoch": 10.46,
1119
+ "learning_rate": 6.0512820512820515e-05,
1120
+ "loss": 0.0064,
1121
+ "step": 1360
1122
+ },
1123
+ {
1124
+ "epoch": 10.46,
1125
+ "eval_accuracy": 0.7184801381692574,
1126
+ "eval_loss": 1.3521218299865723,
1127
+ "eval_runtime": 21.5706,
1128
+ "eval_samples_per_second": 53.684,
1129
+ "eval_steps_per_second": 6.722,
1130
+ "step": 1360
1131
+ },
1132
+ {
1133
+ "epoch": 10.54,
1134
+ "learning_rate": 5.948717948717949e-05,
1135
+ "loss": 0.0061,
1136
+ "step": 1370
1137
+ },
1138
+ {
1139
+ "epoch": 10.62,
1140
+ "learning_rate": 5.846153846153847e-05,
1141
+ "loss": 0.0062,
1142
+ "step": 1380
1143
+ },
1144
+ {
1145
+ "epoch": 10.69,
1146
+ "learning_rate": 5.7435897435897434e-05,
1147
+ "loss": 0.0364,
1148
+ "step": 1390
1149
+ },
1150
+ {
1151
+ "epoch": 10.77,
1152
+ "learning_rate": 5.6410256410256414e-05,
1153
+ "loss": 0.0069,
1154
+ "step": 1400
1155
+ },
1156
+ {
1157
+ "epoch": 10.77,
1158
+ "eval_accuracy": 0.7141623488773747,
1159
+ "eval_loss": 1.3537815809249878,
1160
+ "eval_runtime": 20.0789,
1161
+ "eval_samples_per_second": 57.672,
1162
+ "eval_steps_per_second": 7.221,
1163
+ "step": 1400
1164
+ },
1165
+ {
1166
+ "epoch": 10.85,
1167
+ "learning_rate": 5.538461538461539e-05,
1168
+ "loss": 0.0069,
1169
+ "step": 1410
1170
+ },
1171
+ {
1172
+ "epoch": 10.92,
1173
+ "learning_rate": 5.435897435897436e-05,
1174
+ "loss": 0.0068,
1175
+ "step": 1420
1176
+ },
1177
+ {
1178
+ "epoch": 11.0,
1179
+ "learning_rate": 5.333333333333333e-05,
1180
+ "loss": 0.0063,
1181
+ "step": 1430
1182
+ },
1183
+ {
1184
+ "epoch": 11.08,
1185
+ "learning_rate": 5.230769230769231e-05,
1186
+ "loss": 0.0101,
1187
+ "step": 1440
1188
+ },
1189
+ {
1190
+ "epoch": 11.08,
1191
+ "eval_accuracy": 0.7150259067357513,
1192
+ "eval_loss": 1.3637244701385498,
1193
+ "eval_runtime": 20.418,
1194
+ "eval_samples_per_second": 56.715,
1195
+ "eval_steps_per_second": 7.102,
1196
+ "step": 1440
1197
+ },
1198
+ {
1199
+ "epoch": 11.15,
1200
+ "learning_rate": 5.128205128205128e-05,
1201
+ "loss": 0.0094,
1202
+ "step": 1450
1203
+ },
1204
+ {
1205
+ "epoch": 11.23,
1206
+ "learning_rate": 5.025641025641026e-05,
1207
+ "loss": 0.0152,
1208
+ "step": 1460
1209
+ },
1210
+ {
1211
+ "epoch": 11.31,
1212
+ "learning_rate": 4.923076923076924e-05,
1213
+ "loss": 0.006,
1214
+ "step": 1470
1215
+ },
1216
+ {
1217
+ "epoch": 11.38,
1218
+ "learning_rate": 4.8205128205128205e-05,
1219
+ "loss": 0.0115,
1220
+ "step": 1480
1221
+ },
1222
+ {
1223
+ "epoch": 11.38,
1224
+ "eval_accuracy": 0.7081174438687392,
1225
+ "eval_loss": 1.3893637657165527,
1226
+ "eval_runtime": 21.0579,
1227
+ "eval_samples_per_second": 54.991,
1228
+ "eval_steps_per_second": 6.886,
1229
+ "step": 1480
1230
+ },
1231
+ {
1232
+ "epoch": 11.46,
1233
+ "learning_rate": 4.717948717948718e-05,
1234
+ "loss": 0.0062,
1235
+ "step": 1490
1236
+ },
1237
+ {
1238
+ "epoch": 11.54,
1239
+ "learning_rate": 4.615384615384616e-05,
1240
+ "loss": 0.0058,
1241
+ "step": 1500
1242
+ },
1243
+ {
1244
+ "epoch": 11.62,
1245
+ "learning_rate": 4.512820512820513e-05,
1246
+ "loss": 0.0058,
1247
+ "step": 1510
1248
+ },
1249
+ {
1250
+ "epoch": 11.69,
1251
+ "learning_rate": 4.4102564102564104e-05,
1252
+ "loss": 0.0062,
1253
+ "step": 1520
1254
+ },
1255
+ {
1256
+ "epoch": 11.69,
1257
+ "eval_accuracy": 0.7072538860103627,
1258
+ "eval_loss": 1.3909190893173218,
1259
+ "eval_runtime": 20.5699,
1260
+ "eval_samples_per_second": 56.296,
1261
+ "eval_steps_per_second": 7.049,
1262
+ "step": 1520
1263
+ },
1264
+ {
1265
+ "epoch": 11.77,
1266
+ "learning_rate": 4.3076923076923084e-05,
1267
+ "loss": 0.0171,
1268
+ "step": 1530
1269
+ },
1270
+ {
1271
+ "epoch": 11.85,
1272
+ "learning_rate": 4.205128205128206e-05,
1273
+ "loss": 0.0159,
1274
+ "step": 1540
1275
+ },
1276
+ {
1277
+ "epoch": 11.92,
1278
+ "learning_rate": 4.1025641025641023e-05,
1279
+ "loss": 0.0054,
1280
+ "step": 1550
1281
+ },
1282
+ {
1283
+ "epoch": 12.0,
1284
+ "learning_rate": 4e-05,
1285
+ "loss": 0.0055,
1286
+ "step": 1560
1287
+ },
1288
+ {
1289
+ "epoch": 12.0,
1290
+ "eval_accuracy": 0.7124352331606217,
1291
+ "eval_loss": 1.3742793798446655,
1292
+ "eval_runtime": 20.0602,
1293
+ "eval_samples_per_second": 57.726,
1294
+ "eval_steps_per_second": 7.228,
1295
+ "step": 1560
1296
+ },
1297
+ {
1298
+ "epoch": 12.08,
1299
+ "learning_rate": 3.8974358974358976e-05,
1300
+ "loss": 0.0056,
1301
+ "step": 1570
1302
+ },
1303
+ {
1304
+ "epoch": 12.15,
1305
+ "learning_rate": 3.794871794871795e-05,
1306
+ "loss": 0.0234,
1307
+ "step": 1580
1308
+ },
1309
+ {
1310
+ "epoch": 12.23,
1311
+ "learning_rate": 3.692307692307693e-05,
1312
+ "loss": 0.0053,
1313
+ "step": 1590
1314
+ },
1315
+ {
1316
+ "epoch": 12.31,
1317
+ "learning_rate": 3.58974358974359e-05,
1318
+ "loss": 0.0054,
1319
+ "step": 1600
1320
+ },
1321
+ {
1322
+ "epoch": 12.31,
1323
+ "eval_accuracy": 0.7158894645941278,
1324
+ "eval_loss": 1.3830854892730713,
1325
+ "eval_runtime": 20.2514,
1326
+ "eval_samples_per_second": 57.181,
1327
+ "eval_steps_per_second": 7.16,
1328
+ "step": 1600
1329
+ },
1330
+ {
1331
+ "epoch": 12.38,
1332
+ "learning_rate": 3.487179487179487e-05,
1333
+ "loss": 0.0052,
1334
+ "step": 1610
1335
+ },
1336
+ {
1337
+ "epoch": 12.46,
1338
+ "learning_rate": 3.384615384615385e-05,
1339
+ "loss": 0.0052,
1340
+ "step": 1620
1341
+ },
1342
+ {
1343
+ "epoch": 12.54,
1344
+ "learning_rate": 3.282051282051282e-05,
1345
+ "loss": 0.0056,
1346
+ "step": 1630
1347
+ },
1348
+ {
1349
+ "epoch": 12.62,
1350
+ "learning_rate": 3.1794871794871795e-05,
1351
+ "loss": 0.0054,
1352
+ "step": 1640
1353
+ },
1354
+ {
1355
+ "epoch": 12.62,
1356
+ "eval_accuracy": 0.7167530224525043,
1357
+ "eval_loss": 1.3866362571716309,
1358
+ "eval_runtime": 21.0166,
1359
+ "eval_samples_per_second": 55.099,
1360
+ "eval_steps_per_second": 6.899,
1361
+ "step": 1640
1362
+ },
1363
+ {
1364
+ "epoch": 12.69,
1365
+ "learning_rate": 3.0769230769230774e-05,
1366
+ "loss": 0.0052,
1367
+ "step": 1650
1368
+ },
1369
+ {
1370
+ "epoch": 12.77,
1371
+ "learning_rate": 2.9743589743589744e-05,
1372
+ "loss": 0.0052,
1373
+ "step": 1660
1374
+ },
1375
+ {
1376
+ "epoch": 12.85,
1377
+ "learning_rate": 2.8717948717948717e-05,
1378
+ "loss": 0.0058,
1379
+ "step": 1670
1380
+ },
1381
+ {
1382
+ "epoch": 12.92,
1383
+ "learning_rate": 2.7692307692307694e-05,
1384
+ "loss": 0.0147,
1385
+ "step": 1680
1386
+ },
1387
+ {
1388
+ "epoch": 12.92,
1389
+ "eval_accuracy": 0.7150259067357513,
1390
+ "eval_loss": 1.391139030456543,
1391
+ "eval_runtime": 20.4472,
1392
+ "eval_samples_per_second": 56.634,
1393
+ "eval_steps_per_second": 7.091,
1394
+ "step": 1680
1395
+ },
1396
+ {
1397
+ "epoch": 13.0,
1398
+ "learning_rate": 2.6666666666666667e-05,
1399
+ "loss": 0.0051,
1400
+ "step": 1690
1401
+ },
1402
+ {
1403
+ "epoch": 13.08,
1404
+ "learning_rate": 2.564102564102564e-05,
1405
+ "loss": 0.0144,
1406
+ "step": 1700
1407
+ },
1408
+ {
1409
+ "epoch": 13.15,
1410
+ "learning_rate": 2.461538461538462e-05,
1411
+ "loss": 0.0052,
1412
+ "step": 1710
1413
+ },
1414
+ {
1415
+ "epoch": 13.23,
1416
+ "learning_rate": 2.358974358974359e-05,
1417
+ "loss": 0.0058,
1418
+ "step": 1720
1419
+ },
1420
+ {
1421
+ "epoch": 13.23,
1422
+ "eval_accuracy": 0.7141623488773747,
1423
+ "eval_loss": 1.3986831903457642,
1424
+ "eval_runtime": 20.1538,
1425
+ "eval_samples_per_second": 57.458,
1426
+ "eval_steps_per_second": 7.195,
1427
+ "step": 1720
1428
+ },
1429
+ {
1430
+ "epoch": 13.31,
1431
+ "learning_rate": 2.2564102564102566e-05,
1432
+ "loss": 0.0047,
1433
+ "step": 1730
1434
+ },
1435
+ {
1436
+ "epoch": 13.38,
1437
+ "learning_rate": 2.1538461538461542e-05,
1438
+ "loss": 0.0053,
1439
+ "step": 1740
1440
+ },
1441
+ {
1442
+ "epoch": 13.46,
1443
+ "learning_rate": 2.0512820512820512e-05,
1444
+ "loss": 0.005,
1445
+ "step": 1750
1446
+ },
1447
+ {
1448
+ "epoch": 13.54,
1449
+ "learning_rate": 1.9487179487179488e-05,
1450
+ "loss": 0.0096,
1451
+ "step": 1760
1452
+ },
1453
+ {
1454
+ "epoch": 13.54,
1455
+ "eval_accuracy": 0.7132987910189983,
1456
+ "eval_loss": 1.3994832038879395,
1457
+ "eval_runtime": 20.378,
1458
+ "eval_samples_per_second": 56.826,
1459
+ "eval_steps_per_second": 7.116,
1460
+ "step": 1760
1461
+ },
1462
+ {
1463
+ "epoch": 13.62,
1464
+ "learning_rate": 1.8461538461538465e-05,
1465
+ "loss": 0.0055,
1466
+ "step": 1770
1467
+ },
1468
+ {
1469
+ "epoch": 13.69,
1470
+ "learning_rate": 1.7435897435897434e-05,
1471
+ "loss": 0.0112,
1472
+ "step": 1780
1473
+ },
1474
+ {
1475
+ "epoch": 13.77,
1476
+ "learning_rate": 1.641025641025641e-05,
1477
+ "loss": 0.0047,
1478
+ "step": 1790
1479
+ },
1480
+ {
1481
+ "epoch": 13.85,
1482
+ "learning_rate": 1.5384615384615387e-05,
1483
+ "loss": 0.0048,
1484
+ "step": 1800
1485
+ },
1486
+ {
1487
+ "epoch": 13.85,
1488
+ "eval_accuracy": 0.7115716753022453,
1489
+ "eval_loss": 1.401125192642212,
1490
+ "eval_runtime": 20.4927,
1491
+ "eval_samples_per_second": 56.508,
1492
+ "eval_steps_per_second": 7.076,
1493
+ "step": 1800
1494
+ },
1495
+ {
1496
+ "epoch": 13.92,
1497
+ "learning_rate": 1.4358974358974359e-05,
1498
+ "loss": 0.0053,
1499
+ "step": 1810
1500
+ },
1501
+ {
1502
+ "epoch": 14.0,
1503
+ "learning_rate": 1.3333333333333333e-05,
1504
+ "loss": 0.0047,
1505
+ "step": 1820
1506
+ },
1507
+ {
1508
+ "epoch": 14.08,
1509
+ "learning_rate": 1.230769230769231e-05,
1510
+ "loss": 0.0146,
1511
+ "step": 1830
1512
+ },
1513
+ {
1514
+ "epoch": 14.15,
1515
+ "learning_rate": 1.1282051282051283e-05,
1516
+ "loss": 0.0054,
1517
+ "step": 1840
1518
+ },
1519
+ {
1520
+ "epoch": 14.15,
1521
+ "eval_accuracy": 0.7115716753022453,
1522
+ "eval_loss": 1.4026780128479004,
1523
+ "eval_runtime": 20.8793,
1524
+ "eval_samples_per_second": 55.462,
1525
+ "eval_steps_per_second": 6.945,
1526
+ "step": 1840
1527
+ },
1528
+ {
1529
+ "epoch": 14.23,
1530
+ "learning_rate": 1.0256410256410256e-05,
1531
+ "loss": 0.0046,
1532
+ "step": 1850
1533
+ },
1534
+ {
1535
+ "epoch": 14.31,
1536
+ "learning_rate": 9.230769230769232e-06,
1537
+ "loss": 0.0046,
1538
+ "step": 1860
1539
+ },
1540
+ {
1541
+ "epoch": 14.38,
1542
+ "learning_rate": 8.205128205128205e-06,
1543
+ "loss": 0.005,
1544
+ "step": 1870
1545
+ },
1546
+ {
1547
+ "epoch": 14.46,
1548
+ "learning_rate": 7.179487179487179e-06,
1549
+ "loss": 0.0048,
1550
+ "step": 1880
1551
+ },
1552
+ {
1553
+ "epoch": 14.46,
1554
+ "eval_accuracy": 0.7115716753022453,
1555
+ "eval_loss": 1.403483510017395,
1556
+ "eval_runtime": 21.9216,
1557
+ "eval_samples_per_second": 52.825,
1558
+ "eval_steps_per_second": 6.614,
1559
+ "step": 1880
1560
+ },
1561
+ {
1562
+ "epoch": 14.54,
1563
+ "learning_rate": 6.153846153846155e-06,
1564
+ "loss": 0.0089,
1565
+ "step": 1890
1566
+ },
1567
+ {
1568
+ "epoch": 14.62,
1569
+ "learning_rate": 5.128205128205128e-06,
1570
+ "loss": 0.0047,
1571
+ "step": 1900
1572
+ },
1573
+ {
1574
+ "epoch": 14.69,
1575
+ "learning_rate": 4.102564102564103e-06,
1576
+ "loss": 0.0048,
1577
+ "step": 1910
1578
+ },
1579
+ {
1580
+ "epoch": 14.77,
1581
+ "learning_rate": 3.0769230769230774e-06,
1582
+ "loss": 0.0047,
1583
+ "step": 1920
1584
+ },
1585
+ {
1586
+ "epoch": 14.77,
1587
+ "eval_accuracy": 0.7124352331606217,
1588
+ "eval_loss": 1.4049346446990967,
1589
+ "eval_runtime": 20.2662,
1590
+ "eval_samples_per_second": 57.139,
1591
+ "eval_steps_per_second": 7.155,
1592
+ "step": 1920
1593
+ },
1594
+ {
1595
+ "epoch": 14.85,
1596
+ "learning_rate": 2.0512820512820513e-06,
1597
+ "loss": 0.005,
1598
+ "step": 1930
1599
+ },
1600
+ {
1601
+ "epoch": 14.92,
1602
+ "learning_rate": 1.0256410256410257e-06,
1603
+ "loss": 0.0049,
1604
+ "step": 1940
1605
+ },
1606
+ {
1607
+ "epoch": 15.0,
1608
+ "learning_rate": 0.0,
1609
+ "loss": 0.0109,
1610
+ "step": 1950
1611
+ },
1612
+ {
1613
+ "epoch": 15.0,
1614
+ "step": 1950,
1615
+ "total_flos": 2.4159665336144486e+18,
1616
+ "train_loss": 0.33013623780929124,
1617
+ "train_runtime": 2443.4946,
1618
+ "train_samples_per_second": 12.756,
1619
+ "train_steps_per_second": 0.798
1620
+ }
1621
+ ],
1622
+ "max_steps": 1950,
1623
+ "num_train_epochs": 15,
1624
+ "total_flos": 2.4159665336144486e+18,
1625
+ "trial_name": null,
1626
+ "trial_params": null
1627
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a67954eb3bb115d5fc916b53e59475d474112dfa86034717eadf5ee507e440
3
+ size 3963