File size: 8,086 Bytes
056dde1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
{
  "best_metric": 0.8910891089108911,
  "best_model_checkpoint": "Electrcical-IMAGE-finetuned-eurosat/checkpoint-199",
  "epoch": 9.824561403508772,
  "eval_steps": 500,
  "global_step": 280,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.3508771929824561,
      "grad_norm": 10.464795112609863,
      "learning_rate": 1.785714285714286e-05,
      "loss": 1.7823,
      "step": 10
    },
    {
      "epoch": 0.7017543859649122,
      "grad_norm": 9.879911422729492,
      "learning_rate": 3.571428571428572e-05,
      "loss": 1.4879,
      "step": 20
    },
    {
      "epoch": 0.9824561403508771,
      "eval_accuracy": 0.7326732673267327,
      "eval_loss": 0.9158226847648621,
      "eval_runtime": 116.3827,
      "eval_samples_per_second": 3.471,
      "eval_steps_per_second": 0.112,
      "step": 28
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 7.517858028411865,
      "learning_rate": 4.960317460317461e-05,
      "loss": 1.1987,
      "step": 30
    },
    {
      "epoch": 1.4035087719298245,
      "grad_norm": 7.376903533935547,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.8925,
      "step": 40
    },
    {
      "epoch": 1.7543859649122808,
      "grad_norm": 8.916149139404297,
      "learning_rate": 4.563492063492064e-05,
      "loss": 0.7072,
      "step": 50
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8366336633663366,
      "eval_loss": 0.4648021161556244,
      "eval_runtime": 4.3367,
      "eval_samples_per_second": 93.158,
      "eval_steps_per_second": 2.998,
      "step": 57
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 8.09045124053955,
      "learning_rate": 4.3650793650793655e-05,
      "loss": 0.665,
      "step": 60
    },
    {
      "epoch": 2.456140350877193,
      "grad_norm": 9.807299613952637,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.5713,
      "step": 70
    },
    {
      "epoch": 2.807017543859649,
      "grad_norm": 10.828336715698242,
      "learning_rate": 3.968253968253968e-05,
      "loss": 0.521,
      "step": 80
    },
    {
      "epoch": 2.982456140350877,
      "eval_accuracy": 0.8712871287128713,
      "eval_loss": 0.3816491961479187,
      "eval_runtime": 4.1894,
      "eval_samples_per_second": 96.434,
      "eval_steps_per_second": 3.103,
      "step": 85
    },
    {
      "epoch": 3.1578947368421053,
      "grad_norm": 6.668788433074951,
      "learning_rate": 3.76984126984127e-05,
      "loss": 0.5152,
      "step": 90
    },
    {
      "epoch": 3.5087719298245617,
      "grad_norm": 5.836483478546143,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.473,
      "step": 100
    },
    {
      "epoch": 3.8596491228070176,
      "grad_norm": 5.453497409820557,
      "learning_rate": 3.3730158730158734e-05,
      "loss": 0.4664,
      "step": 110
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8564356435643564,
      "eval_loss": 0.40334010124206543,
      "eval_runtime": 4.398,
      "eval_samples_per_second": 91.86,
      "eval_steps_per_second": 2.956,
      "step": 114
    },
    {
      "epoch": 4.2105263157894735,
      "grad_norm": 6.095137596130371,
      "learning_rate": 3.1746031746031745e-05,
      "loss": 0.3831,
      "step": 120
    },
    {
      "epoch": 4.56140350877193,
      "grad_norm": 6.058220386505127,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 0.4376,
      "step": 130
    },
    {
      "epoch": 4.912280701754386,
      "grad_norm": 6.937771797180176,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.3944,
      "step": 140
    },
    {
      "epoch": 4.982456140350877,
      "eval_accuracy": 0.8737623762376238,
      "eval_loss": 0.3690718114376068,
      "eval_runtime": 4.2858,
      "eval_samples_per_second": 94.264,
      "eval_steps_per_second": 3.033,
      "step": 142
    },
    {
      "epoch": 5.2631578947368425,
      "grad_norm": 6.101373672485352,
      "learning_rate": 2.5793650793650796e-05,
      "loss": 0.3582,
      "step": 150
    },
    {
      "epoch": 5.614035087719298,
      "grad_norm": 8.85653305053711,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.3584,
      "step": 160
    },
    {
      "epoch": 5.964912280701754,
      "grad_norm": 7.283915996551514,
      "learning_rate": 2.1825396825396827e-05,
      "loss": 0.3627,
      "step": 170
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8886138613861386,
      "eval_loss": 0.3214375674724579,
      "eval_runtime": 4.5245,
      "eval_samples_per_second": 89.292,
      "eval_steps_per_second": 2.873,
      "step": 171
    },
    {
      "epoch": 6.315789473684211,
      "grad_norm": 5.079178810119629,
      "learning_rate": 1.984126984126984e-05,
      "loss": 0.3304,
      "step": 180
    },
    {
      "epoch": 6.666666666666667,
      "grad_norm": 5.342247486114502,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.3298,
      "step": 190
    },
    {
      "epoch": 6.982456140350877,
      "eval_accuracy": 0.8910891089108911,
      "eval_loss": 0.3172283470630646,
      "eval_runtime": 4.1397,
      "eval_samples_per_second": 97.593,
      "eval_steps_per_second": 3.14,
      "step": 199
    },
    {
      "epoch": 7.017543859649122,
      "grad_norm": 6.236889362335205,
      "learning_rate": 1.5873015873015872e-05,
      "loss": 0.342,
      "step": 200
    },
    {
      "epoch": 7.368421052631579,
      "grad_norm": 9.212471008300781,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.3288,
      "step": 210
    },
    {
      "epoch": 7.719298245614035,
      "grad_norm": 5.810153484344482,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.3203,
      "step": 220
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8910891089108911,
      "eval_loss": 0.3060537278652191,
      "eval_runtime": 4.5988,
      "eval_samples_per_second": 87.848,
      "eval_steps_per_second": 2.827,
      "step": 228
    },
    {
      "epoch": 8.070175438596491,
      "grad_norm": 5.650562763214111,
      "learning_rate": 9.92063492063492e-06,
      "loss": 0.3379,
      "step": 230
    },
    {
      "epoch": 8.421052631578947,
      "grad_norm": 3.9896435737609863,
      "learning_rate": 7.936507936507936e-06,
      "loss": 0.2913,
      "step": 240
    },
    {
      "epoch": 8.771929824561404,
      "grad_norm": 7.3288397789001465,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.2737,
      "step": 250
    },
    {
      "epoch": 8.982456140350877,
      "eval_accuracy": 0.8861386138613861,
      "eval_loss": 0.3128886818885803,
      "eval_runtime": 4.2212,
      "eval_samples_per_second": 95.706,
      "eval_steps_per_second": 3.08,
      "step": 256
    },
    {
      "epoch": 9.12280701754386,
      "grad_norm": 5.5674285888671875,
      "learning_rate": 3.968253968253968e-06,
      "loss": 0.2991,
      "step": 260
    },
    {
      "epoch": 9.473684210526315,
      "grad_norm": 6.089910984039307,
      "learning_rate": 1.984126984126984e-06,
      "loss": 0.2943,
      "step": 270
    },
    {
      "epoch": 9.824561403508772,
      "grad_norm": 5.232763290405273,
      "learning_rate": 0.0,
      "loss": 0.2728,
      "step": 280
    },
    {
      "epoch": 9.824561403508772,
      "eval_accuracy": 0.8861386138613861,
      "eval_loss": 0.308758020401001,
      "eval_runtime": 4.3884,
      "eval_samples_per_second": 92.061,
      "eval_steps_per_second": 2.962,
      "step": 280
    },
    {
      "epoch": 9.824561403508772,
      "step": 280,
      "total_flos": 8.878060093031055e+17,
      "train_loss": 0.5355421313217708,
      "train_runtime": 1702.5623,
      "train_samples_per_second": 21.344,
      "train_steps_per_second": 0.164
    }
  ],
  "logging_steps": 10,
  "max_steps": 280,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 8.878060093031055e+17,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}