ChasingMercer committed on
Commit
b3efbb1
1 Parent(s): 4b5241b

Training in progress, epoch 1

Browse files
.ipynb_checkpoints/README-checkpoint.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ tags:
4
+ - generated_from_trainer
5
+ datasets:
6
+ - imagefolder
7
+ metrics:
8
+ - accuracy
9
+ model-index:
10
+ - name: weather-mod
11
+ results:
12
+ - task:
13
+ name: Image Classification
14
+ type: image-classification
15
+ dataset:
16
+ name: imagefolder
17
+ type: imagefolder
18
+ config: dataset
19
+ split: train
20
+ args: dataset
21
+ metrics:
22
+ - name: Accuracy
23
+ type: accuracy
24
+ value: 0.9426751592356688
25
+ ---
26
+
27
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
28
+ should probably proofread and complete it, then remove this comment. -->
29
+
30
+ # weather-mod
31
+
32
+ This model is a fine-tuned version of [microsoft/beit-base-patch16-224-pt22k-ft22k](https://huggingface.co/microsoft/beit-base-patch16-224-pt22k-ft22k) on the imagefolder dataset.
33
+ It achieves the following results on the evaluation set:
34
+ - Loss: 0.2331
35
+ - Accuracy: 0.9427
36
+
37
+ ## Model description
38
+
39
+ More information needed
40
+
41
+ ## Intended uses & limitations
42
+
43
+ More information needed
44
+
45
+ ## Training and evaluation data
46
+
47
+ More information needed
48
+
49
+ ## Training procedure
50
+
51
+ ### Training hyperparameters
52
+
53
+ The following hyperparameters were used during training:
54
+ - learning_rate: 5e-05
55
+ - train_batch_size: 8
56
+ - eval_batch_size: 8
57
+ - seed: 42
58
+ - gradient_accumulation_steps: 4
59
+ - total_train_batch_size: 32
60
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
+ - lr_scheduler_type: linear
62
+ - lr_scheduler_warmup_ratio: 0.1
63
+ - num_epochs: 6
64
+
65
+ ### Training results
66
+
67
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
68
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
69
+ | 0.1517 | 1.0 | 118 | 0.2654 | 0.9151 |
70
+ | 0.1627 | 2.0 | 236 | 0.2255 | 0.9321 |
71
+ | 0.1071 | 3.0 | 354 | 0.2734 | 0.9342 |
72
+ | 0.0757 | 4.0 | 472 | 0.2343 | 0.9448 |
73
+ | 0.059 | 5.0 | 590 | 0.2578 | 0.9384 |
74
+ | 0.0266 | 6.0 | 708 | 0.2331 | 0.9427 |
75
+
76
+
77
+ ### Framework versions
78
+
79
+ - Transformers 4.26.1
80
+ - Pytorch 1.13.1+cu116
81
+ - Datasets 2.10.1
82
+ - Tokenizers 0.13.2
.ipynb_checkpoints/all_results-checkpoint.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "total_flos": 1.7589230025365053e+18,
4
+ "train_loss": 0.10344488720941004,
5
+ "train_runtime": 2153.5363,
6
+ "train_samples_per_second": 10.506,
7
+ "train_steps_per_second": 0.329
8
+ }
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/beit-base-patch16-224-pt22k-ft22k",
3
+ "architectures": [
4
+ "BeitForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "auxiliary_channels": 256,
8
+ "auxiliary_concat_input": false,
9
+ "auxiliary_loss_weight": 0.4,
10
+ "auxiliary_num_convs": 1,
11
+ "drop_path_rate": 0.1,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.0,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "dew",
17
+ "1": "fogsmog",
18
+ "2": "frost",
19
+ "3": "glaze",
20
+ "4": "hail",
21
+ "5": "lightning",
22
+ "6": "rain",
23
+ "7": "rainbow",
24
+ "8": "rime",
25
+ "9": "sandstorm",
26
+ "10": "snow"
27
+ },
28
+ "image_size": 224,
29
+ "initializer_range": 0.02,
30
+ "intermediate_size": 3072,
31
+ "label2id": {
32
+ "dew": 0,
33
+ "fogsmog": 1,
34
+ "frost": 2,
35
+ "glaze": 3,
36
+ "hail": 4,
37
+ "lightning": 5,
38
+ "rain": 6,
39
+ "rainbow": 7,
40
+ "rime": 8,
41
+ "sandstorm": 9,
42
+ "snow": 10
43
+ },
44
+ "layer_norm_eps": 1e-12,
45
+ "layer_scale_init_value": 0.1,
46
+ "model_type": "beit",
47
+ "num_attention_heads": 12,
48
+ "num_channels": 3,
49
+ "num_hidden_layers": 12,
50
+ "out_indices": [
51
+ 3,
52
+ 5,
53
+ 7,
54
+ 11
55
+ ],
56
+ "patch_size": 16,
57
+ "pool_scales": [
58
+ 1,
59
+ 2,
60
+ 3,
61
+ 6
62
+ ],
63
+ "problem_type": "single_label_classification",
64
+ "semantic_loss_ignore_index": 255,
65
+ "torch_dtype": "float32",
66
+ "transformers_version": "4.26.1",
67
+ "use_absolute_position_embeddings": false,
68
+ "use_auxiliary_head": true,
69
+ "use_mask_token": false,
70
+ "use_mean_pooling": true,
71
+ "use_relative_position_bias": true,
72
+ "use_shared_relative_position_bias": false,
73
+ "vocab_size": 8192
74
+ }
.ipynb_checkpoints/train_results-checkpoint.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "total_flos": 1.7589230025365053e+18,
4
+ "train_loss": 0.10344488720941004,
5
+ "train_runtime": 2153.5363,
6
+ "train_samples_per_second": 10.506,
7
+ "train_steps_per_second": 0.329
8
+ }
.ipynb_checkpoints/trainer_state-checkpoint.json ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9447983014861996,
3
+ "best_model_checkpoint": "./weather-mod/checkpoint-472",
4
+ "epoch": 6.0,
5
+ "global_step": 708,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 1.5492957746478872e-05,
13
+ "loss": 0.179,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.17,
18
+ "learning_rate": 2.2535211267605634e-05,
19
+ "loss": 0.1308,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.25,
24
+ "learning_rate": 2.9577464788732395e-05,
25
+ "loss": 0.1013,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.34,
30
+ "learning_rate": 3.661971830985916e-05,
31
+ "loss": 0.1276,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.42,
36
+ "learning_rate": 4.366197183098591e-05,
37
+ "loss": 0.1344,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.51,
42
+ "learning_rate": 4.992150706436421e-05,
43
+ "loss": 0.1502,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.59,
48
+ "learning_rate": 4.913657770800628e-05,
49
+ "loss": 0.1448,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.68,
54
+ "learning_rate": 4.8351648351648355e-05,
55
+ "loss": 0.1864,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.76,
60
+ "learning_rate": 4.756671899529043e-05,
61
+ "loss": 0.2092,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.85,
66
+ "learning_rate": 4.67817896389325e-05,
67
+ "loss": 0.2008,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.93,
72
+ "learning_rate": 4.599686028257457e-05,
73
+ "loss": 0.1517,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "eval_accuracy": 0.9150743099787686,
79
+ "eval_loss": 0.26539650559425354,
80
+ "eval_runtime": 14.9714,
81
+ "eval_samples_per_second": 31.46,
82
+ "eval_steps_per_second": 3.941,
83
+ "step": 118
84
+ },
85
+ {
86
+ "epoch": 1.02,
87
+ "learning_rate": 4.5211930926216645e-05,
88
+ "loss": 0.145,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 1.1,
93
+ "learning_rate": 4.442700156985872e-05,
94
+ "loss": 0.1558,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 1.19,
99
+ "learning_rate": 4.364207221350079e-05,
100
+ "loss": 0.1556,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 1.27,
105
+ "learning_rate": 4.2857142857142856e-05,
106
+ "loss": 0.1194,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 1.36,
111
+ "learning_rate": 4.207221350078493e-05,
112
+ "loss": 0.1738,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 1.44,
117
+ "learning_rate": 4.1287284144427e-05,
118
+ "loss": 0.1495,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 1.53,
123
+ "learning_rate": 4.0502354788069074e-05,
124
+ "loss": 0.1603,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.61,
129
+ "learning_rate": 3.971742543171115e-05,
130
+ "loss": 0.1494,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 1.69,
135
+ "learning_rate": 3.893249607535322e-05,
136
+ "loss": 0.1469,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.78,
141
+ "learning_rate": 3.814756671899529e-05,
142
+ "loss": 0.1922,
143
+ "step": 210
144
+ },
145
+ {
146
+ "epoch": 1.86,
147
+ "learning_rate": 3.7362637362637365e-05,
148
+ "loss": 0.1371,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.95,
153
+ "learning_rate": 3.657770800627944e-05,
154
+ "loss": 0.1627,
155
+ "step": 230
156
+ },
157
+ {
158
+ "epoch": 2.0,
159
+ "eval_accuracy": 0.9320594479830149,
160
+ "eval_loss": 0.2255283147096634,
161
+ "eval_runtime": 34.5035,
162
+ "eval_samples_per_second": 13.651,
163
+ "eval_steps_per_second": 1.71,
164
+ "step": 236
165
+ },
166
+ {
167
+ "epoch": 2.03,
168
+ "learning_rate": 3.579277864992151e-05,
169
+ "loss": 0.2128,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 2.12,
174
+ "learning_rate": 3.500784929356358e-05,
175
+ "loss": 0.0869,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 2.2,
180
+ "learning_rate": 3.4222919937205655e-05,
181
+ "loss": 0.1261,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 2.29,
186
+ "learning_rate": 3.343799058084773e-05,
187
+ "loss": 0.1104,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 2.37,
192
+ "learning_rate": 3.265306122448979e-05,
193
+ "loss": 0.1111,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 2.46,
198
+ "learning_rate": 3.1868131868131866e-05,
199
+ "loss": 0.0662,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 2.54,
204
+ "learning_rate": 3.108320251177394e-05,
205
+ "loss": 0.1436,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 2.63,
210
+ "learning_rate": 3.029827315541601e-05,
211
+ "loss": 0.1594,
212
+ "step": 310
213
+ },
214
+ {
215
+ "epoch": 2.71,
216
+ "learning_rate": 2.9513343799058084e-05,
217
+ "loss": 0.105,
218
+ "step": 320
219
+ },
220
+ {
221
+ "epoch": 2.8,
222
+ "learning_rate": 2.8728414442700156e-05,
223
+ "loss": 0.063,
224
+ "step": 330
225
+ },
226
+ {
227
+ "epoch": 2.88,
228
+ "learning_rate": 2.794348508634223e-05,
229
+ "loss": 0.104,
230
+ "step": 340
231
+ },
232
+ {
233
+ "epoch": 2.97,
234
+ "learning_rate": 2.71585557299843e-05,
235
+ "loss": 0.1071,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 3.0,
240
+ "eval_accuracy": 0.9341825902335457,
241
+ "eval_loss": 0.2734454274177551,
242
+ "eval_runtime": 34.3105,
243
+ "eval_samples_per_second": 13.728,
244
+ "eval_steps_per_second": 1.72,
245
+ "step": 354
246
+ },
247
+ {
248
+ "epoch": 3.05,
249
+ "learning_rate": 2.6373626373626374e-05,
250
+ "loss": 0.0874,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 3.14,
255
+ "learning_rate": 2.5588697017268447e-05,
256
+ "loss": 0.1084,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 3.22,
261
+ "learning_rate": 2.480376766091052e-05,
262
+ "loss": 0.0758,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 3.31,
267
+ "learning_rate": 2.4018838304552592e-05,
268
+ "loss": 0.0895,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 3.39,
273
+ "learning_rate": 2.3233908948194665e-05,
274
+ "loss": 0.0998,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 3.47,
279
+ "learning_rate": 2.2448979591836737e-05,
280
+ "loss": 0.0797,
281
+ "step": 410
282
+ },
283
+ {
284
+ "epoch": 3.56,
285
+ "learning_rate": 2.166405023547881e-05,
286
+ "loss": 0.0318,
287
+ "step": 420
288
+ },
289
+ {
290
+ "epoch": 3.64,
291
+ "learning_rate": 2.0879120879120882e-05,
292
+ "loss": 0.0744,
293
+ "step": 430
294
+ },
295
+ {
296
+ "epoch": 3.73,
297
+ "learning_rate": 2.0094191522762955e-05,
298
+ "loss": 0.0453,
299
+ "step": 440
300
+ },
301
+ {
302
+ "epoch": 3.81,
303
+ "learning_rate": 1.9309262166405024e-05,
304
+ "loss": 0.1569,
305
+ "step": 450
306
+ },
307
+ {
308
+ "epoch": 3.9,
309
+ "learning_rate": 1.8524332810047097e-05,
310
+ "loss": 0.0866,
311
+ "step": 460
312
+ },
313
+ {
314
+ "epoch": 3.98,
315
+ "learning_rate": 1.773940345368917e-05,
316
+ "loss": 0.0757,
317
+ "step": 470
318
+ },
319
+ {
320
+ "epoch": 4.0,
321
+ "eval_accuracy": 0.9447983014861996,
322
+ "eval_loss": 0.23432743549346924,
323
+ "eval_runtime": 35.6046,
324
+ "eval_samples_per_second": 13.229,
325
+ "eval_steps_per_second": 1.657,
326
+ "step": 472
327
+ },
328
+ {
329
+ "epoch": 4.07,
330
+ "learning_rate": 1.6954474097331242e-05,
331
+ "loss": 0.0881,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 4.15,
336
+ "learning_rate": 1.6169544740973315e-05,
337
+ "loss": 0.0385,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 4.24,
342
+ "learning_rate": 1.5384615384615387e-05,
343
+ "loss": 0.0238,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 4.32,
348
+ "learning_rate": 1.4599686028257458e-05,
349
+ "loss": 0.0733,
350
+ "step": 510
351
+ },
352
+ {
353
+ "epoch": 4.41,
354
+ "learning_rate": 1.3814756671899529e-05,
355
+ "loss": 0.1086,
356
+ "step": 520
357
+ },
358
+ {
359
+ "epoch": 4.49,
360
+ "learning_rate": 1.3029827315541602e-05,
361
+ "loss": 0.0896,
362
+ "step": 530
363
+ },
364
+ {
365
+ "epoch": 4.58,
366
+ "learning_rate": 1.2244897959183674e-05,
367
+ "loss": 0.0461,
368
+ "step": 540
369
+ },
370
+ {
371
+ "epoch": 4.66,
372
+ "learning_rate": 1.1459968602825747e-05,
373
+ "loss": 0.0427,
374
+ "step": 550
375
+ },
376
+ {
377
+ "epoch": 4.75,
378
+ "learning_rate": 1.067503924646782e-05,
379
+ "loss": 0.0634,
380
+ "step": 560
381
+ },
382
+ {
383
+ "epoch": 4.83,
384
+ "learning_rate": 9.89010989010989e-06,
385
+ "loss": 0.0591,
386
+ "step": 570
387
+ },
388
+ {
389
+ "epoch": 4.92,
390
+ "learning_rate": 9.105180533751963e-06,
391
+ "loss": 0.0816,
392
+ "step": 580
393
+ },
394
+ {
395
+ "epoch": 5.0,
396
+ "learning_rate": 8.320251177394036e-06,
397
+ "loss": 0.059,
398
+ "step": 590
399
+ },
400
+ {
401
+ "epoch": 5.0,
402
+ "eval_accuracy": 0.9384288747346072,
403
+ "eval_loss": 0.2577723562717438,
404
+ "eval_runtime": 13.3966,
405
+ "eval_samples_per_second": 35.158,
406
+ "eval_steps_per_second": 4.404,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 5.08,
411
+ "learning_rate": 7.535321821036106e-06,
412
+ "loss": 0.0398,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 5.17,
417
+ "learning_rate": 6.750392464678179e-06,
418
+ "loss": 0.0226,
419
+ "step": 610
420
+ },
421
+ {
422
+ "epoch": 5.25,
423
+ "learning_rate": 5.965463108320252e-06,
424
+ "loss": 0.0777,
425
+ "step": 620
426
+ },
427
+ {
428
+ "epoch": 5.34,
429
+ "learning_rate": 5.180533751962323e-06,
430
+ "loss": 0.1017,
431
+ "step": 630
432
+ },
433
+ {
434
+ "epoch": 5.42,
435
+ "learning_rate": 4.395604395604396e-06,
436
+ "loss": 0.0144,
437
+ "step": 640
438
+ },
439
+ {
440
+ "epoch": 5.51,
441
+ "learning_rate": 3.610675039246468e-06,
442
+ "loss": 0.0738,
443
+ "step": 650
444
+ },
445
+ {
446
+ "epoch": 5.59,
447
+ "learning_rate": 2.8257456828885403e-06,
448
+ "loss": 0.0168,
449
+ "step": 660
450
+ },
451
+ {
452
+ "epoch": 5.68,
453
+ "learning_rate": 2.040816326530612e-06,
454
+ "loss": 0.029,
455
+ "step": 670
456
+ },
457
+ {
458
+ "epoch": 5.76,
459
+ "learning_rate": 1.2558869701726847e-06,
460
+ "loss": 0.0407,
461
+ "step": 680
462
+ },
463
+ {
464
+ "epoch": 5.85,
465
+ "learning_rate": 4.7095761381475665e-07,
466
+ "loss": 0.1009,
467
+ "step": 690
468
+ },
469
+ {
470
+ "epoch": 5.93,
471
+ "learning_rate": 0.0,
472
+ "loss": 0.0266,
473
+ "step": 700
474
+ },
475
+ {
476
+ "epoch": 6.0,
477
+ "eval_accuracy": 0.9426751592356688,
478
+ "eval_loss": 0.23305891454219818,
479
+ "eval_runtime": 33.4701,
480
+ "eval_samples_per_second": 14.072,
481
+ "eval_steps_per_second": 1.763,
482
+ "step": 708
483
+ },
484
+ {
485
+ "epoch": 6.0,
486
+ "step": 708,
487
+ "total_flos": 1.7589230025365053e+18,
488
+ "train_loss": 0.10344488720941004,
489
+ "train_runtime": 2153.5363,
490
+ "train_samples_per_second": 10.506,
491
+ "train_steps_per_second": 0.329
492
+ }
493
+ ],
494
+ "max_steps": 708,
495
+ "num_train_epochs": 6,
496
+ "total_flos": 1.7589230025365053e+18,
497
+ "trial_name": null,
498
+ "trial_params": null
499
+ }
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "total_flos": 1.7589230025365053e+18,
4
+ "train_loss": 0.10344488720941004,
5
+ "train_runtime": 2153.5363,
6
+ "train_samples_per_second": 10.506,
7
+ "train_steps_per_second": 0.329
8
+ }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a728d1159179a597102b0c2f24996af9daa2961b43597a41fbbccd39cc41c00
3
  size 346888057
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad74a455081d04a37b78dcb7ce50f5386723243dffbedf397c041b3a1e3e8ad
3
  size 346888057
runs/Mar13_15-46-49_pop-os/1678722500.0429041/events.out.tfevents.1678722500.pop-os.8344.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0295b1f35698ccaf1f3b3853365e14c4ae8f4c589173add56893ff052c42e73d
3
+ size 5701
runs/Mar13_15-46-49_pop-os/events.out.tfevents.1678722499.pop-os.8344.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7e13dd6ff2fa483c43e701f61fa39e3f249143dfc672afcea02ed36a6752f3
3
+ size 6732
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "total_flos": 1.7589230025365053e+18,
4
+ "train_loss": 0.10344488720941004,
5
+ "train_runtime": 2153.5363,
6
+ "train_samples_per_second": 10.506,
7
+ "train_steps_per_second": 0.329
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,499 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9447983014861996,
3
+ "best_model_checkpoint": "./weather-mod/checkpoint-472",
4
+ "epoch": 6.0,
5
+ "global_step": 708,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.08,
12
+ "learning_rate": 1.5492957746478872e-05,
13
+ "loss": 0.179,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.17,
18
+ "learning_rate": 2.2535211267605634e-05,
19
+ "loss": 0.1308,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.25,
24
+ "learning_rate": 2.9577464788732395e-05,
25
+ "loss": 0.1013,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.34,
30
+ "learning_rate": 3.661971830985916e-05,
31
+ "loss": 0.1276,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.42,
36
+ "learning_rate": 4.366197183098591e-05,
37
+ "loss": 0.1344,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.51,
42
+ "learning_rate": 4.992150706436421e-05,
43
+ "loss": 0.1502,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.59,
48
+ "learning_rate": 4.913657770800628e-05,
49
+ "loss": 0.1448,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.68,
54
+ "learning_rate": 4.8351648351648355e-05,
55
+ "loss": 0.1864,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.76,
60
+ "learning_rate": 4.756671899529043e-05,
61
+ "loss": 0.2092,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 0.85,
66
+ "learning_rate": 4.67817896389325e-05,
67
+ "loss": 0.2008,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 0.93,
72
+ "learning_rate": 4.599686028257457e-05,
73
+ "loss": 0.1517,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "eval_accuracy": 0.9150743099787686,
79
+ "eval_loss": 0.26539650559425354,
80
+ "eval_runtime": 14.9714,
81
+ "eval_samples_per_second": 31.46,
82
+ "eval_steps_per_second": 3.941,
83
+ "step": 118
84
+ },
85
+ {
86
+ "epoch": 1.02,
87
+ "learning_rate": 4.5211930926216645e-05,
88
+ "loss": 0.145,
89
+ "step": 120
90
+ },
91
+ {
92
+ "epoch": 1.1,
93
+ "learning_rate": 4.442700156985872e-05,
94
+ "loss": 0.1558,
95
+ "step": 130
96
+ },
97
+ {
98
+ "epoch": 1.19,
99
+ "learning_rate": 4.364207221350079e-05,
100
+ "loss": 0.1556,
101
+ "step": 140
102
+ },
103
+ {
104
+ "epoch": 1.27,
105
+ "learning_rate": 4.2857142857142856e-05,
106
+ "loss": 0.1194,
107
+ "step": 150
108
+ },
109
+ {
110
+ "epoch": 1.36,
111
+ "learning_rate": 4.207221350078493e-05,
112
+ "loss": 0.1738,
113
+ "step": 160
114
+ },
115
+ {
116
+ "epoch": 1.44,
117
+ "learning_rate": 4.1287284144427e-05,
118
+ "loss": 0.1495,
119
+ "step": 170
120
+ },
121
+ {
122
+ "epoch": 1.53,
123
+ "learning_rate": 4.0502354788069074e-05,
124
+ "loss": 0.1603,
125
+ "step": 180
126
+ },
127
+ {
128
+ "epoch": 1.61,
129
+ "learning_rate": 3.971742543171115e-05,
130
+ "loss": 0.1494,
131
+ "step": 190
132
+ },
133
+ {
134
+ "epoch": 1.69,
135
+ "learning_rate": 3.893249607535322e-05,
136
+ "loss": 0.1469,
137
+ "step": 200
138
+ },
139
+ {
140
+ "epoch": 1.78,
141
+ "learning_rate": 3.814756671899529e-05,
142
+ "loss": 0.1922,
143
+ "step": 210
144
+ },
145
+ {
146
+ "epoch": 1.86,
147
+ "learning_rate": 3.7362637362637365e-05,
148
+ "loss": 0.1371,
149
+ "step": 220
150
+ },
151
+ {
152
+ "epoch": 1.95,
153
+ "learning_rate": 3.657770800627944e-05,
154
+ "loss": 0.1627,
155
+ "step": 230
156
+ },
157
+ {
158
+ "epoch": 2.0,
159
+ "eval_accuracy": 0.9320594479830149,
160
+ "eval_loss": 0.2255283147096634,
161
+ "eval_runtime": 34.5035,
162
+ "eval_samples_per_second": 13.651,
163
+ "eval_steps_per_second": 1.71,
164
+ "step": 236
165
+ },
166
+ {
167
+ "epoch": 2.03,
168
+ "learning_rate": 3.579277864992151e-05,
169
+ "loss": 0.2128,
170
+ "step": 240
171
+ },
172
+ {
173
+ "epoch": 2.12,
174
+ "learning_rate": 3.500784929356358e-05,
175
+ "loss": 0.0869,
176
+ "step": 250
177
+ },
178
+ {
179
+ "epoch": 2.2,
180
+ "learning_rate": 3.4222919937205655e-05,
181
+ "loss": 0.1261,
182
+ "step": 260
183
+ },
184
+ {
185
+ "epoch": 2.29,
186
+ "learning_rate": 3.343799058084773e-05,
187
+ "loss": 0.1104,
188
+ "step": 270
189
+ },
190
+ {
191
+ "epoch": 2.37,
192
+ "learning_rate": 3.265306122448979e-05,
193
+ "loss": 0.1111,
194
+ "step": 280
195
+ },
196
+ {
197
+ "epoch": 2.46,
198
+ "learning_rate": 3.1868131868131866e-05,
199
+ "loss": 0.0662,
200
+ "step": 290
201
+ },
202
+ {
203
+ "epoch": 2.54,
204
+ "learning_rate": 3.108320251177394e-05,
205
+ "loss": 0.1436,
206
+ "step": 300
207
+ },
208
+ {
209
+ "epoch": 2.63,
210
+ "learning_rate": 3.029827315541601e-05,
211
+ "loss": 0.1594,
212
+ "step": 310
213
+ },
214
+ {
215
+ "epoch": 2.71,
216
+ "learning_rate": 2.9513343799058084e-05,
217
+ "loss": 0.105,
218
+ "step": 320
219
+ },
220
+ {
221
+ "epoch": 2.8,
222
+ "learning_rate": 2.8728414442700156e-05,
223
+ "loss": 0.063,
224
+ "step": 330
225
+ },
226
+ {
227
+ "epoch": 2.88,
228
+ "learning_rate": 2.794348508634223e-05,
229
+ "loss": 0.104,
230
+ "step": 340
231
+ },
232
+ {
233
+ "epoch": 2.97,
234
+ "learning_rate": 2.71585557299843e-05,
235
+ "loss": 0.1071,
236
+ "step": 350
237
+ },
238
+ {
239
+ "epoch": 3.0,
240
+ "eval_accuracy": 0.9341825902335457,
241
+ "eval_loss": 0.2734454274177551,
242
+ "eval_runtime": 34.3105,
243
+ "eval_samples_per_second": 13.728,
244
+ "eval_steps_per_second": 1.72,
245
+ "step": 354
246
+ },
247
+ {
248
+ "epoch": 3.05,
249
+ "learning_rate": 2.6373626373626374e-05,
250
+ "loss": 0.0874,
251
+ "step": 360
252
+ },
253
+ {
254
+ "epoch": 3.14,
255
+ "learning_rate": 2.5588697017268447e-05,
256
+ "loss": 0.1084,
257
+ "step": 370
258
+ },
259
+ {
260
+ "epoch": 3.22,
261
+ "learning_rate": 2.480376766091052e-05,
262
+ "loss": 0.0758,
263
+ "step": 380
264
+ },
265
+ {
266
+ "epoch": 3.31,
267
+ "learning_rate": 2.4018838304552592e-05,
268
+ "loss": 0.0895,
269
+ "step": 390
270
+ },
271
+ {
272
+ "epoch": 3.39,
273
+ "learning_rate": 2.3233908948194665e-05,
274
+ "loss": 0.0998,
275
+ "step": 400
276
+ },
277
+ {
278
+ "epoch": 3.47,
279
+ "learning_rate": 2.2448979591836737e-05,
280
+ "loss": 0.0797,
281
+ "step": 410
282
+ },
283
+ {
284
+ "epoch": 3.56,
285
+ "learning_rate": 2.166405023547881e-05,
286
+ "loss": 0.0318,
287
+ "step": 420
288
+ },
289
+ {
290
+ "epoch": 3.64,
291
+ "learning_rate": 2.0879120879120882e-05,
292
+ "loss": 0.0744,
293
+ "step": 430
294
+ },
295
+ {
296
+ "epoch": 3.73,
297
+ "learning_rate": 2.0094191522762955e-05,
298
+ "loss": 0.0453,
299
+ "step": 440
300
+ },
301
+ {
302
+ "epoch": 3.81,
303
+ "learning_rate": 1.9309262166405024e-05,
304
+ "loss": 0.1569,
305
+ "step": 450
306
+ },
307
+ {
308
+ "epoch": 3.9,
309
+ "learning_rate": 1.8524332810047097e-05,
310
+ "loss": 0.0866,
311
+ "step": 460
312
+ },
313
+ {
314
+ "epoch": 3.98,
315
+ "learning_rate": 1.773940345368917e-05,
316
+ "loss": 0.0757,
317
+ "step": 470
318
+ },
319
+ {
320
+ "epoch": 4.0,
321
+ "eval_accuracy": 0.9447983014861996,
322
+ "eval_loss": 0.23432743549346924,
323
+ "eval_runtime": 35.6046,
324
+ "eval_samples_per_second": 13.229,
325
+ "eval_steps_per_second": 1.657,
326
+ "step": 472
327
+ },
328
+ {
329
+ "epoch": 4.07,
330
+ "learning_rate": 1.6954474097331242e-05,
331
+ "loss": 0.0881,
332
+ "step": 480
333
+ },
334
+ {
335
+ "epoch": 4.15,
336
+ "learning_rate": 1.6169544740973315e-05,
337
+ "loss": 0.0385,
338
+ "step": 490
339
+ },
340
+ {
341
+ "epoch": 4.24,
342
+ "learning_rate": 1.5384615384615387e-05,
343
+ "loss": 0.0238,
344
+ "step": 500
345
+ },
346
+ {
347
+ "epoch": 4.32,
348
+ "learning_rate": 1.4599686028257458e-05,
349
+ "loss": 0.0733,
350
+ "step": 510
351
+ },
352
+ {
353
+ "epoch": 4.41,
354
+ "learning_rate": 1.3814756671899529e-05,
355
+ "loss": 0.1086,
356
+ "step": 520
357
+ },
358
+ {
359
+ "epoch": 4.49,
360
+ "learning_rate": 1.3029827315541602e-05,
361
+ "loss": 0.0896,
362
+ "step": 530
363
+ },
364
+ {
365
+ "epoch": 4.58,
366
+ "learning_rate": 1.2244897959183674e-05,
367
+ "loss": 0.0461,
368
+ "step": 540
369
+ },
370
+ {
371
+ "epoch": 4.66,
372
+ "learning_rate": 1.1459968602825747e-05,
373
+ "loss": 0.0427,
374
+ "step": 550
375
+ },
376
+ {
377
+ "epoch": 4.75,
378
+ "learning_rate": 1.067503924646782e-05,
379
+ "loss": 0.0634,
380
+ "step": 560
381
+ },
382
+ {
383
+ "epoch": 4.83,
384
+ "learning_rate": 9.89010989010989e-06,
385
+ "loss": 0.0591,
386
+ "step": 570
387
+ },
388
+ {
389
+ "epoch": 4.92,
390
+ "learning_rate": 9.105180533751963e-06,
391
+ "loss": 0.0816,
392
+ "step": 580
393
+ },
394
+ {
395
+ "epoch": 5.0,
396
+ "learning_rate": 8.320251177394036e-06,
397
+ "loss": 0.059,
398
+ "step": 590
399
+ },
400
+ {
401
+ "epoch": 5.0,
402
+ "eval_accuracy": 0.9384288747346072,
403
+ "eval_loss": 0.2577723562717438,
404
+ "eval_runtime": 13.3966,
405
+ "eval_samples_per_second": 35.158,
406
+ "eval_steps_per_second": 4.404,
407
+ "step": 590
408
+ },
409
+ {
410
+ "epoch": 5.08,
411
+ "learning_rate": 7.535321821036106e-06,
412
+ "loss": 0.0398,
413
+ "step": 600
414
+ },
415
+ {
416
+ "epoch": 5.17,
417
+ "learning_rate": 6.750392464678179e-06,
418
+ "loss": 0.0226,
419
+ "step": 610
420
+ },
421
+ {
422
+ "epoch": 5.25,
423
+ "learning_rate": 5.965463108320252e-06,
424
+ "loss": 0.0777,
425
+ "step": 620
426
+ },
427
+ {
428
+ "epoch": 5.34,
429
+ "learning_rate": 5.180533751962323e-06,
430
+ "loss": 0.1017,
431
+ "step": 630
432
+ },
433
+ {
434
+ "epoch": 5.42,
435
+ "learning_rate": 4.395604395604396e-06,
436
+ "loss": 0.0144,
437
+ "step": 640
438
+ },
439
+ {
440
+ "epoch": 5.51,
441
+ "learning_rate": 3.610675039246468e-06,
442
+ "loss": 0.0738,
443
+ "step": 650
444
+ },
445
+ {
446
+ "epoch": 5.59,
447
+ "learning_rate": 2.8257456828885403e-06,
448
+ "loss": 0.0168,
449
+ "step": 660
450
+ },
451
+ {
452
+ "epoch": 5.68,
453
+ "learning_rate": 2.040816326530612e-06,
454
+ "loss": 0.029,
455
+ "step": 670
456
+ },
457
+ {
458
+ "epoch": 5.76,
459
+ "learning_rate": 1.2558869701726847e-06,
460
+ "loss": 0.0407,
461
+ "step": 680
462
+ },
463
+ {
464
+ "epoch": 5.85,
465
+ "learning_rate": 4.7095761381475665e-07,
466
+ "loss": 0.1009,
467
+ "step": 690
468
+ },
469
+ {
470
+ "epoch": 5.93,
471
+ "learning_rate": 0.0,
472
+ "loss": 0.0266,
473
+ "step": 700
474
+ },
475
+ {
476
+ "epoch": 6.0,
477
+ "eval_accuracy": 0.9426751592356688,
478
+ "eval_loss": 0.23305891454219818,
479
+ "eval_runtime": 33.4701,
480
+ "eval_samples_per_second": 14.072,
481
+ "eval_steps_per_second": 1.763,
482
+ "step": 708
483
+ },
484
+ {
485
+ "epoch": 6.0,
486
+ "step": 708,
487
+ "total_flos": 1.7589230025365053e+18,
488
+ "train_loss": 0.10344488720941004,
489
+ "train_runtime": 2153.5363,
490
+ "train_samples_per_second": 10.506,
491
+ "train_steps_per_second": 0.329
492
+ }
493
+ ],
494
+ "max_steps": 708,
495
+ "num_train_epochs": 6,
496
+ "total_flos": 1.7589230025365053e+18,
497
+ "trial_name": null,
498
+ "trial_params": null
499
+ }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2749e7415fae06b1ca963ff78ae9e4ef2786dee6106b26910488abffe95acefc
3
  size 3515
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08da4e18c479ad36fc4203e76ba6c0d1465c8208176b534a260353a8e82a76b4
3
  size 3515