|
{ |
|
"best_metric": 0.9383116883116883, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1218", |
|
"epoch": 29.655172413793103, |
|
"eval_steps": 500, |
|
"global_step": 1290, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.875968992248062e-06, |
|
"loss": 2.033, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.751937984496124e-06, |
|
"loss": 1.819, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 1.3652, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5503875968992248e-05, |
|
"loss": 0.9843, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.6948051948051948, |
|
"eval_loss": 0.850001335144043, |
|
"eval_runtime": 6.318, |
|
"eval_samples_per_second": 97.499, |
|
"eval_steps_per_second": 3.166, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.937984496124031e-05, |
|
"loss": 0.816, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.7631, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.7131782945736434e-05, |
|
"loss": 0.6499, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.1007751937984497e-05, |
|
"loss": 0.5335, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7824675324675324, |
|
"eval_loss": 0.5584108233451843, |
|
"eval_runtime": 6.0937, |
|
"eval_samples_per_second": 101.088, |
|
"eval_steps_per_second": 3.282, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.5594, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.875968992248062e-05, |
|
"loss": 0.4565, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.263565891472868e-05, |
|
"loss": 0.4759, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.4542, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.995693367786391e-05, |
|
"loss": 0.4263, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.8116883116883117, |
|
"eval_loss": 0.4790755808353424, |
|
"eval_runtime": 6.348, |
|
"eval_samples_per_second": 97.038, |
|
"eval_steps_per_second": 3.151, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 4.952627045650302e-05, |
|
"loss": 0.3769, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.9095607235142123e-05, |
|
"loss": 0.3852, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 4.866494401378123e-05, |
|
"loss": 0.3409, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 4.823428079242033e-05, |
|
"loss": 0.3308, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8344155844155844, |
|
"eval_loss": 0.4268946349620819, |
|
"eval_runtime": 6.5538, |
|
"eval_samples_per_second": 93.991, |
|
"eval_steps_per_second": 3.052, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 4.780361757105943e-05, |
|
"loss": 0.3093, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 4.737295434969854e-05, |
|
"loss": 0.3429, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 4.694229112833764e-05, |
|
"loss": 0.2697, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 4.651162790697675e-05, |
|
"loss": 0.2882, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.8636363636363636, |
|
"eval_loss": 0.35674524307250977, |
|
"eval_runtime": 6.3226, |
|
"eval_samples_per_second": 97.428, |
|
"eval_steps_per_second": 3.163, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 4.6080964685615853e-05, |
|
"loss": 0.2729, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 4.565030146425496e-05, |
|
"loss": 0.2543, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 4.521963824289406e-05, |
|
"loss": 0.2526, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 4.478897502153316e-05, |
|
"loss": 0.255, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 4.435831180017227e-05, |
|
"loss": 0.2517, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8701298701298701, |
|
"eval_loss": 0.33171918988227844, |
|
"eval_runtime": 5.9688, |
|
"eval_samples_per_second": 103.204, |
|
"eval_steps_per_second": 3.351, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 4.392764857881137e-05, |
|
"loss": 0.2317, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 4.349698535745048e-05, |
|
"loss": 0.1944, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 4.306632213608958e-05, |
|
"loss": 0.248, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 4.263565891472868e-05, |
|
"loss": 0.1908, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8814935064935064, |
|
"eval_loss": 0.3081734776496887, |
|
"eval_runtime": 6.4591, |
|
"eval_samples_per_second": 95.369, |
|
"eval_steps_per_second": 3.096, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 4.220499569336779e-05, |
|
"loss": 0.1726, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 4.177433247200689e-05, |
|
"loss": 0.1663, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 4.1343669250646e-05, |
|
"loss": 0.1602, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 4.09130060292851e-05, |
|
"loss": 0.187, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8798701298701299, |
|
"eval_loss": 0.3230064809322357, |
|
"eval_runtime": 6.0202, |
|
"eval_samples_per_second": 102.322, |
|
"eval_steps_per_second": 3.322, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 4.048234280792421e-05, |
|
"loss": 0.1676, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 4.005167958656331e-05, |
|
"loss": 0.1681, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 3.962101636520241e-05, |
|
"loss": 0.1818, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 3.919035314384152e-05, |
|
"loss": 0.1299, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 3.875968992248062e-05, |
|
"loss": 0.1434, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.900974025974026, |
|
"eval_loss": 0.3322867453098297, |
|
"eval_runtime": 6.3218, |
|
"eval_samples_per_second": 97.44, |
|
"eval_steps_per_second": 3.164, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 3.832902670111973e-05, |
|
"loss": 0.1432, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 3.789836347975883e-05, |
|
"loss": 0.1313, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 3.746770025839794e-05, |
|
"loss": 0.1546, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.1277, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9074675324675324, |
|
"eval_loss": 0.2488991618156433, |
|
"eval_runtime": 6.4317, |
|
"eval_samples_per_second": 95.776, |
|
"eval_steps_per_second": 3.11, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 3.660637381567614e-05, |
|
"loss": 0.1349, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 10.34, |
|
"learning_rate": 3.617571059431525e-05, |
|
"loss": 0.1074, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 10.57, |
|
"learning_rate": 3.5745047372954346e-05, |
|
"loss": 0.1218, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 3.531438415159346e-05, |
|
"loss": 0.156, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.887987012987013, |
|
"eval_loss": 0.3245611786842346, |
|
"eval_runtime": 6.5235, |
|
"eval_samples_per_second": 94.429, |
|
"eval_steps_per_second": 3.066, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 11.03, |
|
"learning_rate": 3.488372093023256e-05, |
|
"loss": 0.1113, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 3.445305770887167e-05, |
|
"loss": 0.0888, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 3.4022394487510767e-05, |
|
"loss": 0.1083, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 11.72, |
|
"learning_rate": 3.359173126614987e-05, |
|
"loss": 0.1126, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 3.316106804478898e-05, |
|
"loss": 0.0781, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.900974025974026, |
|
"eval_loss": 0.31213870644569397, |
|
"eval_runtime": 5.9923, |
|
"eval_samples_per_second": 102.799, |
|
"eval_steps_per_second": 3.338, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 12.18, |
|
"learning_rate": 3.273040482342808e-05, |
|
"loss": 0.0906, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 3.229974160206719e-05, |
|
"loss": 0.0864, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 12.64, |
|
"learning_rate": 3.186907838070629e-05, |
|
"loss": 0.0854, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 12.87, |
|
"learning_rate": 3.143841515934539e-05, |
|
"loss": 0.1001, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.9058441558441559, |
|
"eval_loss": 0.27083244919776917, |
|
"eval_runtime": 6.2499, |
|
"eval_samples_per_second": 98.561, |
|
"eval_steps_per_second": 3.2, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 13.1, |
|
"learning_rate": 3.1007751937984497e-05, |
|
"loss": 0.0923, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.05770887166236e-05, |
|
"loss": 0.0771, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 3.0146425495262704e-05, |
|
"loss": 0.0873, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 13.79, |
|
"learning_rate": 2.971576227390181e-05, |
|
"loss": 0.0892, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.913961038961039, |
|
"eval_loss": 0.2581653296947479, |
|
"eval_runtime": 6.4729, |
|
"eval_samples_per_second": 95.166, |
|
"eval_steps_per_second": 3.09, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 14.02, |
|
"learning_rate": 2.9285099052540914e-05, |
|
"loss": 0.0802, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 14.25, |
|
"learning_rate": 2.8854435831180023e-05, |
|
"loss": 0.073, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.842377260981912e-05, |
|
"loss": 0.0584, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.7993109388458226e-05, |
|
"loss": 0.0552, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 2.7562446167097332e-05, |
|
"loss": 0.0644, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.24860848486423492, |
|
"eval_runtime": 6.4617, |
|
"eval_samples_per_second": 95.332, |
|
"eval_steps_per_second": 3.095, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 2.7131782945736434e-05, |
|
"loss": 0.0906, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 2.670111972437554e-05, |
|
"loss": 0.0507, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 15.63, |
|
"learning_rate": 2.6270456503014644e-05, |
|
"loss": 0.0597, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 15.86, |
|
"learning_rate": 2.5839793281653746e-05, |
|
"loss": 0.0689, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9237012987012987, |
|
"eval_loss": 0.246454119682312, |
|
"eval_runtime": 6.0878, |
|
"eval_samples_per_second": 101.186, |
|
"eval_steps_per_second": 3.285, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 16.09, |
|
"learning_rate": 2.540913006029285e-05, |
|
"loss": 0.0696, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 2.4978466838931956e-05, |
|
"loss": 0.0563, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 2.4547803617571062e-05, |
|
"loss": 0.058, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 2.4117140396210164e-05, |
|
"loss": 0.0547, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.9334415584415584, |
|
"eval_loss": 0.24022094905376434, |
|
"eval_runtime": 6.0529, |
|
"eval_samples_per_second": 101.769, |
|
"eval_steps_per_second": 3.304, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 2.368647717484927e-05, |
|
"loss": 0.068, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 2.3255813953488374e-05, |
|
"loss": 0.0556, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 17.47, |
|
"learning_rate": 2.282515073212748e-05, |
|
"loss": 0.0541, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 17.7, |
|
"learning_rate": 2.239448751076658e-05, |
|
"loss": 0.0444, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 17.93, |
|
"learning_rate": 2.1963824289405686e-05, |
|
"loss": 0.0597, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9237012987012987, |
|
"eval_loss": 0.2533910572528839, |
|
"eval_runtime": 6.1928, |
|
"eval_samples_per_second": 99.47, |
|
"eval_steps_per_second": 3.23, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 18.16, |
|
"learning_rate": 2.153316106804479e-05, |
|
"loss": 0.0461, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 18.39, |
|
"learning_rate": 2.1102497846683894e-05, |
|
"loss": 0.044, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 18.62, |
|
"learning_rate": 2.0671834625323e-05, |
|
"loss": 0.0474, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 2.0241171403962104e-05, |
|
"loss": 0.0512, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.24004700779914856, |
|
"eval_runtime": 6.4994, |
|
"eval_samples_per_second": 94.777, |
|
"eval_steps_per_second": 3.077, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 1.9810508182601206e-05, |
|
"loss": 0.056, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 1.937984496124031e-05, |
|
"loss": 0.0527, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 1.8949181739879416e-05, |
|
"loss": 0.0282, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 19.77, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0486, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.8087855297157624e-05, |
|
"loss": 0.041, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9285714285714286, |
|
"eval_loss": 0.23969942331314087, |
|
"eval_runtime": 6.4042, |
|
"eval_samples_per_second": 96.187, |
|
"eval_steps_per_second": 3.123, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 20.23, |
|
"learning_rate": 1.765719207579673e-05, |
|
"loss": 0.0347, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 20.46, |
|
"learning_rate": 1.7226528854435834e-05, |
|
"loss": 0.0523, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 1.6795865633074936e-05, |
|
"loss": 0.0334, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 20.92, |
|
"learning_rate": 1.636520241171404e-05, |
|
"loss": 0.0376, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.926948051948052, |
|
"eval_loss": 0.26630115509033203, |
|
"eval_runtime": 6.6399, |
|
"eval_samples_per_second": 92.772, |
|
"eval_steps_per_second": 3.012, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 1.5934539190353146e-05, |
|
"loss": 0.0334, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 1.5503875968992248e-05, |
|
"loss": 0.042, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"learning_rate": 1.5073212747631352e-05, |
|
"loss": 0.0444, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 21.84, |
|
"learning_rate": 1.4642549526270457e-05, |
|
"loss": 0.0412, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.922077922077922, |
|
"eval_loss": 0.3025703430175781, |
|
"eval_runtime": 6.5146, |
|
"eval_samples_per_second": 94.556, |
|
"eval_steps_per_second": 3.07, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 22.07, |
|
"learning_rate": 1.421188630490956e-05, |
|
"loss": 0.0321, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 22.3, |
|
"learning_rate": 1.3781223083548666e-05, |
|
"loss": 0.0383, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 22.53, |
|
"learning_rate": 1.335055986218777e-05, |
|
"loss": 0.027, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 22.76, |
|
"learning_rate": 1.2919896640826873e-05, |
|
"loss": 0.0446, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 1.2489233419465978e-05, |
|
"loss": 0.0423, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.9301948051948052, |
|
"eval_loss": 0.26784056425094604, |
|
"eval_runtime": 6.0495, |
|
"eval_samples_per_second": 101.827, |
|
"eval_steps_per_second": 3.306, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"learning_rate": 1.2058570198105082e-05, |
|
"loss": 0.0327, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 23.45, |
|
"learning_rate": 1.1627906976744187e-05, |
|
"loss": 0.0463, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 1.119724375538329e-05, |
|
"loss": 0.0314, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 1.0766580534022396e-05, |
|
"loss": 0.0266, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.25100043416023254, |
|
"eval_runtime": 6.0247, |
|
"eval_samples_per_second": 102.246, |
|
"eval_steps_per_second": 3.32, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 24.14, |
|
"learning_rate": 1.03359173126615e-05, |
|
"loss": 0.0257, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 24.37, |
|
"learning_rate": 9.905254091300603e-06, |
|
"loss": 0.0292, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 24.6, |
|
"learning_rate": 9.474590869939708e-06, |
|
"loss": 0.0329, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 24.83, |
|
"learning_rate": 9.043927648578812e-06, |
|
"loss": 0.0313, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.9334415584415584, |
|
"eval_loss": 0.2541840672492981, |
|
"eval_runtime": 6.4751, |
|
"eval_samples_per_second": 95.134, |
|
"eval_steps_per_second": 3.089, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"learning_rate": 8.613264427217917e-06, |
|
"loss": 0.0178, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 25.29, |
|
"learning_rate": 8.18260120585702e-06, |
|
"loss": 0.0189, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 7.751937984496124e-06, |
|
"loss": 0.0399, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 25.75, |
|
"learning_rate": 7.3212747631352285e-06, |
|
"loss": 0.0269, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 25.98, |
|
"learning_rate": 6.890611541774333e-06, |
|
"loss": 0.0207, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.9334415584415584, |
|
"eval_loss": 0.2742658257484436, |
|
"eval_runtime": 6.484, |
|
"eval_samples_per_second": 95.003, |
|
"eval_steps_per_second": 3.084, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 26.21, |
|
"learning_rate": 6.4599483204134365e-06, |
|
"loss": 0.0412, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 26.44, |
|
"learning_rate": 6.029285099052541e-06, |
|
"loss": 0.0271, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 26.67, |
|
"learning_rate": 5.598621877691645e-06, |
|
"loss": 0.0268, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"learning_rate": 5.16795865633075e-06, |
|
"loss": 0.0292, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.9318181818181818, |
|
"eval_loss": 0.2614338994026184, |
|
"eval_runtime": 6.1763, |
|
"eval_samples_per_second": 99.736, |
|
"eval_steps_per_second": 3.238, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 4.737295434969854e-06, |
|
"loss": 0.0248, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 4.3066322136089585e-06, |
|
"loss": 0.0329, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 27.59, |
|
"learning_rate": 3.875968992248062e-06, |
|
"loss": 0.0307, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 27.82, |
|
"learning_rate": 3.4453057708871665e-06, |
|
"loss": 0.0242, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.9383116883116883, |
|
"eval_loss": 0.24687811732292175, |
|
"eval_runtime": 5.9381, |
|
"eval_samples_per_second": 103.737, |
|
"eval_steps_per_second": 3.368, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 3.0146425495262704e-06, |
|
"loss": 0.0295, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 28.28, |
|
"learning_rate": 2.583979328165375e-06, |
|
"loss": 0.0161, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 28.51, |
|
"learning_rate": 2.1533161068044793e-06, |
|
"loss": 0.034, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 1.7226528854435832e-06, |
|
"loss": 0.0247, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 1.2919896640826874e-06, |
|
"loss": 0.0201, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.9366883116883117, |
|
"eval_loss": 0.2533850371837616, |
|
"eval_runtime": 6.3893, |
|
"eval_samples_per_second": 96.411, |
|
"eval_steps_per_second": 3.13, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 8.613264427217916e-07, |
|
"loss": 0.0262, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 29.43, |
|
"learning_rate": 4.306632213608958e-07, |
|
"loss": 0.0226, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"learning_rate": 0.0, |
|
"loss": 0.0354, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"eval_accuracy": 0.9366883116883117, |
|
"eval_loss": 0.2525167465209961, |
|
"eval_runtime": 6.455, |
|
"eval_samples_per_second": 95.43, |
|
"eval_steps_per_second": 3.098, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 29.66, |
|
"step": 1290, |
|
"total_flos": 4.0838875031628657e+18, |
|
"train_loss": 0.17642173106356185, |
|
"train_runtime": 3123.2223, |
|
"train_samples_per_second": 53.205, |
|
"train_steps_per_second": 0.413 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1290, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 4.0838875031628657e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|