|
{ |
|
"best_metric": 0.7314792275428772, |
|
"best_model_checkpoint": "./vit-base-beans/checkpoint-640", |
|
"epoch": 13.061224489795919, |
|
"global_step": 640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019795918367346938, |
|
"loss": 3.374, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001959183673469388, |
|
"loss": 3.3572, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019387755102040816, |
|
"loss": 3.3211, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00019183673469387756, |
|
"loss": 3.2257, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.03470031545741325, |
|
"eval_loss": 3.4292807579040527, |
|
"eval_runtime": 3.7472, |
|
"eval_samples_per_second": 84.597, |
|
"eval_steps_per_second": 10.675, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00018979591836734697, |
|
"loss": 3.1143, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00018775510204081634, |
|
"loss": 3.032, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00018571428571428572, |
|
"loss": 2.8104, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00018367346938775512, |
|
"loss": 2.6674, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.17350157728706625, |
|
"eval_loss": 2.952009916305542, |
|
"eval_runtime": 3.3938, |
|
"eval_samples_per_second": 93.405, |
|
"eval_steps_per_second": 11.786, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001816326530612245, |
|
"loss": 2.5131, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001795918367346939, |
|
"loss": 2.5287, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00017755102040816327, |
|
"loss": 2.2121, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00017551020408163265, |
|
"loss": 2.0048, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.48580441640378547, |
|
"eval_loss": 2.363018035888672, |
|
"eval_runtime": 3.7903, |
|
"eval_samples_per_second": 83.635, |
|
"eval_steps_per_second": 10.553, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00017346938775510205, |
|
"loss": 1.8413, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00017142857142857143, |
|
"loss": 1.7897, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00016938775510204083, |
|
"loss": 1.607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.00016734693877551023, |
|
"loss": 1.3493, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_accuracy": 0.5488958990536278, |
|
"eval_loss": 1.8864167928695679, |
|
"eval_runtime": 4.2623, |
|
"eval_samples_per_second": 74.372, |
|
"eval_steps_per_second": 9.385, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 0.0001653061224489796, |
|
"loss": 1.2421, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.00016326530612244898, |
|
"loss": 1.0779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 0.00016122448979591838, |
|
"loss": 1.0038, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 0.00015918367346938776, |
|
"loss": 1.0887, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_accuracy": 0.7350157728706624, |
|
"eval_loss": 1.4275710582733154, |
|
"eval_runtime": 3.3105, |
|
"eval_samples_per_second": 95.757, |
|
"eval_steps_per_second": 12.083, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00015714285714285716, |
|
"loss": 0.7734, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00015510204081632654, |
|
"loss": 0.7969, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 0.0001530612244897959, |
|
"loss": 0.7953, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0001510204081632653, |
|
"loss": 0.6649, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.7728706624605678, |
|
"eval_loss": 1.2706444263458252, |
|
"eval_runtime": 4.1354, |
|
"eval_samples_per_second": 76.656, |
|
"eval_steps_per_second": 9.673, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.00014897959183673472, |
|
"loss": 0.5647, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 0.0001469387755102041, |
|
"loss": 0.7201, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0001448979591836735, |
|
"loss": 0.606, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.00014285714285714287, |
|
"loss": 0.5396, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"eval_accuracy": 0.7981072555205048, |
|
"eval_loss": 1.125301480293274, |
|
"eval_runtime": 3.3532, |
|
"eval_samples_per_second": 94.537, |
|
"eval_steps_per_second": 11.929, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00014081632653061224, |
|
"loss": 0.3407, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00013877551020408165, |
|
"loss": 0.3539, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.00013673469387755102, |
|
"loss": 0.3608, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 0.0001346938775510204, |
|
"loss": 0.3162, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_accuracy": 0.804416403785489, |
|
"eval_loss": 1.0156543254852295, |
|
"eval_runtime": 4.7629, |
|
"eval_samples_per_second": 66.556, |
|
"eval_steps_per_second": 8.398, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 0.0001326530612244898, |
|
"loss": 0.335, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 0.00013061224489795917, |
|
"loss": 0.2333, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00012857142857142858, |
|
"loss": 0.1931, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.00012653061224489798, |
|
"loss": 0.1909, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_accuracy": 0.8548895899053628, |
|
"eval_loss": 0.8516904711723328, |
|
"eval_runtime": 3.3664, |
|
"eval_samples_per_second": 94.165, |
|
"eval_steps_per_second": 11.882, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00012448979591836735, |
|
"loss": 0.1801, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.00012244897959183676, |
|
"loss": 0.1785, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.00012040816326530613, |
|
"loss": 0.1627, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 0.00011836734693877552, |
|
"loss": 0.154, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"eval_accuracy": 0.8138801261829653, |
|
"eval_loss": 0.945625364780426, |
|
"eval_runtime": 3.3591, |
|
"eval_samples_per_second": 94.37, |
|
"eval_steps_per_second": 11.908, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 0.0001163265306122449, |
|
"loss": 0.1268, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.00011428571428571428, |
|
"loss": 0.1352, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 0.00011224489795918367, |
|
"loss": 0.1214, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 0.00011020408163265306, |
|
"loss": 0.1519, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"eval_accuracy": 0.8454258675078864, |
|
"eval_loss": 0.8139908313751221, |
|
"eval_runtime": 4.5811, |
|
"eval_samples_per_second": 69.197, |
|
"eval_steps_per_second": 8.731, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.18, |
|
"learning_rate": 0.00010816326530612246, |
|
"loss": 0.0773, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 0.00010612244897959185, |
|
"loss": 0.1083, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 0.00010408163265306123, |
|
"loss": 0.086, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00010204081632653062, |
|
"loss": 0.0672, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_accuracy": 0.8517350157728707, |
|
"eval_loss": 0.7407823204994202, |
|
"eval_runtime": 3.8145, |
|
"eval_samples_per_second": 83.104, |
|
"eval_steps_per_second": 10.486, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001, |
|
"loss": 0.067, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 9.79591836734694e-05, |
|
"loss": 0.0559, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 9.591836734693878e-05, |
|
"loss": 0.055, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 9.387755102040817e-05, |
|
"loss": 0.0498, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"eval_accuracy": 0.8580441640378549, |
|
"eval_loss": 0.7550804615020752, |
|
"eval_runtime": 3.3075, |
|
"eval_samples_per_second": 95.842, |
|
"eval_steps_per_second": 12.094, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 10.82, |
|
"learning_rate": 9.183673469387756e-05, |
|
"loss": 0.0483, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 8.979591836734695e-05, |
|
"loss": 0.0461, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 8.775510204081632e-05, |
|
"loss": 0.043, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 8.571428571428571e-05, |
|
"loss": 0.0618, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"eval_accuracy": 0.8675078864353313, |
|
"eval_loss": 0.7529485821723938, |
|
"eval_runtime": 3.2693, |
|
"eval_samples_per_second": 96.964, |
|
"eval_steps_per_second": 12.235, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 8.367346938775511e-05, |
|
"loss": 0.0409, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 11.84, |
|
"learning_rate": 8.163265306122449e-05, |
|
"loss": 0.0433, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 12.04, |
|
"learning_rate": 7.959183673469388e-05, |
|
"loss": 0.0367, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 7.755102040816327e-05, |
|
"loss": 0.0352, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"eval_accuracy": 0.8643533123028391, |
|
"eval_loss": 0.7547315955162048, |
|
"eval_runtime": 4.666, |
|
"eval_samples_per_second": 67.938, |
|
"eval_steps_per_second": 8.573, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.45, |
|
"learning_rate": 7.551020408163266e-05, |
|
"loss": 0.0377, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 12.65, |
|
"learning_rate": 7.346938775510205e-05, |
|
"loss": 0.0331, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 7.142857142857143e-05, |
|
"loss": 0.0678, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 6.938775510204082e-05, |
|
"loss": 0.0381, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"eval_accuracy": 0.8769716088328076, |
|
"eval_loss": 0.7314792275428772, |
|
"eval_runtime": 4.0948, |
|
"eval_samples_per_second": 77.416, |
|
"eval_steps_per_second": 9.769, |
|
"step": 640 |
|
} |
|
], |
|
"max_steps": 980, |
|
"num_train_epochs": 20, |
|
"total_flos": 7.79603043959083e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|