|
{ |
|
"best_metric": 0.7777777777777778, |
|
"best_model_checkpoint": "CIRCL_website_classifier\\checkpoint-658", |
|
"epoch": 15.0, |
|
"global_step": 705, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 7.042253521126762e-06, |
|
"loss": 0.6238, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.4084507042253523e-05, |
|
"loss": 0.5542, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.112676056338028e-05, |
|
"loss": 0.4952, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.8169014084507046e-05, |
|
"loss": 0.5278, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7160493827160493, |
|
"eval_loss": 0.7332170009613037, |
|
"eval_runtime": 3.93, |
|
"eval_samples_per_second": 20.611, |
|
"eval_steps_per_second": 1.527, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.5211267605633805e-05, |
|
"loss": 0.4947, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 4.225352112676056e-05, |
|
"loss": 0.4824, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.929577464788733e-05, |
|
"loss": 0.4869, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.929022082018927e-05, |
|
"loss": 0.5236, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.850157728706625e-05, |
|
"loss": 0.473, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.691358024691358, |
|
"eval_loss": 0.7856547832489014, |
|
"eval_runtime": 4.0912, |
|
"eval_samples_per_second": 19.799, |
|
"eval_steps_per_second": 1.467, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.771293375394322e-05, |
|
"loss": 0.4583, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.6924290220820195e-05, |
|
"loss": 0.5197, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 4.6135646687697165e-05, |
|
"loss": 0.4843, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 4.5347003154574134e-05, |
|
"loss": 0.519, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.4558359621451104e-05, |
|
"loss": 0.5199, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7037037037037037, |
|
"eval_loss": 0.6900804042816162, |
|
"eval_runtime": 4.0734, |
|
"eval_samples_per_second": 19.885, |
|
"eval_steps_per_second": 1.473, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 4.376971608832808e-05, |
|
"loss": 0.489, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.298107255520505e-05, |
|
"loss": 0.481, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.219242902208202e-05, |
|
"loss": 0.5719, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.140378548895899e-05, |
|
"loss": 0.578, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7160493827160493, |
|
"eval_loss": 0.6246393918991089, |
|
"eval_runtime": 4.3226, |
|
"eval_samples_per_second": 18.739, |
|
"eval_steps_per_second": 1.388, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 4.0615141955835965e-05, |
|
"loss": 0.6731, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 3.982649842271294e-05, |
|
"loss": 0.6082, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.903785488958991e-05, |
|
"loss": 0.628, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 3.824921135646688e-05, |
|
"loss": 0.6112, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.746056782334385e-05, |
|
"loss": 0.5931, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6172839506172839, |
|
"eval_loss": 0.7297191619873047, |
|
"eval_runtime": 4.3049, |
|
"eval_samples_per_second": 18.816, |
|
"eval_steps_per_second": 1.394, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 3.667192429022082e-05, |
|
"loss": 0.5872, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 3.5883280757097795e-05, |
|
"loss": 0.5647, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 3.5094637223974765e-05, |
|
"loss": 0.6125, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 3.4305993690851734e-05, |
|
"loss": 0.6125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 3.351735015772871e-05, |
|
"loss": 0.567, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7037037037037037, |
|
"eval_loss": 0.7199741005897522, |
|
"eval_runtime": 4.5208, |
|
"eval_samples_per_second": 17.917, |
|
"eval_steps_per_second": 1.327, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 3.272870662460568e-05, |
|
"loss": 0.5294, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.194006309148265e-05, |
|
"loss": 0.6032, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.1151419558359626e-05, |
|
"loss": 0.5943, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.0362776025236596e-05, |
|
"loss": 0.5512, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7283950617283951, |
|
"eval_loss": 0.6853248476982117, |
|
"eval_runtime": 4.5083, |
|
"eval_samples_per_second": 17.967, |
|
"eval_steps_per_second": 1.331, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 2.9574132492113565e-05, |
|
"loss": 0.5808, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 2.8785488958990535e-05, |
|
"loss": 0.5226, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 2.7996845425867508e-05, |
|
"loss": 0.5454, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 2.7208201892744477e-05, |
|
"loss": 0.5752, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 2.6419558359621453e-05, |
|
"loss": 0.529, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7037037037037037, |
|
"eval_loss": 0.7038930058479309, |
|
"eval_runtime": 4.4325, |
|
"eval_samples_per_second": 18.274, |
|
"eval_steps_per_second": 1.354, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 2.5630914826498426e-05, |
|
"loss": 0.4872, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 2.4842271293375396e-05, |
|
"loss": 0.548, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 2.405362776025237e-05, |
|
"loss": 0.5507, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 2.3264984227129338e-05, |
|
"loss": 0.5179, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 2.2476340694006308e-05, |
|
"loss": 0.5467, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6790123456790124, |
|
"eval_loss": 0.8216166496276855, |
|
"eval_runtime": 3.9158, |
|
"eval_samples_per_second": 20.685, |
|
"eval_steps_per_second": 1.532, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 2.1687697160883284e-05, |
|
"loss": 0.5055, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 2.0899053627760254e-05, |
|
"loss": 0.5338, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.0110410094637226e-05, |
|
"loss": 0.4838, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 1.9321766561514196e-05, |
|
"loss": 0.5182, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.853312302839117e-05, |
|
"loss": 0.5517, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7407407407407407, |
|
"eval_loss": 0.7132633328437805, |
|
"eval_runtime": 4.247, |
|
"eval_samples_per_second": 19.072, |
|
"eval_steps_per_second": 1.413, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 1.7744479495268142e-05, |
|
"loss": 0.5663, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 1.695583596214511e-05, |
|
"loss": 0.527, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 10.64, |
|
"learning_rate": 1.616719242902208e-05, |
|
"loss": 0.4708, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 1.5378548895899054e-05, |
|
"loss": 0.4917, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7283950617283951, |
|
"eval_loss": 0.7398880124092102, |
|
"eval_runtime": 5.0919, |
|
"eval_samples_per_second": 15.908, |
|
"eval_steps_per_second": 1.178, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 11.06, |
|
"learning_rate": 1.4589905362776027e-05, |
|
"loss": 0.5304, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 11.28, |
|
"learning_rate": 1.3801261829652998e-05, |
|
"loss": 0.537, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 1.3012618296529969e-05, |
|
"loss": 0.4953, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 1.222397476340694e-05, |
|
"loss": 0.4726, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.91, |
|
"learning_rate": 1.1435331230283911e-05, |
|
"loss": 0.4638, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7283950617283951, |
|
"eval_loss": 0.7797142863273621, |
|
"eval_runtime": 4.4579, |
|
"eval_samples_per_second": 18.17, |
|
"eval_steps_per_second": 1.346, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 1.0646687697160884e-05, |
|
"loss": 0.4413, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 9.858044164037856e-06, |
|
"loss": 0.5107, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 9.069400630914827e-06, |
|
"loss": 0.4521, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 12.77, |
|
"learning_rate": 8.280757097791798e-06, |
|
"loss": 0.5601, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.98, |
|
"learning_rate": 7.492113564668771e-06, |
|
"loss": 0.5082, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7160493827160493, |
|
"eval_loss": 0.6203879714012146, |
|
"eval_runtime": 4.284, |
|
"eval_samples_per_second": 18.908, |
|
"eval_steps_per_second": 1.401, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 6.703470031545741e-06, |
|
"loss": 0.5038, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 13.4, |
|
"learning_rate": 5.914826498422713e-06, |
|
"loss": 0.4934, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 13.62, |
|
"learning_rate": 5.1261829652996846e-06, |
|
"loss": 0.4834, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 13.83, |
|
"learning_rate": 4.337539432176657e-06, |
|
"loss": 0.5358, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7777777777777778, |
|
"eval_loss": 0.626596212387085, |
|
"eval_runtime": 4.1062, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 1.461, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 3.5488958990536283e-06, |
|
"loss": 0.5051, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 14.26, |
|
"learning_rate": 2.7602523659305995e-06, |
|
"loss": 0.4712, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 14.47, |
|
"learning_rate": 1.971608832807571e-06, |
|
"loss": 0.4649, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 1.1829652996845425e-06, |
|
"loss": 0.4604, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 3.943217665615142e-07, |
|
"loss": 0.5267, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6790123456790124, |
|
"eval_loss": 0.7902321815490723, |
|
"eval_runtime": 4.7108, |
|
"eval_samples_per_second": 17.194, |
|
"eval_steps_per_second": 1.274, |
|
"step": 705 |
|
} |
|
], |
|
"max_steps": 705, |
|
"num_train_epochs": 15, |
|
"total_flos": 4.550617827979776e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|