{
  "best_metric": 0.9917032698877501,
  "best_model_checkpoint": "./checkpoint/checkpoint-905",
  "epoch": 4.997245179063361,
  "global_step": 905,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 3.6764705882352942e-06,
      "loss": 1.0502,
      "step": 10
    },
    {
      "epoch": 0.11,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 1.0183,
      "step": 20
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.1029411764705883e-05,
      "loss": 0.9565,
      "step": 30
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 0.8426,
      "step": 40
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8382352941176472e-05,
      "loss": 0.6867,
      "step": 50
    },
    {
      "epoch": 0.33,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 0.5298,
      "step": 60
    },
    {
      "epoch": 0.39,
      "learning_rate": 2.5735294117647057e-05,
      "loss": 0.3856,
      "step": 70
    },
    {
      "epoch": 0.44,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.2925,
      "step": 80
    },
    {
      "epoch": 0.5,
      "learning_rate": 3.308823529411765e-05,
      "loss": 0.234,
      "step": 90
    },
    {
      "epoch": 0.55,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 0.1777,
      "step": 100
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.044117647058824e-05,
      "loss": 0.1539,
      "step": 110
    },
    {
      "epoch": 0.66,
      "learning_rate": 4.411764705882353e-05,
      "loss": 0.114,
      "step": 120
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.7794117647058826e-05,
      "loss": 0.123,
      "step": 130
    },
    {
      "epoch": 0.77,
      "learning_rate": 5.147058823529411e-05,
      "loss": 0.1118,
      "step": 140
    },
    {
      "epoch": 0.83,
      "learning_rate": 5.514705882352942e-05,
      "loss": 0.1032,
      "step": 150
    },
    {
      "epoch": 0.88,
      "learning_rate": 5.882352941176471e-05,
      "loss": 0.1201,
      "step": 160
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.25e-05,
      "loss": 0.0914,
      "step": 170
    },
    {
      "epoch": 0.99,
      "learning_rate": 6.61764705882353e-05,
      "loss": 0.0825,
      "step": 180
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9858467545143973,
      "eval_loss": 0.07265167683362961,
      "eval_runtime": 59.8085,
      "eval_samples_per_second": 68.519,
      "eval_steps_per_second": 2.157,
      "step": 181
    },
    {
      "epoch": 1.05,
      "learning_rate": 6.985294117647059e-05,
      "loss": 0.0836,
      "step": 190
    },
    {
      "epoch": 1.1,
      "learning_rate": 7.352941176470589e-05,
      "loss": 0.0691,
      "step": 200
    },
    {
      "epoch": 1.16,
      "learning_rate": 7.720588235294119e-05,
      "loss": 0.0834,
      "step": 210
    },
    {
      "epoch": 1.21,
      "learning_rate": 8.088235294117648e-05,
      "loss": 0.0876,
      "step": 220
    },
    {
      "epoch": 1.27,
      "learning_rate": 8.455882352941176e-05,
      "loss": 0.077,
      "step": 230
    },
    {
      "epoch": 1.33,
      "learning_rate": 8.823529411764706e-05,
      "loss": 0.0671,
      "step": 240
    },
    {
      "epoch": 1.38,
      "learning_rate": 9.191176470588235e-05,
      "loss": 0.0623,
      "step": 250
    },
    {
      "epoch": 1.44,
      "learning_rate": 9.558823529411765e-05,
      "loss": 0.0539,
      "step": 260
    },
    {
      "epoch": 1.49,
      "learning_rate": 9.926470588235295e-05,
      "loss": 0.0645,
      "step": 270
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.873617693522908e-05,
      "loss": 0.061,
      "step": 280
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.715639810426542e-05,
      "loss": 0.0519,
      "step": 290
    },
    {
      "epoch": 1.66,
      "learning_rate": 9.557661927330174e-05,
      "loss": 0.0697,
      "step": 300
    },
    {
      "epoch": 1.71,
      "learning_rate": 9.399684044233808e-05,
      "loss": 0.062,
      "step": 310
    },
    {
      "epoch": 1.77,
      "learning_rate": 9.241706161137442e-05,
      "loss": 0.0681,
      "step": 320
    },
    {
      "epoch": 1.82,
      "learning_rate": 9.083728278041075e-05,
      "loss": 0.0698,
      "step": 330
    },
    {
      "epoch": 1.88,
      "learning_rate": 8.925750394944709e-05,
      "loss": 0.067,
      "step": 340
    },
    {
      "epoch": 1.93,
      "learning_rate": 8.767772511848341e-05,
      "loss": 0.0728,
      "step": 350
    },
    {
      "epoch": 1.99,
      "learning_rate": 8.609794628751975e-05,
      "loss": 0.06,
      "step": 360
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9812103465104929,
      "eval_loss": 0.06039927154779434,
      "eval_runtime": 59.4929,
      "eval_samples_per_second": 68.882,
      "eval_steps_per_second": 2.168,
      "step": 362
    },
    {
      "epoch": 2.04,
      "learning_rate": 8.451816745655609e-05,
      "loss": 0.065,
      "step": 370
    },
    {
      "epoch": 2.1,
      "learning_rate": 8.293838862559243e-05,
      "loss": 0.0635,
      "step": 380
    },
    {
      "epoch": 2.15,
      "learning_rate": 8.135860979462876e-05,
      "loss": 0.0532,
      "step": 390
    },
    {
      "epoch": 2.21,
      "learning_rate": 7.977883096366509e-05,
      "loss": 0.0497,
      "step": 400
    },
    {
      "epoch": 2.26,
      "learning_rate": 7.819905213270142e-05,
      "loss": 0.0665,
      "step": 410
    },
    {
      "epoch": 2.32,
      "learning_rate": 7.661927330173776e-05,
      "loss": 0.0525,
      "step": 420
    },
    {
      "epoch": 2.37,
      "learning_rate": 7.50394944707741e-05,
      "loss": 0.0514,
      "step": 430
    },
    {
      "epoch": 2.43,
      "learning_rate": 7.345971563981043e-05,
      "loss": 0.0649,
      "step": 440
    },
    {
      "epoch": 2.48,
      "learning_rate": 7.187993680884676e-05,
      "loss": 0.064,
      "step": 450
    },
    {
      "epoch": 2.54,
      "learning_rate": 7.03001579778831e-05,
      "loss": 0.0488,
      "step": 460
    },
    {
      "epoch": 2.6,
      "learning_rate": 6.872037914691943e-05,
      "loss": 0.0405,
      "step": 470
    },
    {
      "epoch": 2.65,
      "learning_rate": 6.714060031595577e-05,
      "loss": 0.0357,
      "step": 480
    },
    {
      "epoch": 2.71,
      "learning_rate": 6.556082148499211e-05,
      "loss": 0.0439,
      "step": 490
    },
    {
      "epoch": 2.76,
      "learning_rate": 6.398104265402843e-05,
      "loss": 0.0469,
      "step": 500
    },
    {
      "epoch": 2.82,
      "learning_rate": 6.240126382306477e-05,
      "loss": 0.0289,
      "step": 510
    },
    {
      "epoch": 2.87,
      "learning_rate": 6.0821484992101105e-05,
      "loss": 0.0431,
      "step": 520
    },
    {
      "epoch": 2.93,
      "learning_rate": 5.924170616113744e-05,
      "loss": 0.0284,
      "step": 530
    },
    {
      "epoch": 2.98,
      "learning_rate": 5.766192733017378e-05,
      "loss": 0.0467,
      "step": 540
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9838945827232797,
      "eval_loss": 0.04717087373137474,
      "eval_runtime": 59.6298,
      "eval_samples_per_second": 68.724,
      "eval_steps_per_second": 2.163,
      "step": 543
    },
    {
      "epoch": 3.04,
      "learning_rate": 5.608214849921012e-05,
      "loss": 0.0503,
      "step": 550
    },
    {
      "epoch": 3.09,
      "learning_rate": 5.450236966824645e-05,
      "loss": 0.043,
      "step": 560
    },
    {
      "epoch": 3.15,
      "learning_rate": 5.2922590837282785e-05,
      "loss": 0.0381,
      "step": 570
    },
    {
      "epoch": 3.2,
      "learning_rate": 5.134281200631912e-05,
      "loss": 0.0295,
      "step": 580
    },
    {
      "epoch": 3.26,
      "learning_rate": 4.976303317535545e-05,
      "loss": 0.0411,
      "step": 590
    },
    {
      "epoch": 3.31,
      "learning_rate": 4.818325434439179e-05,
      "loss": 0.0242,
      "step": 600
    },
    {
      "epoch": 3.37,
      "learning_rate": 4.660347551342813e-05,
      "loss": 0.0365,
      "step": 610
    },
    {
      "epoch": 3.42,
      "learning_rate": 4.502369668246446e-05,
      "loss": 0.021,
      "step": 620
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.3443917851500794e-05,
      "loss": 0.0492,
      "step": 630
    },
    {
      "epoch": 3.53,
      "learning_rate": 4.1864139020537125e-05,
      "loss": 0.0391,
      "step": 640
    },
    {
      "epoch": 3.59,
      "learning_rate": 4.028436018957346e-05,
      "loss": 0.0291,
      "step": 650
    },
    {
      "epoch": 3.64,
      "learning_rate": 3.87045813586098e-05,
      "loss": 0.0317,
      "step": 660
    },
    {
      "epoch": 3.7,
      "learning_rate": 3.712480252764613e-05,
      "loss": 0.0355,
      "step": 670
    },
    {
      "epoch": 3.75,
      "learning_rate": 3.554502369668247e-05,
      "loss": 0.0407,
      "step": 680
    },
    {
      "epoch": 3.81,
      "learning_rate": 3.39652448657188e-05,
      "loss": 0.0257,
      "step": 690
    },
    {
      "epoch": 3.87,
      "learning_rate": 3.2385466034755135e-05,
      "loss": 0.0382,
      "step": 700
    },
    {
      "epoch": 3.92,
      "learning_rate": 3.080568720379147e-05,
      "loss": 0.0323,
      "step": 710
    },
    {
      "epoch": 3.98,
      "learning_rate": 2.9225908372827802e-05,
      "loss": 0.0233,
      "step": 720
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9897510980966325,
      "eval_loss": 0.028671853244304657,
      "eval_runtime": 59.9206,
      "eval_samples_per_second": 68.391,
      "eval_steps_per_second": 2.153,
      "step": 724
    },
    {
      "epoch": 4.03,
      "learning_rate": 2.764612954186414e-05,
      "loss": 0.031,
      "step": 730
    },
    {
      "epoch": 4.09,
      "learning_rate": 2.6066350710900477e-05,
      "loss": 0.0272,
      "step": 740
    },
    {
      "epoch": 4.14,
      "learning_rate": 2.448657187993681e-05,
      "loss": 0.0283,
      "step": 750
    },
    {
      "epoch": 4.2,
      "learning_rate": 2.2906793048973144e-05,
      "loss": 0.0208,
      "step": 760
    },
    {
      "epoch": 4.25,
      "learning_rate": 2.132701421800948e-05,
      "loss": 0.0439,
      "step": 770
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.9747235387045815e-05,
      "loss": 0.0339,
      "step": 780
    },
    {
      "epoch": 4.36,
      "learning_rate": 1.816745655608215e-05,
      "loss": 0.0298,
      "step": 790
    },
    {
      "epoch": 4.42,
      "learning_rate": 1.6587677725118483e-05,
      "loss": 0.0279,
      "step": 800
    },
    {
      "epoch": 4.47,
      "learning_rate": 1.500789889415482e-05,
      "loss": 0.0227,
      "step": 810
    },
    {
      "epoch": 4.53,
      "learning_rate": 1.3428120063191154e-05,
      "loss": 0.0215,
      "step": 820
    },
    {
      "epoch": 4.58,
      "learning_rate": 1.184834123222749e-05,
      "loss": 0.0276,
      "step": 830
    },
    {
      "epoch": 4.64,
      "learning_rate": 1.0268562401263823e-05,
      "loss": 0.0356,
      "step": 840
    },
    {
      "epoch": 4.69,
      "learning_rate": 8.688783570300159e-06,
      "loss": 0.0168,
      "step": 850
    },
    {
      "epoch": 4.75,
      "learning_rate": 7.109004739336493e-06,
      "loss": 0.0316,
      "step": 860
    },
    {
      "epoch": 4.8,
      "learning_rate": 5.529225908372828e-06,
      "loss": 0.0237,
      "step": 870
    },
    {
      "epoch": 4.86,
      "learning_rate": 3.949447077409163e-06,
      "loss": 0.0207,
      "step": 880
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.3696682464454976e-06,
      "loss": 0.0305,
      "step": 890
    },
    {
      "epoch": 4.97,
      "learning_rate": 7.898894154818326e-07,
      "loss": 0.0327,
      "step": 900
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9917032698877501,
      "eval_loss": 0.025840837508440018,
      "eval_runtime": 59.7361,
      "eval_samples_per_second": 68.602,
      "eval_steps_per_second": 2.159,
      "step": 905
    }
  ],
  "max_steps": 905,
  "num_train_epochs": 5,
  "total_flos": 8.992561039593578e+18,
  "trial_name": null,
  "trial_params": null
}