|
{ |
|
"best_metric": 0.23302823305130005, |
|
"best_model_checkpoint": "./results_train/roberta-base/qnli/checkpoint-7000", |
|
"epoch": 10.0, |
|
"global_step": 65470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.545176889793841e-06, |
|
"loss": 0.6919, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.7501372872048325, |
|
"eval_loss": 0.6161059141159058, |
|
"eval_runtime": 15.4359, |
|
"eval_samples_per_second": 353.916, |
|
"eval_steps_per_second": 44.248, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5.090353779587682e-06, |
|
"loss": 0.4801, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.8550247116968699, |
|
"eval_loss": 0.3524323105812073, |
|
"eval_runtime": 15.4744, |
|
"eval_samples_per_second": 353.035, |
|
"eval_steps_per_second": 44.137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 7.635530669381522e-06, |
|
"loss": 0.4049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.8742449203734212, |
|
"eval_loss": 0.301090806722641, |
|
"eval_runtime": 15.4636, |
|
"eval_samples_per_second": 353.282, |
|
"eval_steps_per_second": 44.168, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.0180707559175364e-05, |
|
"loss": 0.3827, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.876807614863628, |
|
"eval_loss": 0.3125079274177551, |
|
"eval_runtime": 15.5809, |
|
"eval_samples_per_second": 350.621, |
|
"eval_steps_per_second": 43.836, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2725884448969203e-05, |
|
"loss": 0.3445, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.8923668314113125, |
|
"eval_loss": 0.29156529903411865, |
|
"eval_runtime": 15.4606, |
|
"eval_samples_per_second": 353.351, |
|
"eval_steps_per_second": 44.177, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.5271061338763045e-05, |
|
"loss": 0.3567, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.8991396668497162, |
|
"eval_loss": 0.2662096619606018, |
|
"eval_runtime": 15.4956, |
|
"eval_samples_per_second": 352.551, |
|
"eval_steps_per_second": 44.077, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.7816238228556887e-05, |
|
"loss": 0.3422, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.8980413692110562, |
|
"eval_loss": 0.2656656801700592, |
|
"eval_runtime": 15.5011, |
|
"eval_samples_per_second": 352.426, |
|
"eval_steps_per_second": 44.061, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.997692595180449e-05, |
|
"loss": 0.3257, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.9020684605528098, |
|
"eval_loss": 0.2830497622489929, |
|
"eval_runtime": 15.5605, |
|
"eval_samples_per_second": 351.081, |
|
"eval_steps_per_second": 43.893, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9814432654652997e-05, |
|
"loss": 0.3506, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.9062786015010068, |
|
"eval_loss": 0.24342051148414612, |
|
"eval_runtime": 15.5612, |
|
"eval_samples_per_second": 351.065, |
|
"eval_steps_per_second": 43.891, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.9651939357501505e-05, |
|
"loss": 0.317, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.9051803038623467, |
|
"eval_loss": 0.24402067065238953, |
|
"eval_runtime": 15.5216, |
|
"eval_samples_per_second": 351.96, |
|
"eval_steps_per_second": 44.003, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.9489446060350013e-05, |
|
"loss": 0.3152, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9015193117334798, |
|
"eval_loss": 0.2785772681236267, |
|
"eval_runtime": 15.4885, |
|
"eval_samples_per_second": 352.712, |
|
"eval_steps_per_second": 44.097, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.932695276319852e-05, |
|
"loss": 0.2966, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.9082921471718836, |
|
"eval_loss": 0.2599336504936218, |
|
"eval_runtime": 15.5494, |
|
"eval_samples_per_second": 351.332, |
|
"eval_steps_per_second": 43.924, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.916445946604703e-05, |
|
"loss": 0.298, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9070107999267801, |
|
"eval_loss": 0.2616613209247589, |
|
"eval_runtime": 16.1579, |
|
"eval_samples_per_second": 338.101, |
|
"eval_steps_per_second": 42.27, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9001966168895533e-05, |
|
"loss": 0.2634, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.9154310818231741, |
|
"eval_loss": 0.23302823305130005, |
|
"eval_runtime": 15.5105, |
|
"eval_samples_per_second": 352.213, |
|
"eval_steps_per_second": 44.035, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.883947287174404e-05, |
|
"loss": 0.2625, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.9108548416620904, |
|
"eval_loss": 0.259826123714447, |
|
"eval_runtime": 16.1363, |
|
"eval_samples_per_second": 338.553, |
|
"eval_steps_per_second": 42.327, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.867697957459255e-05, |
|
"loss": 0.2596, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.9099395936298736, |
|
"eval_loss": 0.36161935329437256, |
|
"eval_runtime": 15.5589, |
|
"eval_samples_per_second": 351.116, |
|
"eval_steps_per_second": 43.898, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.8514486277441056e-05, |
|
"loss": 0.2457, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.909573494416987, |
|
"eval_loss": 0.2799856662750244, |
|
"eval_runtime": 15.5157, |
|
"eval_samples_per_second": 352.095, |
|
"eval_steps_per_second": 44.02, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.8351992980289564e-05, |
|
"loss": 0.2545, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_accuracy": 0.9081090975654402, |
|
"eval_loss": 0.29598304629325867, |
|
"eval_runtime": 15.4941, |
|
"eval_samples_per_second": 352.587, |
|
"eval_steps_per_second": 44.081, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.8189499683138072e-05, |
|
"loss": 0.2535, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_accuracy": 0.9114039904814205, |
|
"eval_loss": 0.23894734680652618, |
|
"eval_runtime": 16.236, |
|
"eval_samples_per_second": 336.474, |
|
"eval_steps_per_second": 42.067, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.802700638598658e-05, |
|
"loss": 0.2639, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.8912685337726524, |
|
"eval_loss": 0.33427268266677856, |
|
"eval_runtime": 15.4977, |
|
"eval_samples_per_second": 352.503, |
|
"eval_steps_per_second": 44.071, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.7864513088835088e-05, |
|
"loss": 0.2434, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.9115870400878638, |
|
"eval_loss": 0.24697424471378326, |
|
"eval_runtime": 15.5007, |
|
"eval_samples_per_second": 352.436, |
|
"eval_steps_per_second": 44.063, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 1.7702019791683592e-05, |
|
"loss": 0.2613, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_accuracy": 0.9092073952041003, |
|
"eval_loss": 0.294897198677063, |
|
"eval_runtime": 15.4616, |
|
"eval_samples_per_second": 353.327, |
|
"eval_steps_per_second": 44.174, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 1.75395264945321e-05, |
|
"loss": 0.2456, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_accuracy": 0.9163463298553908, |
|
"eval_loss": 0.2556602358818054, |
|
"eval_runtime": 15.4884, |
|
"eval_samples_per_second": 352.715, |
|
"eval_steps_per_second": 44.097, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.737703319738061e-05, |
|
"loss": 0.2483, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.9141497345780707, |
|
"eval_loss": 0.24623550474643707, |
|
"eval_runtime": 15.544, |
|
"eval_samples_per_second": 351.453, |
|
"eval_steps_per_second": 43.94, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.7214539900229116e-05, |
|
"loss": 0.2524, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.9114039904814205, |
|
"eval_loss": 0.2453071027994156, |
|
"eval_runtime": 15.5138, |
|
"eval_samples_per_second": 352.139, |
|
"eval_steps_per_second": 44.025, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.7052046603077624e-05, |
|
"loss": 0.2467, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.9161632802489474, |
|
"eval_loss": 0.2610779404640198, |
|
"eval_runtime": 15.5605, |
|
"eval_samples_per_second": 351.082, |
|
"eval_steps_per_second": 43.893, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.6889553305926132e-05, |
|
"loss": 0.2059, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_accuracy": 0.9157971810360608, |
|
"eval_loss": 0.3070930540561676, |
|
"eval_runtime": 15.4895, |
|
"eval_samples_per_second": 352.691, |
|
"eval_steps_per_second": 44.094, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.672706000877464e-05, |
|
"loss": 0.1968, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.3205060660839081, |
|
"eval_runtime": 15.5462, |
|
"eval_samples_per_second": 351.404, |
|
"eval_steps_per_second": 43.933, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.6564566711623148e-05, |
|
"loss": 0.1944, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.9145158337909574, |
|
"eval_loss": 0.342997282743454, |
|
"eval_runtime": 15.462, |
|
"eval_samples_per_second": 353.317, |
|
"eval_steps_per_second": 44.173, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.6402073414471655e-05, |
|
"loss": 0.2065, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_accuracy": 0.9146988833974007, |
|
"eval_loss": 0.33879798650741577, |
|
"eval_runtime": 15.4872, |
|
"eval_samples_per_second": 352.743, |
|
"eval_steps_per_second": 44.101, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.6239580117320163e-05, |
|
"loss": 0.1992, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.9157971810360608, |
|
"eval_loss": 0.2569463551044464, |
|
"eval_runtime": 15.4551, |
|
"eval_samples_per_second": 353.476, |
|
"eval_steps_per_second": 44.193, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.607708682016867e-05, |
|
"loss": 0.1994, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.9108548416620904, |
|
"eval_loss": 0.33493661880493164, |
|
"eval_runtime": 15.5323, |
|
"eval_samples_per_second": 351.718, |
|
"eval_steps_per_second": 43.973, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.5914593523017176e-05, |
|
"loss": 0.2001, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_accuracy": 0.909573494416987, |
|
"eval_loss": 0.2850426137447357, |
|
"eval_runtime": 15.4842, |
|
"eval_samples_per_second": 352.811, |
|
"eval_steps_per_second": 44.109, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.5752100225865684e-05, |
|
"loss": 0.2014, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.9200073219842577, |
|
"eval_loss": 0.3214400112628937, |
|
"eval_runtime": 15.5272, |
|
"eval_samples_per_second": 351.833, |
|
"eval_steps_per_second": 43.987, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.558960692871419e-05, |
|
"loss": 0.2156, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_accuracy": 0.9134175361522973, |
|
"eval_loss": 0.3078743815422058, |
|
"eval_runtime": 15.5189, |
|
"eval_samples_per_second": 352.023, |
|
"eval_steps_per_second": 44.011, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.54271136315627e-05, |
|
"loss": 0.2036, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.9163463298553908, |
|
"eval_loss": 0.2739076316356659, |
|
"eval_runtime": 15.5712, |
|
"eval_samples_per_second": 350.84, |
|
"eval_steps_per_second": 43.863, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.5264620334411207e-05, |
|
"loss": 0.2118, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_accuracy": 0.918542925132711, |
|
"eval_loss": 0.2790161669254303, |
|
"eval_runtime": 15.4896, |
|
"eval_samples_per_second": 352.687, |
|
"eval_steps_per_second": 44.094, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.5102127037259715e-05, |
|
"loss": 0.2167, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.9167124290682775, |
|
"eval_loss": 0.2698538899421692, |
|
"eval_runtime": 15.51, |
|
"eval_samples_per_second": 352.224, |
|
"eval_steps_per_second": 44.036, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.4939633740108221e-05, |
|
"loss": 0.2015, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_accuracy": 0.9189090243455976, |
|
"eval_loss": 0.28947147727012634, |
|
"eval_runtime": 15.4881, |
|
"eval_samples_per_second": 352.721, |
|
"eval_steps_per_second": 44.098, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.477714044295673e-05, |
|
"loss": 0.1649, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_accuracy": 0.9161632802489474, |
|
"eval_loss": 0.37189313769340515, |
|
"eval_runtime": 15.48, |
|
"eval_samples_per_second": 352.907, |
|
"eval_steps_per_second": 44.121, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.4614647145805237e-05, |
|
"loss": 0.1505, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_accuracy": 0.9132344865458539, |
|
"eval_loss": 0.3699614107608795, |
|
"eval_runtime": 15.4613, |
|
"eval_samples_per_second": 353.334, |
|
"eval_steps_per_second": 44.175, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.4452153848653745e-05, |
|
"loss": 0.1509, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.9156141314296175, |
|
"eval_loss": 0.3720746636390686, |
|
"eval_runtime": 15.4811, |
|
"eval_samples_per_second": 352.881, |
|
"eval_steps_per_second": 44.118, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.4289660551502251e-05, |
|
"loss": 0.1517, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_accuracy": 0.9154310818231741, |
|
"eval_loss": 0.35659804940223694, |
|
"eval_runtime": 15.4964, |
|
"eval_samples_per_second": 352.533, |
|
"eval_steps_per_second": 44.075, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.412716725435076e-05, |
|
"loss": 0.1583, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_accuracy": 0.9139666849716274, |
|
"eval_loss": 0.3975317180156708, |
|
"eval_runtime": 15.5028, |
|
"eval_samples_per_second": 352.388, |
|
"eval_steps_per_second": 44.057, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.3964673957199267e-05, |
|
"loss": 0.1568, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_accuracy": 0.9136005857587406, |
|
"eval_loss": 0.4135216772556305, |
|
"eval_runtime": 15.5258, |
|
"eval_samples_per_second": 351.866, |
|
"eval_steps_per_second": 43.991, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.3802180660047775e-05, |
|
"loss": 0.1642, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_accuracy": 0.9128683873329673, |
|
"eval_loss": 0.37052008509635925, |
|
"eval_runtime": 15.557, |
|
"eval_samples_per_second": 351.161, |
|
"eval_steps_per_second": 43.903, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.3639687362896281e-05, |
|
"loss": 0.1781, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"eval_accuracy": 0.9156141314296175, |
|
"eval_loss": 0.33989137411117554, |
|
"eval_runtime": 15.5332, |
|
"eval_samples_per_second": 351.699, |
|
"eval_steps_per_second": 43.97, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.347719406574479e-05, |
|
"loss": 0.1725, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_accuracy": 0.9159802306425041, |
|
"eval_loss": 0.3164858818054199, |
|
"eval_runtime": 15.5466, |
|
"eval_samples_per_second": 351.394, |
|
"eval_steps_per_second": 43.932, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.3314700768593297e-05, |
|
"loss": 0.1675, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.917993776313381, |
|
"eval_loss": 0.3278675973415375, |
|
"eval_runtime": 15.4883, |
|
"eval_samples_per_second": 352.717, |
|
"eval_steps_per_second": 44.098, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.3152207471441804e-05, |
|
"loss": 0.165, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"eval_accuracy": 0.920190371590701, |
|
"eval_loss": 0.34237906336784363, |
|
"eval_runtime": 15.4787, |
|
"eval_samples_per_second": 352.937, |
|
"eval_steps_per_second": 44.125, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.298971417429031e-05, |
|
"loss": 0.1608, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.9137836353651839, |
|
"eval_loss": 0.4021676778793335, |
|
"eval_runtime": 15.432, |
|
"eval_samples_per_second": 354.004, |
|
"eval_steps_per_second": 44.259, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.282722087713882e-05, |
|
"loss": 0.1576, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_accuracy": 0.9146988833974007, |
|
"eval_loss": 0.3611091375350952, |
|
"eval_runtime": 15.4994, |
|
"eval_samples_per_second": 352.465, |
|
"eval_steps_per_second": 44.066, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 1.2664727579987326e-05, |
|
"loss": 0.1382, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_accuracy": 0.9139666849716274, |
|
"eval_loss": 0.400115966796875, |
|
"eval_runtime": 15.4319, |
|
"eval_samples_per_second": 354.008, |
|
"eval_steps_per_second": 44.259, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 1.2502234282835834e-05, |
|
"loss": 0.1126, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_accuracy": 0.9168954786747209, |
|
"eval_loss": 0.40148910880088806, |
|
"eval_runtime": 15.5197, |
|
"eval_samples_per_second": 352.005, |
|
"eval_steps_per_second": 44.009, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 1.233974098568434e-05, |
|
"loss": 0.1048, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.9168954786747209, |
|
"eval_loss": 0.391897052526474, |
|
"eval_runtime": 15.4767, |
|
"eval_samples_per_second": 352.982, |
|
"eval_steps_per_second": 44.131, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.217724768853285e-05, |
|
"loss": 0.1057, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.9176276771004942, |
|
"eval_loss": 0.4071587324142456, |
|
"eval_runtime": 15.5017, |
|
"eval_samples_per_second": 352.414, |
|
"eval_steps_per_second": 44.06, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 1.2014754391381356e-05, |
|
"loss": 0.1212, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_accuracy": 0.9161632802489474, |
|
"eval_loss": 0.36226457357406616, |
|
"eval_runtime": 15.4788, |
|
"eval_samples_per_second": 352.935, |
|
"eval_steps_per_second": 44.125, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.1852261094229864e-05, |
|
"loss": 0.1152, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.914881933003844, |
|
"eval_loss": 0.39458510279655457, |
|
"eval_runtime": 15.5093, |
|
"eval_samples_per_second": 352.239, |
|
"eval_steps_per_second": 44.038, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.168976779707837e-05, |
|
"loss": 0.125, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_accuracy": 0.9156141314296175, |
|
"eval_loss": 0.4142376780509949, |
|
"eval_runtime": 15.4899, |
|
"eval_samples_per_second": 352.682, |
|
"eval_steps_per_second": 44.093, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.152727449992688e-05, |
|
"loss": 0.1195, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_accuracy": 0.9150649826102873, |
|
"eval_loss": 0.40947625041007996, |
|
"eval_runtime": 15.5825, |
|
"eval_samples_per_second": 350.585, |
|
"eval_steps_per_second": 43.831, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.1364781202775386e-05, |
|
"loss": 0.1139, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_accuracy": 0.9088412959912137, |
|
"eval_loss": 0.4585929811000824, |
|
"eval_runtime": 15.5177, |
|
"eval_samples_per_second": 352.05, |
|
"eval_steps_per_second": 44.014, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 1.1202287905623894e-05, |
|
"loss": 0.1279, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"eval_accuracy": 0.9203734211971444, |
|
"eval_loss": 0.39000362157821655, |
|
"eval_runtime": 15.5995, |
|
"eval_samples_per_second": 350.204, |
|
"eval_steps_per_second": 43.784, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.10397946084724e-05, |
|
"loss": 0.1306, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.9165293794618341, |
|
"eval_loss": 0.37414777278900146, |
|
"eval_runtime": 15.5095, |
|
"eval_samples_per_second": 352.236, |
|
"eval_steps_per_second": 44.038, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.087730131132091e-05, |
|
"loss": 0.1091, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"eval_accuracy": 0.9207395204100312, |
|
"eval_loss": 0.4295918941497803, |
|
"eval_runtime": 15.4949, |
|
"eval_samples_per_second": 352.567, |
|
"eval_steps_per_second": 44.079, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.0714808014169416e-05, |
|
"loss": 0.1272, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"eval_accuracy": 0.9189090243455976, |
|
"eval_loss": 0.37242451310157776, |
|
"eval_runtime": 15.5005, |
|
"eval_samples_per_second": 352.44, |
|
"eval_steps_per_second": 44.063, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 1.0552314717017924e-05, |
|
"loss": 0.0906, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"eval_accuracy": 0.9181768259198243, |
|
"eval_loss": 0.4511684477329254, |
|
"eval_runtime": 15.5205, |
|
"eval_samples_per_second": 351.985, |
|
"eval_steps_per_second": 44.006, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 1.038982141986643e-05, |
|
"loss": 0.0915, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_accuracy": 0.9220208676551346, |
|
"eval_loss": 0.4160422086715698, |
|
"eval_runtime": 15.4765, |
|
"eval_samples_per_second": 352.987, |
|
"eval_steps_per_second": 44.131, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.022732812271494e-05, |
|
"loss": 0.0773, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_accuracy": 0.917993776313381, |
|
"eval_loss": 0.47428572177886963, |
|
"eval_runtime": 15.4784, |
|
"eval_samples_per_second": 352.944, |
|
"eval_steps_per_second": 44.126, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 1.0064834825563446e-05, |
|
"loss": 0.0861, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_accuracy": 0.9203734211971444, |
|
"eval_loss": 0.5023528337478638, |
|
"eval_runtime": 15.4707, |
|
"eval_samples_per_second": 353.119, |
|
"eval_steps_per_second": 44.148, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 9.902341528411954e-06, |
|
"loss": 0.0729, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_accuracy": 0.9203734211971444, |
|
"eval_loss": 0.42824575304985046, |
|
"eval_runtime": 15.5357, |
|
"eval_samples_per_second": 351.643, |
|
"eval_steps_per_second": 43.963, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 9.739848231260461e-06, |
|
"loss": 0.0901, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_accuracy": 0.9225700164744646, |
|
"eval_loss": 0.46121296286582947, |
|
"eval_runtime": 15.4855, |
|
"eval_samples_per_second": 352.782, |
|
"eval_steps_per_second": 44.106, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 9.57735493410897e-06, |
|
"loss": 0.0856, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_accuracy": 0.917993776313381, |
|
"eval_loss": 0.44952550530433655, |
|
"eval_runtime": 15.5112, |
|
"eval_samples_per_second": 352.196, |
|
"eval_steps_per_second": 44.033, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 9.414861636957477e-06, |
|
"loss": 0.0839, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_accuracy": 0.9205564708035878, |
|
"eval_loss": 0.45009082555770874, |
|
"eval_runtime": 15.4775, |
|
"eval_samples_per_second": 352.963, |
|
"eval_steps_per_second": 44.128, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 9.252368339805983e-06, |
|
"loss": 0.0874, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_accuracy": 0.9200073219842577, |
|
"eval_loss": 0.41364917159080505, |
|
"eval_runtime": 15.5201, |
|
"eval_samples_per_second": 351.994, |
|
"eval_steps_per_second": 44.007, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 9.089875042654491e-06, |
|
"loss": 0.0944, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_accuracy": 0.9165293794618341, |
|
"eval_loss": 0.46293067932128906, |
|
"eval_runtime": 15.474, |
|
"eval_samples_per_second": 353.043, |
|
"eval_steps_per_second": 44.138, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 8.927381745502999e-06, |
|
"loss": 0.0874, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_accuracy": 0.9159802306425041, |
|
"eval_loss": 0.4790218770503998, |
|
"eval_runtime": 15.4804, |
|
"eval_samples_per_second": 352.897, |
|
"eval_steps_per_second": 44.12, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 8.764888448351507e-06, |
|
"loss": 0.0859, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"eval_accuracy": 0.9132344865458539, |
|
"eval_loss": 0.4725111722946167, |
|
"eval_runtime": 15.4939, |
|
"eval_samples_per_second": 352.59, |
|
"eval_steps_per_second": 44.082, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 8.602395151200013e-06, |
|
"loss": 0.0808, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_accuracy": 0.9161632802489474, |
|
"eval_loss": 0.4613119959831238, |
|
"eval_runtime": 15.5324, |
|
"eval_samples_per_second": 351.717, |
|
"eval_steps_per_second": 43.973, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 8.439901854048521e-06, |
|
"loss": 0.0723, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_accuracy": 0.9194581731649277, |
|
"eval_loss": 0.4815793037414551, |
|
"eval_runtime": 15.4962, |
|
"eval_samples_per_second": 352.537, |
|
"eval_steps_per_second": 44.075, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 8.277408556897029e-06, |
|
"loss": 0.0568, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_accuracy": 0.9187259747391543, |
|
"eval_loss": 0.5257057547569275, |
|
"eval_runtime": 15.5592, |
|
"eval_samples_per_second": 351.112, |
|
"eval_steps_per_second": 43.897, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 8.114915259745537e-06, |
|
"loss": 0.0628, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_accuracy": 0.9194581731649277, |
|
"eval_loss": 0.4516303837299347, |
|
"eval_runtime": 15.5547, |
|
"eval_samples_per_second": 351.213, |
|
"eval_steps_per_second": 43.91, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 7.952421962594043e-06, |
|
"loss": 0.053, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_accuracy": 0.9187259747391543, |
|
"eval_loss": 0.4928816258907318, |
|
"eval_runtime": 15.5194, |
|
"eval_samples_per_second": 352.012, |
|
"eval_steps_per_second": 44.01, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"learning_rate": 7.789928665442551e-06, |
|
"loss": 0.0574, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.34, |
|
"eval_accuracy": 0.919092073952041, |
|
"eval_loss": 0.48882895708084106, |
|
"eval_runtime": 15.4527, |
|
"eval_samples_per_second": 353.531, |
|
"eval_steps_per_second": 44.2, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 7.627435368291059e-06, |
|
"loss": 0.0717, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_accuracy": 0.9165293794618341, |
|
"eval_loss": 0.4769105911254883, |
|
"eval_runtime": 15.4715, |
|
"eval_samples_per_second": 353.101, |
|
"eval_steps_per_second": 44.146, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 7.464942071139566e-06, |
|
"loss": 0.0622, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"eval_accuracy": 0.9183598755262676, |
|
"eval_loss": 0.5081596374511719, |
|
"eval_runtime": 15.4674, |
|
"eval_samples_per_second": 353.193, |
|
"eval_steps_per_second": 44.157, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 7.302448773988074e-06, |
|
"loss": 0.0593, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_accuracy": 0.9211056196229178, |
|
"eval_loss": 0.44598302245140076, |
|
"eval_runtime": 15.4988, |
|
"eval_samples_per_second": 352.48, |
|
"eval_steps_per_second": 44.068, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 7.139955476836581e-06, |
|
"loss": 0.0603, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_accuracy": 0.9205564708035878, |
|
"eval_loss": 0.43451356887817383, |
|
"eval_runtime": 15.4424, |
|
"eval_samples_per_second": 353.766, |
|
"eval_steps_per_second": 44.229, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 6.9774621796850885e-06, |
|
"loss": 0.0659, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"eval_accuracy": 0.9189090243455976, |
|
"eval_loss": 0.4423438310623169, |
|
"eval_runtime": 15.4892, |
|
"eval_samples_per_second": 352.697, |
|
"eval_steps_per_second": 44.095, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.8149688825335956e-06, |
|
"loss": 0.0629, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_accuracy": 0.919092073952041, |
|
"eval_loss": 0.47710955142974854, |
|
"eval_runtime": 15.4296, |
|
"eval_samples_per_second": 354.061, |
|
"eval_steps_per_second": 44.266, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 6.6524755853821034e-06, |
|
"loss": 0.058, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_accuracy": 0.9227530660809079, |
|
"eval_loss": 0.4589206576347351, |
|
"eval_runtime": 15.5247, |
|
"eval_samples_per_second": 351.892, |
|
"eval_steps_per_second": 43.995, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 6.4899822882306105e-06, |
|
"loss": 0.0545, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_accuracy": 0.9200073219842577, |
|
"eval_loss": 0.508368730545044, |
|
"eval_runtime": 15.453, |
|
"eval_samples_per_second": 353.524, |
|
"eval_steps_per_second": 44.199, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 6.327488991079118e-06, |
|
"loss": 0.0465, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"eval_accuracy": 0.9192751235584844, |
|
"eval_loss": 0.5422417521476746, |
|
"eval_runtime": 15.5091, |
|
"eval_samples_per_second": 352.244, |
|
"eval_steps_per_second": 44.039, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 6.164995693927625e-06, |
|
"loss": 0.0424, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_accuracy": 0.920190371590701, |
|
"eval_loss": 0.5029627680778503, |
|
"eval_runtime": 15.5152, |
|
"eval_samples_per_second": 352.107, |
|
"eval_steps_per_second": 44.021, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 6.002502396776133e-06, |
|
"loss": 0.0317, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"eval_accuracy": 0.9212886692293611, |
|
"eval_loss": 0.5393094420433044, |
|
"eval_runtime": 15.5001, |
|
"eval_samples_per_second": 352.449, |
|
"eval_steps_per_second": 44.064, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 5.84000909962464e-06, |
|
"loss": 0.029, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"eval_accuracy": 0.9174446274940509, |
|
"eval_loss": 0.5618457794189453, |
|
"eval_runtime": 15.5012, |
|
"eval_samples_per_second": 352.424, |
|
"eval_steps_per_second": 44.061, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 5.677515802473148e-06, |
|
"loss": 0.0439, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_accuracy": 0.9194581731649277, |
|
"eval_loss": 0.5000433325767517, |
|
"eval_runtime": 15.5179, |
|
"eval_samples_per_second": 352.046, |
|
"eval_steps_per_second": 44.014, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 5.515022505321655e-06, |
|
"loss": 0.0347, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_accuracy": 0.9200073219842577, |
|
"eval_loss": 0.5093263387680054, |
|
"eval_runtime": 15.4926, |
|
"eval_samples_per_second": 352.62, |
|
"eval_steps_per_second": 44.086, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 5.352529208170163e-06, |
|
"loss": 0.0425, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_accuracy": 0.9174446274940509, |
|
"eval_loss": 0.5311388373374939, |
|
"eval_runtime": 15.4852, |
|
"eval_samples_per_second": 352.789, |
|
"eval_steps_per_second": 44.107, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 5.19003591101867e-06, |
|
"loss": 0.0384, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_accuracy": 0.9198242723778144, |
|
"eval_loss": 0.501017689704895, |
|
"eval_runtime": 15.5002, |
|
"eval_samples_per_second": 352.448, |
|
"eval_steps_per_second": 44.064, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 5.027542613867178e-06, |
|
"loss": 0.039, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.5181994438171387, |
|
"eval_runtime": 15.4866, |
|
"eval_samples_per_second": 352.755, |
|
"eval_steps_per_second": 44.103, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 4.865049316715686e-06, |
|
"loss": 0.04, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_accuracy": 0.9214717188358045, |
|
"eval_loss": 0.5238317847251892, |
|
"eval_runtime": 15.4962, |
|
"eval_samples_per_second": 352.538, |
|
"eval_steps_per_second": 44.075, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.702556019564194e-06, |
|
"loss": 0.0374, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_accuracy": 0.9218378180486912, |
|
"eval_loss": 0.5560519695281982, |
|
"eval_runtime": 15.4921, |
|
"eval_samples_per_second": 352.63, |
|
"eval_steps_per_second": 44.087, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 4.540062722412701e-06, |
|
"loss": 0.0366, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"eval_accuracy": 0.9200073219842577, |
|
"eval_loss": 0.5412092208862305, |
|
"eval_runtime": 15.4629, |
|
"eval_samples_per_second": 353.297, |
|
"eval_steps_per_second": 44.17, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 4.377569425261209e-06, |
|
"loss": 0.036, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"eval_accuracy": 0.9212886692293611, |
|
"eval_loss": 0.5213342905044556, |
|
"eval_runtime": 15.4852, |
|
"eval_samples_per_second": 352.789, |
|
"eval_steps_per_second": 44.107, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 4.215076128109716e-06, |
|
"loss": 0.0348, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_accuracy": 0.9216547684422478, |
|
"eval_loss": 0.51399165391922, |
|
"eval_runtime": 15.5008, |
|
"eval_samples_per_second": 352.434, |
|
"eval_steps_per_second": 44.062, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 4.0525828309582235e-06, |
|
"loss": 0.0186, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_accuracy": 0.9240344133260113, |
|
"eval_loss": 0.5692733526229858, |
|
"eval_runtime": 15.5019, |
|
"eval_samples_per_second": 352.408, |
|
"eval_steps_per_second": 44.059, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 3.8900895338067306e-06, |
|
"loss": 0.0275, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"eval_accuracy": 0.923851363719568, |
|
"eval_loss": 0.5007224082946777, |
|
"eval_runtime": 15.4606, |
|
"eval_samples_per_second": 353.349, |
|
"eval_steps_per_second": 44.177, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 3.727596236655238e-06, |
|
"loss": 0.0219, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_accuracy": 0.9240344133260113, |
|
"eval_loss": 0.5399835705757141, |
|
"eval_runtime": 15.5486, |
|
"eval_samples_per_second": 351.35, |
|
"eval_steps_per_second": 43.927, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 3.5651029395037455e-06, |
|
"loss": 0.0238, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_accuracy": 0.9227530660809079, |
|
"eval_loss": 0.5536710023880005, |
|
"eval_runtime": 15.4711, |
|
"eval_samples_per_second": 353.109, |
|
"eval_steps_per_second": 44.147, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 3.402609642352253e-06, |
|
"loss": 0.0201, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_accuracy": 0.9214717188358045, |
|
"eval_loss": 0.5851342678070068, |
|
"eval_runtime": 15.5145, |
|
"eval_samples_per_second": 352.122, |
|
"eval_steps_per_second": 44.023, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 3.2401163452007604e-06, |
|
"loss": 0.0253, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"eval_accuracy": 0.9216547684422478, |
|
"eval_loss": 0.5654112696647644, |
|
"eval_runtime": 15.537, |
|
"eval_samples_per_second": 351.612, |
|
"eval_steps_per_second": 43.96, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 3.0776230480492682e-06, |
|
"loss": 0.0243, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_accuracy": 0.9212886692293611, |
|
"eval_loss": 0.5833209156990051, |
|
"eval_runtime": 15.4958, |
|
"eval_samples_per_second": 352.547, |
|
"eval_steps_per_second": 44.076, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 2.9151297508977757e-06, |
|
"loss": 0.0298, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.5482963919639587, |
|
"eval_runtime": 15.4637, |
|
"eval_samples_per_second": 353.28, |
|
"eval_steps_per_second": 44.168, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 2.752636453746283e-06, |
|
"loss": 0.0232, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"eval_accuracy": 0.9214717188358045, |
|
"eval_loss": 0.5724154114723206, |
|
"eval_runtime": 15.5462, |
|
"eval_samples_per_second": 351.403, |
|
"eval_steps_per_second": 43.933, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 2.5901431565947906e-06, |
|
"loss": 0.0239, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_accuracy": 0.9194581731649277, |
|
"eval_loss": 0.5574042201042175, |
|
"eval_runtime": 15.4883, |
|
"eval_samples_per_second": 352.717, |
|
"eval_steps_per_second": 44.098, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.427649859443298e-06, |
|
"loss": 0.0263, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.9234852645066813, |
|
"eval_loss": 0.5491210222244263, |
|
"eval_runtime": 15.5424, |
|
"eval_samples_per_second": 351.489, |
|
"eval_steps_per_second": 43.944, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 2.2651565622918055e-06, |
|
"loss": 0.0333, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.5322240591049194, |
|
"eval_runtime": 15.4522, |
|
"eval_samples_per_second": 353.541, |
|
"eval_steps_per_second": 44.201, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 2.102663265140313e-06, |
|
"loss": 0.0259, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_accuracy": 0.9216547684422478, |
|
"eval_loss": 0.5492663979530334, |
|
"eval_runtime": 15.4874, |
|
"eval_samples_per_second": 352.738, |
|
"eval_steps_per_second": 44.1, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.9401699679888204e-06, |
|
"loss": 0.0197, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.5670880675315857, |
|
"eval_runtime": 15.4679, |
|
"eval_samples_per_second": 353.183, |
|
"eval_steps_per_second": 44.156, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 1.777676670837328e-06, |
|
"loss": 0.0237, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_accuracy": 0.9209225700164745, |
|
"eval_loss": 0.5536203980445862, |
|
"eval_runtime": 15.492, |
|
"eval_samples_per_second": 352.633, |
|
"eval_steps_per_second": 44.087, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 1.6151833736858355e-06, |
|
"loss": 0.022, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"eval_accuracy": 0.9216547684422478, |
|
"eval_loss": 0.5523190498352051, |
|
"eval_runtime": 15.499, |
|
"eval_samples_per_second": 352.475, |
|
"eval_steps_per_second": 44.067, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 1.452690076534343e-06, |
|
"loss": 0.0246, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"eval_accuracy": 0.9220208676551346, |
|
"eval_loss": 0.5619478821754456, |
|
"eval_runtime": 15.4813, |
|
"eval_samples_per_second": 352.878, |
|
"eval_steps_per_second": 44.118, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 1.2901967793828504e-06, |
|
"loss": 0.0202, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"eval_accuracy": 0.9227530660809079, |
|
"eval_loss": 0.5619451999664307, |
|
"eval_runtime": 15.4982, |
|
"eval_samples_per_second": 352.492, |
|
"eval_steps_per_second": 44.07, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 1.127703482231358e-06, |
|
"loss": 0.0184, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_accuracy": 0.9216547684422478, |
|
"eval_loss": 0.5728563070297241, |
|
"eval_runtime": 15.4841, |
|
"eval_samples_per_second": 352.814, |
|
"eval_steps_per_second": 44.11, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 9.652101850798656e-07, |
|
"loss": 0.0122, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"eval_accuracy": 0.920190371590701, |
|
"eval_loss": 0.594571053981781, |
|
"eval_runtime": 15.5059, |
|
"eval_samples_per_second": 352.317, |
|
"eval_steps_per_second": 44.048, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 8.02716887928373e-07, |
|
"loss": 0.015, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_accuracy": 0.9214717188358045, |
|
"eval_loss": 0.601406455039978, |
|
"eval_runtime": 15.5587, |
|
"eval_samples_per_second": 351.121, |
|
"eval_steps_per_second": 43.898, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 6.402235907768806e-07, |
|
"loss": 0.0189, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_accuracy": 0.9225700164744646, |
|
"eval_loss": 0.5927833318710327, |
|
"eval_runtime": 15.5411, |
|
"eval_samples_per_second": 351.518, |
|
"eval_steps_per_second": 43.948, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 4.777302936253879e-07, |
|
"loss": 0.0194, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_accuracy": 0.9220208676551346, |
|
"eval_loss": 0.5897760987281799, |
|
"eval_runtime": 15.5041, |
|
"eval_samples_per_second": 352.359, |
|
"eval_steps_per_second": 44.053, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 3.1523699647389547e-07, |
|
"loss": 0.0219, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_accuracy": 0.9218378180486912, |
|
"eval_loss": 0.5851386189460754, |
|
"eval_runtime": 15.4835, |
|
"eval_samples_per_second": 352.826, |
|
"eval_steps_per_second": 44.111, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1.5274369932240295e-07, |
|
"loss": 0.017, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"eval_accuracy": 0.9218378180486912, |
|
"eval_loss": 0.5890805125236511, |
|
"eval_runtime": 15.4838, |
|
"eval_samples_per_second": 352.819, |
|
"eval_steps_per_second": 44.111, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 65470, |
|
"total_flos": 6.88976031789312e+16, |
|
"train_loss": 0.1338691882801653, |
|
"train_runtime": 14647.724, |
|
"train_samples_per_second": 71.508, |
|
"train_steps_per_second": 4.47 |
|
} |
|
], |
|
"max_steps": 65470, |
|
"num_train_epochs": 10, |
|
"total_flos": 6.88976031789312e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|