|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 3510, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"eval_f1": 0.3722687284743791, |
|
"eval_loss": 1.3361328840255737, |
|
"eval_runtime": 3.2985, |
|
"eval_samples_per_second": 29.71, |
|
"eval_steps_per_second": 29.71, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_f1": 0.4344699929794222, |
|
"eval_loss": 1.1836130619049072, |
|
"eval_runtime": 3.274, |
|
"eval_samples_per_second": 29.933, |
|
"eval_steps_per_second": 29.933, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_f1": 0.3996935122704078, |
|
"eval_loss": 1.1635534763336182, |
|
"eval_runtime": 3.2772, |
|
"eval_samples_per_second": 29.904, |
|
"eval_steps_per_second": 29.904, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_f1": 0.5028054395723479, |
|
"eval_loss": 1.353499174118042, |
|
"eval_runtime": 3.2926, |
|
"eval_samples_per_second": 29.764, |
|
"eval_steps_per_second": 29.764, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.572649572649573e-05, |
|
"loss": 1.2064, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_f1": 0.47071964928887344, |
|
"eval_loss": 1.2940737009048462, |
|
"eval_runtime": 3.3744, |
|
"eval_samples_per_second": 29.042, |
|
"eval_steps_per_second": 29.042, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_f1": 0.49370416278560797, |
|
"eval_loss": 1.2891041040420532, |
|
"eval_runtime": 3.3094, |
|
"eval_samples_per_second": 29.613, |
|
"eval_steps_per_second": 29.613, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.47736617527595926, |
|
"eval_loss": 1.2047343254089355, |
|
"eval_runtime": 3.3145, |
|
"eval_samples_per_second": 29.567, |
|
"eval_steps_per_second": 29.567, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_f1": 0.4943830125990876, |
|
"eval_loss": 1.2190661430358887, |
|
"eval_runtime": 3.3377, |
|
"eval_samples_per_second": 29.362, |
|
"eval_steps_per_second": 29.362, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_f1": 0.4777863203104454, |
|
"eval_loss": 1.174961805343628, |
|
"eval_runtime": 3.3391, |
|
"eval_samples_per_second": 29.349, |
|
"eval_steps_per_second": 29.349, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.1452991452991456e-05, |
|
"loss": 0.9207, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_f1": 0.49086129207075757, |
|
"eval_loss": 1.3087153434753418, |
|
"eval_runtime": 3.4003, |
|
"eval_samples_per_second": 28.821, |
|
"eval_steps_per_second": 28.821, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_f1": 0.49757882395260544, |
|
"eval_loss": 1.2435556650161743, |
|
"eval_runtime": 3.4366, |
|
"eval_samples_per_second": 28.517, |
|
"eval_steps_per_second": 28.517, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_f1": 0.503327058221218, |
|
"eval_loss": 1.1465363502502441, |
|
"eval_runtime": 3.2944, |
|
"eval_samples_per_second": 29.748, |
|
"eval_steps_per_second": 29.748, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_f1": 0.5141589868888157, |
|
"eval_loss": 1.113364577293396, |
|
"eval_runtime": 3.3642, |
|
"eval_samples_per_second": 29.131, |
|
"eval_steps_per_second": 29.131, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_f1": 0.5383469405673188, |
|
"eval_loss": 1.1939647197723389, |
|
"eval_runtime": 3.3033, |
|
"eval_samples_per_second": 29.668, |
|
"eval_steps_per_second": 29.668, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.7179487179487178e-05, |
|
"loss": 0.8149, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_f1": 0.5291030100787034, |
|
"eval_loss": 1.2552497386932373, |
|
"eval_runtime": 3.7541, |
|
"eval_samples_per_second": 26.105, |
|
"eval_steps_per_second": 26.105, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_f1": 0.5259736412492381, |
|
"eval_loss": 1.3746747970581055, |
|
"eval_runtime": 4.9995, |
|
"eval_samples_per_second": 19.602, |
|
"eval_steps_per_second": 19.602, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_f1": 0.5329388682083431, |
|
"eval_loss": 1.3680145740509033, |
|
"eval_runtime": 5.1597, |
|
"eval_samples_per_second": 18.993, |
|
"eval_steps_per_second": 18.993, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_f1": 0.5189920532535025, |
|
"eval_loss": 1.27865469455719, |
|
"eval_runtime": 5.0223, |
|
"eval_samples_per_second": 19.513, |
|
"eval_steps_per_second": 19.513, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_f1": 0.5409205239275264, |
|
"eval_loss": 1.3888845443725586, |
|
"eval_runtime": 3.3132, |
|
"eval_samples_per_second": 29.579, |
|
"eval_steps_per_second": 29.579, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.2905982905982905e-05, |
|
"loss": 0.6152, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_f1": 0.543504294934508, |
|
"eval_loss": 1.3602004051208496, |
|
"eval_runtime": 3.3336, |
|
"eval_samples_per_second": 29.398, |
|
"eval_steps_per_second": 29.398, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_f1": 0.5467811408362643, |
|
"eval_loss": 1.3174574375152588, |
|
"eval_runtime": 5.0569, |
|
"eval_samples_per_second": 19.379, |
|
"eval_steps_per_second": 19.379, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_f1": 0.5365057187973831, |
|
"eval_loss": 1.5886870622634888, |
|
"eval_runtime": 4.5058, |
|
"eval_samples_per_second": 21.75, |
|
"eval_steps_per_second": 21.75, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_f1": 0.5563382534701277, |
|
"eval_loss": 1.517231822013855, |
|
"eval_runtime": 4.5269, |
|
"eval_samples_per_second": 21.648, |
|
"eval_steps_per_second": 21.648, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_f1": 0.5661390498930103, |
|
"eval_loss": 1.5470443964004517, |
|
"eval_runtime": 3.6905, |
|
"eval_samples_per_second": 26.555, |
|
"eval_steps_per_second": 26.555, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 8.632478632478633e-06, |
|
"loss": 0.4719, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_f1": 0.521216772952552, |
|
"eval_loss": 1.4928430318832397, |
|
"eval_runtime": 3.3155, |
|
"eval_samples_per_second": 29.558, |
|
"eval_steps_per_second": 29.558, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_f1": 0.5356457612585566, |
|
"eval_loss": 1.6497721672058105, |
|
"eval_runtime": 4.8518, |
|
"eval_samples_per_second": 20.199, |
|
"eval_steps_per_second": 20.199, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_f1": 0.5596834952223371, |
|
"eval_loss": 1.4976708889007568, |
|
"eval_runtime": 4.6972, |
|
"eval_samples_per_second": 20.863, |
|
"eval_steps_per_second": 20.863, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_f1": 0.5470066167039311, |
|
"eval_loss": 1.471981167793274, |
|
"eval_runtime": 4.5019, |
|
"eval_samples_per_second": 21.769, |
|
"eval_steps_per_second": 21.769, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_f1": 0.5492964393504802, |
|
"eval_loss": 1.5796676874160767, |
|
"eval_runtime": 4.7041, |
|
"eval_samples_per_second": 20.833, |
|
"eval_steps_per_second": 20.833, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 4.358974358974359e-06, |
|
"loss": 0.372, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_f1": 0.5445354826532323, |
|
"eval_loss": 1.6874395608901978, |
|
"eval_runtime": 3.9793, |
|
"eval_samples_per_second": 24.628, |
|
"eval_steps_per_second": 24.628, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"eval_f1": 0.5544723066439012, |
|
"eval_loss": 1.6702477931976318, |
|
"eval_runtime": 4.7443, |
|
"eval_samples_per_second": 20.656, |
|
"eval_steps_per_second": 20.656, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_f1": 0.5469058666319371, |
|
"eval_loss": 1.7671833038330078, |
|
"eval_runtime": 4.6665, |
|
"eval_samples_per_second": 21.001, |
|
"eval_steps_per_second": 21.001, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_f1": 0.5485370297299399, |
|
"eval_loss": 1.7351080179214478, |
|
"eval_runtime": 4.8851, |
|
"eval_samples_per_second": 20.061, |
|
"eval_steps_per_second": 20.061, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_f1": 0.5497797755164764, |
|
"eval_loss": 1.7282612323760986, |
|
"eval_runtime": 5.1791, |
|
"eval_samples_per_second": 18.922, |
|
"eval_steps_per_second": 18.922, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 8.547008547008547e-08, |
|
"loss": 0.2944, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_f1": 0.5503861750639598, |
|
"eval_loss": 1.698703408241272, |
|
"eval_runtime": 5.6701, |
|
"eval_samples_per_second": 17.284, |
|
"eval_steps_per_second": 17.284, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 3510, |
|
"total_flos": 2890172619430200.0, |
|
"train_loss": 0.6695007115008145, |
|
"train_runtime": 916.4977, |
|
"train_samples_per_second": 3.83, |
|
"train_steps_per_second": 3.83 |
|
} |
|
], |
|
"max_steps": 3510, |
|
"num_train_epochs": 5, |
|
"total_flos": 2890172619430200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|