|
{ |
|
"best_metric": 0.511543134872418, |
|
"best_model_checkpoint": "/content/our_data/checkpoint-9500", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 12410, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.91941982272361e-05, |
|
"loss": 1.7886, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.6168970448045757, |
|
"eval_f1": 0.19641754169240272, |
|
"eval_loss": 1.5075236558914185, |
|
"eval_precision": 0.1842410196987254, |
|
"eval_recall": 0.21031746031746032, |
|
"eval_runtime": 4.697, |
|
"eval_samples_per_second": 64.723, |
|
"eval_steps_per_second": 32.361, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.83883964544722e-05, |
|
"loss": 1.3644, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.6491897044804575, |
|
"eval_f1": 0.26658972879399884, |
|
"eval_loss": 1.3342480659484863, |
|
"eval_precision": 0.23643807574206754, |
|
"eval_recall": 0.3055555555555556, |
|
"eval_runtime": 3.8755, |
|
"eval_samples_per_second": 78.442, |
|
"eval_steps_per_second": 39.221, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 1.75825946817083e-05, |
|
"loss": 1.1181, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.6812440419447092, |
|
"eval_f1": 0.3241626794258373, |
|
"eval_loss": 1.2655349969863892, |
|
"eval_precision": 0.29585152838427947, |
|
"eval_recall": 0.3584656084656085, |
|
"eval_runtime": 4.9077, |
|
"eval_samples_per_second": 61.944, |
|
"eval_steps_per_second": 30.972, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.67767929089444e-05, |
|
"loss": 0.9833, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_accuracy": 0.6777883698760725, |
|
"eval_f1": 0.33541785105173394, |
|
"eval_loss": 1.2368232011795044, |
|
"eval_precision": 0.29411764705882354, |
|
"eval_recall": 0.39021164021164023, |
|
"eval_runtime": 4.7308, |
|
"eval_samples_per_second": 64.26, |
|
"eval_steps_per_second": 32.13, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.59709911361805e-05, |
|
"loss": 0.9036, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.7023355576739753, |
|
"eval_f1": 0.37717121588089325, |
|
"eval_loss": 1.268211007118225, |
|
"eval_precision": 0.35514018691588783, |
|
"eval_recall": 0.4021164021164021, |
|
"eval_runtime": 3.7593, |
|
"eval_samples_per_second": 80.867, |
|
"eval_steps_per_second": 40.434, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.5165189363416601e-05, |
|
"loss": 0.7102, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_accuracy": 0.7159199237368923, |
|
"eval_f1": 0.40775558166862513, |
|
"eval_loss": 1.2175697088241577, |
|
"eval_precision": 0.3668076109936575, |
|
"eval_recall": 0.458994708994709, |
|
"eval_runtime": 4.1645, |
|
"eval_samples_per_second": 72.998, |
|
"eval_steps_per_second": 36.499, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.4359387590652701e-05, |
|
"loss": 0.6868, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7147283126787417, |
|
"eval_f1": 0.41918294849023086, |
|
"eval_loss": 1.2170379161834717, |
|
"eval_precision": 0.37942122186495175, |
|
"eval_recall": 0.46825396825396826, |
|
"eval_runtime": 4.987, |
|
"eval_samples_per_second": 60.958, |
|
"eval_steps_per_second": 30.479, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.35535858178888e-05, |
|
"loss": 0.5671, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_accuracy": 0.7259294566253575, |
|
"eval_f1": 0.4366863905325444, |
|
"eval_loss": 1.2602918148040771, |
|
"eval_precision": 0.3950749464668094, |
|
"eval_recall": 0.4880952380952381, |
|
"eval_runtime": 3.9634, |
|
"eval_samples_per_second": 76.702, |
|
"eval_steps_per_second": 38.351, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.27477840451249e-05, |
|
"loss": 0.4878, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_accuracy": 0.7333174451858914, |
|
"eval_f1": 0.4432930339666091, |
|
"eval_loss": 1.2460124492645264, |
|
"eval_precision": 0.3924566768603466, |
|
"eval_recall": 0.5092592592592593, |
|
"eval_runtime": 5.1761, |
|
"eval_samples_per_second": 58.732, |
|
"eval_steps_per_second": 29.366, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.1941982272361e-05, |
|
"loss": 0.4942, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_accuracy": 0.7284318398474738, |
|
"eval_f1": 0.43920145190562615, |
|
"eval_loss": 1.314683198928833, |
|
"eval_precision": 0.40468227424749165, |
|
"eval_recall": 0.4801587301587302, |
|
"eval_runtime": 3.8399, |
|
"eval_samples_per_second": 79.169, |
|
"eval_steps_per_second": 39.585, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 1.11361804995971e-05, |
|
"loss": 0.3812, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_accuracy": 0.7351048617731173, |
|
"eval_f1": 0.46281975014872095, |
|
"eval_loss": 1.3308484554290771, |
|
"eval_precision": 0.4205405405405405, |
|
"eval_recall": 0.5145502645502645, |
|
"eval_runtime": 4.4524, |
|
"eval_samples_per_second": 68.278, |
|
"eval_steps_per_second": 34.139, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 1.0330378726833199e-05, |
|
"loss": 0.421, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"eval_accuracy": 0.738560533841754, |
|
"eval_f1": 0.4702380952380952, |
|
"eval_loss": 1.3030654191970825, |
|
"eval_precision": 0.4274891774891775, |
|
"eval_recall": 0.5224867724867724, |
|
"eval_runtime": 4.5652, |
|
"eval_samples_per_second": 66.59, |
|
"eval_steps_per_second": 33.295, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 9.5245769540693e-06, |
|
"loss": 0.3157, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_accuracy": 0.7292659675881792, |
|
"eval_f1": 0.4541120381406436, |
|
"eval_loss": 1.394273042678833, |
|
"eval_precision": 0.41323210412147504, |
|
"eval_recall": 0.503968253968254, |
|
"eval_runtime": 4.8385, |
|
"eval_samples_per_second": 62.83, |
|
"eval_steps_per_second": 31.415, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 8.7187751813054e-06, |
|
"loss": 0.3072, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"eval_accuracy": 0.7396329837940896, |
|
"eval_f1": 0.47030593881223753, |
|
"eval_loss": 1.4086649417877197, |
|
"eval_precision": 0.43029637760702527, |
|
"eval_recall": 0.5185185185185185, |
|
"eval_runtime": 3.6352, |
|
"eval_samples_per_second": 83.626, |
|
"eval_steps_per_second": 41.813, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 7.9129734085415e-06, |
|
"loss": 0.3436, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"eval_accuracy": 0.7362964728312679, |
|
"eval_f1": 0.48238153098420417, |
|
"eval_loss": 1.4196876287460327, |
|
"eval_precision": 0.4460674157303371, |
|
"eval_recall": 0.5251322751322751, |
|
"eval_runtime": 3.9627, |
|
"eval_samples_per_second": 76.716, |
|
"eval_steps_per_second": 38.358, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.107171635777599e-06, |
|
"loss": 0.2774, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_accuracy": 0.7377264061010487, |
|
"eval_f1": 0.4702380952380952, |
|
"eval_loss": 1.4248775243759155, |
|
"eval_precision": 0.4274891774891775, |
|
"eval_recall": 0.5224867724867724, |
|
"eval_runtime": 4.1017, |
|
"eval_samples_per_second": 74.116, |
|
"eval_steps_per_second": 37.058, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 6.301369863013699e-06, |
|
"loss": 0.2629, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_accuracy": 0.732721639656816, |
|
"eval_f1": 0.4932844932844933, |
|
"eval_loss": 1.4811160564422607, |
|
"eval_precision": 0.4580498866213152, |
|
"eval_recall": 0.5343915343915344, |
|
"eval_runtime": 3.7972, |
|
"eval_samples_per_second": 80.06, |
|
"eval_steps_per_second": 40.03, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 5.495568090249799e-06, |
|
"loss": 0.2271, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_accuracy": 0.7415395614871306, |
|
"eval_f1": 0.5043263288009888, |
|
"eval_loss": 1.5576499700546265, |
|
"eval_precision": 0.4733178654292343, |
|
"eval_recall": 0.5396825396825397, |
|
"eval_runtime": 3.7692, |
|
"eval_samples_per_second": 80.653, |
|
"eval_steps_per_second": 40.326, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 4.689766317485899e-06, |
|
"loss": 0.235, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"eval_accuracy": 0.7401096282173498, |
|
"eval_f1": 0.511543134872418, |
|
"eval_loss": 1.5468007326126099, |
|
"eval_precision": 0.4730337078651685, |
|
"eval_recall": 0.5568783068783069, |
|
"eval_runtime": 4.6282, |
|
"eval_samples_per_second": 65.684, |
|
"eval_steps_per_second": 32.842, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 3.883964544721999e-06, |
|
"loss": 0.2415, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"eval_accuracy": 0.7433269780743565, |
|
"eval_f1": 0.5058461538461538, |
|
"eval_loss": 1.5955547094345093, |
|
"eval_precision": 0.47295742232451093, |
|
"eval_recall": 0.5436507936507936, |
|
"eval_runtime": 4.7141, |
|
"eval_samples_per_second": 64.487, |
|
"eval_steps_per_second": 32.244, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"learning_rate": 3.0781627719580986e-06, |
|
"loss": 0.1826, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.46, |
|
"eval_accuracy": 0.7413012392755005, |
|
"eval_f1": 0.48864994026284353, |
|
"eval_loss": 1.616766333580017, |
|
"eval_precision": 0.4455337690631808, |
|
"eval_recall": 0.541005291005291, |
|
"eval_runtime": 4.6222, |
|
"eval_samples_per_second": 65.77, |
|
"eval_steps_per_second": 32.885, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 2.2723609991941985e-06, |
|
"loss": 0.2083, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_accuracy": 0.7413012392755005, |
|
"eval_f1": 0.49219687875150064, |
|
"eval_loss": 1.5865833759307861, |
|
"eval_precision": 0.45054945054945056, |
|
"eval_recall": 0.5423280423280423, |
|
"eval_runtime": 4.7524, |
|
"eval_samples_per_second": 63.967, |
|
"eval_steps_per_second": 31.984, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 1.4665592264302982e-06, |
|
"loss": 0.2169, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"eval_accuracy": 0.7467826501429933, |
|
"eval_f1": 0.5046040515653776, |
|
"eval_loss": 1.5974096059799194, |
|
"eval_precision": 0.47079037800687284, |
|
"eval_recall": 0.5436507936507936, |
|
"eval_runtime": 3.7982, |
|
"eval_samples_per_second": 80.038, |
|
"eval_steps_per_second": 40.019, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 6.607574536663981e-07, |
|
"loss": 0.1747, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_accuracy": 0.740467111534795, |
|
"eval_f1": 0.4963768115942029, |
|
"eval_loss": 1.6219406127929688, |
|
"eval_precision": 0.45666666666666667, |
|
"eval_recall": 0.5436507936507936, |
|
"eval_runtime": 4.7158, |
|
"eval_samples_per_second": 64.464, |
|
"eval_steps_per_second": 32.232, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 12410, |
|
"total_flos": 499833589185600.0, |
|
"train_loss": 0.5249143927833133, |
|
"train_runtime": 2240.4976, |
|
"train_samples_per_second": 11.073, |
|
"train_steps_per_second": 5.539 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12410, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 499833589185600.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|