|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.127659574468085, |
|
"eval_steps": 20, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0004995563442768412, |
|
"loss": 1.3139, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0004986690328305235, |
|
"loss": 1.2812, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.8499402403831482, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 125.9807, |
|
"eval_samples_per_second": 11.907, |
|
"eval_steps_per_second": 0.373, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0004977817213842058, |
|
"loss": 0.832, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0004968944099378882, |
|
"loss": 0.8156, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.8848057985305786, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 46.3365, |
|
"eval_samples_per_second": 32.372, |
|
"eval_steps_per_second": 1.014, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0004960070984915705, |
|
"loss": 0.7475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0004951197870452529, |
|
"loss": 0.7478, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7691774964332581, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0551, |
|
"eval_samples_per_second": 247.726, |
|
"eval_steps_per_second": 7.762, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0004942324755989353, |
|
"loss": 0.7663, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0004933451641526176, |
|
"loss": 0.6034, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7553095817565918, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.025, |
|
"eval_samples_per_second": 248.962, |
|
"eval_steps_per_second": 7.801, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004924578527063, |
|
"loss": 0.9511, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0004915705412599822, |
|
"loss": 0.7548, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7768574357032776, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.9796, |
|
"eval_samples_per_second": 250.851, |
|
"eval_steps_per_second": 7.86, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0004906832298136646, |
|
"loss": 0.858, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0004897959183673469, |
|
"loss": 0.8452, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.745326817035675, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.1179, |
|
"eval_samples_per_second": 245.183, |
|
"eval_steps_per_second": 7.682, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0004889086069210293, |
|
"loss": 0.8152, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00048802129547471164, |
|
"loss": 0.8972, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7610827088356018, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0456, |
|
"eval_samples_per_second": 248.113, |
|
"eval_steps_per_second": 7.774, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.000487133984028394, |
|
"loss": 0.7226, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0004862466725820763, |
|
"loss": 0.9377, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7714303135871887, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.9995, |
|
"eval_samples_per_second": 250.022, |
|
"eval_steps_per_second": 7.834, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0004853593611357587, |
|
"loss": 0.594, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00048447204968944104, |
|
"loss": 1.0968, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.9954016804695129, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 5.9871, |
|
"eval_samples_per_second": 250.537, |
|
"eval_steps_per_second": 7.85, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00048358473824312333, |
|
"loss": 0.9644, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00048269742679680566, |
|
"loss": 0.7502, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7544593214988708, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0077, |
|
"eval_samples_per_second": 249.679, |
|
"eval_steps_per_second": 7.823, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.000481810115350488, |
|
"loss": 0.7231, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0004809228039041704, |
|
"loss": 0.7902, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7758567929267883, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0531, |
|
"eval_samples_per_second": 247.806, |
|
"eval_steps_per_second": 7.765, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00048003549245785273, |
|
"loss": 0.7591, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00047914818101153507, |
|
"loss": 0.817, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.767899751663208, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.1711, |
|
"eval_samples_per_second": 243.067, |
|
"eval_steps_per_second": 7.616, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0004782608695652174, |
|
"loss": 0.6681, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00047737355811889974, |
|
"loss": 0.9199, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7462519407272339, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0331, |
|
"eval_samples_per_second": 248.63, |
|
"eval_steps_per_second": 7.79, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00047648624667258213, |
|
"loss": 0.7409, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0004755989352262644, |
|
"loss": 0.7246, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7581822872161865, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.1558, |
|
"eval_samples_per_second": 243.673, |
|
"eval_steps_per_second": 7.635, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00047471162377994675, |
|
"loss": 0.6876, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0004738243123336291, |
|
"loss": 0.7508, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7464602589607239, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.1548, |
|
"eval_samples_per_second": 243.711, |
|
"eval_steps_per_second": 7.636, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0004729370008873114, |
|
"loss": 0.8539, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0004720496894409938, |
|
"loss": 0.7011, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.788334310054779, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.1655, |
|
"eval_samples_per_second": 243.289, |
|
"eval_steps_per_second": 7.623, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00047116237799467615, |
|
"loss": 0.9111, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0004702750665483585, |
|
"loss": 0.8442, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7656621336936951, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.2177, |
|
"eval_samples_per_second": 241.249, |
|
"eval_steps_per_second": 7.559, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00046938775510204083, |
|
"loss": 0.7179, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00046850044365572317, |
|
"loss": 1.0021, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7755117416381836, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0871, |
|
"eval_samples_per_second": 246.422, |
|
"eval_steps_per_second": 7.721, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0004676131322094055, |
|
"loss": 0.8331, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00046672582076308784, |
|
"loss": 0.7982, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.8436231017112732, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.0719, |
|
"eval_samples_per_second": 247.04, |
|
"eval_steps_per_second": 7.741, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0004658385093167702, |
|
"loss": 0.8408, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0004649511978704525, |
|
"loss": 0.8299, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_accuracy": 0.7386666666666667, |
|
"eval_combined_score": 0.6626504648943422, |
|
"eval_f1": 0.6276400817995911, |
|
"eval_loss": 0.7582988142967224, |
|
"eval_precision": 0.5456284444444445, |
|
"eval_recall": 0.7386666666666667, |
|
"eval_runtime": 6.2274, |
|
"eval_samples_per_second": 240.873, |
|
"eval_steps_per_second": 7.547, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 5640, |
|
"num_train_epochs": 30, |
|
"save_steps": 100, |
|
"total_flos": 839857999970304.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|