{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.127659574468085, "eval_steps": 20, "global_step": 400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.0004995563442768412, "loss": 1.3139, "step": 10 }, { "epoch": 0.11, "learning_rate": 0.0004986690328305235, "loss": 1.2812, "step": 20 }, { "epoch": 0.11, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8499402403831482, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 125.9807, "eval_samples_per_second": 11.907, "eval_steps_per_second": 0.373, "step": 20 }, { "epoch": 0.16, "learning_rate": 0.0004977817213842058, "loss": 0.832, "step": 30 }, { "epoch": 0.21, "learning_rate": 0.0004968944099378882, "loss": 0.8156, "step": 40 }, { "epoch": 0.21, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8848057985305786, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 46.3365, "eval_samples_per_second": 32.372, "eval_steps_per_second": 1.014, "step": 40 }, { "epoch": 0.27, "learning_rate": 0.0004960070984915705, "loss": 0.7475, "step": 50 }, { "epoch": 0.32, "learning_rate": 0.0004951197870452529, "loss": 0.7478, "step": 60 }, { "epoch": 0.32, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7691774964332581, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0551, "eval_samples_per_second": 247.726, "eval_steps_per_second": 7.762, "step": 60 }, { "epoch": 0.37, "learning_rate": 0.0004942324755989353, "loss": 0.7663, "step": 70 }, { "epoch": 0.43, "learning_rate": 0.0004933451641526176, "loss": 0.6034, "step": 80 }, { "epoch": 0.43, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7553095817565918, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.025, "eval_samples_per_second": 248.962, "eval_steps_per_second": 7.801, "step": 80 }, { "epoch": 0.48, "learning_rate": 0.0004924578527063, "loss": 0.9511, "step": 90 }, { "epoch": 0.53, "learning_rate": 0.0004915705412599822, "loss": 0.7548, "step": 100 }, { "epoch": 0.53, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7768574357032776, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9796, "eval_samples_per_second": 250.851, "eval_steps_per_second": 7.86, "step": 100 }, { "epoch": 0.59, "learning_rate": 0.0004906832298136646, "loss": 0.858, "step": 110 }, { "epoch": 0.64, "learning_rate": 0.0004897959183673469, "loss": 0.8452, "step": 120 }, { "epoch": 0.64, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.745326817035675, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1179, "eval_samples_per_second": 245.183, "eval_steps_per_second": 7.682, "step": 120 }, { "epoch": 0.69, "learning_rate": 0.0004889086069210293, "loss": 0.8152, "step": 130 }, { "epoch": 0.74, "learning_rate": 0.00048802129547471164, "loss": 0.8972, "step": 140 }, { "epoch": 0.74, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7610827088356018, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0456, "eval_samples_per_second": 248.113, "eval_steps_per_second": 7.774, "step": 140 }, { "epoch": 0.8, "learning_rate": 0.000487133984028394, "loss": 0.7226, "step": 150 }, { "epoch": 0.85, "learning_rate": 0.0004862466725820763, "loss": 0.9377, "step": 160 }, { "epoch": 0.85, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7714303135871887, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9995, "eval_samples_per_second": 250.022, "eval_steps_per_second": 7.834, "step": 160 }, { "epoch": 0.9, "learning_rate": 0.0004853593611357587, "loss": 0.594, "step": 170 }, { "epoch": 0.96, "learning_rate": 0.00048447204968944104, "loss": 1.0968, "step": 180 }, { "epoch": 0.96, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.9954016804695129, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 5.9871, "eval_samples_per_second": 250.537, "eval_steps_per_second": 7.85, "step": 180 }, { "epoch": 1.01, "learning_rate": 0.00048358473824312333, "loss": 0.9644, "step": 190 }, { "epoch": 1.06, "learning_rate": 0.00048269742679680566, "loss": 0.7502, "step": 200 }, { "epoch": 1.06, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7544593214988708, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0077, "eval_samples_per_second": 249.679, "eval_steps_per_second": 7.823, "step": 200 }, { "epoch": 1.12, "learning_rate": 0.000481810115350488, "loss": 0.7231, "step": 210 }, { "epoch": 1.17, "learning_rate": 0.0004809228039041704, "loss": 0.7902, "step": 220 }, { "epoch": 1.17, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7758567929267883, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0531, "eval_samples_per_second": 247.806, "eval_steps_per_second": 7.765, "step": 220 }, { "epoch": 1.22, "learning_rate": 0.00048003549245785273, "loss": 0.7591, "step": 230 }, { "epoch": 1.28, "learning_rate": 0.00047914818101153507, "loss": 0.817, "step": 240 }, { "epoch": 1.28, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.767899751663208, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1711, "eval_samples_per_second": 243.067, "eval_steps_per_second": 7.616, "step": 240 }, { "epoch": 1.33, "learning_rate": 0.0004782608695652174, "loss": 0.6681, "step": 250 }, { "epoch": 1.38, "learning_rate": 0.00047737355811889974, "loss": 0.9199, "step": 260 }, { "epoch": 1.38, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7462519407272339, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0331, "eval_samples_per_second": 248.63, "eval_steps_per_second": 7.79, "step": 260 }, { "epoch": 1.44, "learning_rate": 0.00047648624667258213, "loss": 0.7409, "step": 270 }, { "epoch": 1.49, "learning_rate": 0.0004755989352262644, "loss": 0.7246, "step": 280 }, { "epoch": 1.49, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7581822872161865, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1558, "eval_samples_per_second": 243.673, "eval_steps_per_second": 7.635, "step": 280 }, { "epoch": 1.54, "learning_rate": 0.00047471162377994675, "loss": 0.6876, "step": 290 }, { "epoch": 1.6, "learning_rate": 0.0004738243123336291, "loss": 0.7508, "step": 300 }, { "epoch": 1.6, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7464602589607239, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1548, "eval_samples_per_second": 243.711, "eval_steps_per_second": 7.636, "step": 300 }, { "epoch": 1.65, "learning_rate": 0.0004729370008873114, "loss": 0.8539, "step": 310 }, { "epoch": 1.7, "learning_rate": 0.0004720496894409938, "loss": 0.7011, "step": 320 }, { "epoch": 1.7, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.788334310054779, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.1655, "eval_samples_per_second": 243.289, "eval_steps_per_second": 7.623, "step": 320 }, { "epoch": 1.76, "learning_rate": 0.00047116237799467615, "loss": 0.9111, "step": 330 }, { "epoch": 1.81, "learning_rate": 0.0004702750665483585, "loss": 0.8442, "step": 340 }, { "epoch": 1.81, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7656621336936951, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2177, "eval_samples_per_second": 241.249, "eval_steps_per_second": 7.559, "step": 340 }, { "epoch": 1.86, "learning_rate": 0.00046938775510204083, "loss": 0.7179, "step": 350 }, { "epoch": 1.91, "learning_rate": 0.00046850044365572317, "loss": 1.0021, "step": 360 }, { "epoch": 1.91, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7755117416381836, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0871, "eval_samples_per_second": 246.422, "eval_steps_per_second": 7.721, "step": 360 }, { "epoch": 1.97, "learning_rate": 0.0004676131322094055, "loss": 0.8331, "step": 370 }, { "epoch": 2.02, "learning_rate": 0.00046672582076308784, "loss": 0.7982, "step": 380 }, { "epoch": 2.02, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.8436231017112732, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.0719, "eval_samples_per_second": 247.04, "eval_steps_per_second": 7.741, "step": 380 }, { "epoch": 2.07, "learning_rate": 0.0004658385093167702, "loss": 0.8408, "step": 390 }, { "epoch": 2.13, "learning_rate": 0.0004649511978704525, "loss": 0.8299, "step": 400 }, { "epoch": 2.13, "eval_accuracy": 0.7386666666666667, "eval_combined_score": 0.6626504648943422, "eval_f1": 0.6276400817995911, "eval_loss": 0.7582988142967224, "eval_precision": 0.5456284444444445, "eval_recall": 0.7386666666666667, "eval_runtime": 6.2274, "eval_samples_per_second": 240.873, "eval_steps_per_second": 7.547, "step": 400 } ], "logging_steps": 10, "max_steps": 5640, "num_train_epochs": 30, "save_steps": 100, "total_flos": 839857999970304.0, "trial_name": null, "trial_params": null }