{ "best_metric": 0.6226160526275635, "best_model_checkpoint": "bert_base_lda_50_v1_mrpc/checkpoint-150", "epoch": 15.0, "eval_steps": 500, "global_step": 225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 22.977022171020508, "learning_rate": 0.00098, "loss": 1.9018, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.3161764705882353, "eval_combined_score": 0.15808823529411764, "eval_f1": 0.0, "eval_loss": 1.746949315071106, "eval_runtime": 0.2773, "eval_samples_per_second": 1471.484, "eval_steps_per_second": 7.213, "step": 15 }, { "epoch": 2.0, "grad_norm": 5.6883745193481445, "learning_rate": 0.00096, "loss": 0.7758, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.713627815246582, "eval_runtime": 0.2735, "eval_samples_per_second": 1491.747, "eval_steps_per_second": 7.312, "step": 30 }, { "epoch": 3.0, "grad_norm": 2.137789249420166, "learning_rate": 0.00094, "loss": 0.6464, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6401654481887817, "eval_runtime": 0.3133, "eval_samples_per_second": 1302.407, "eval_steps_per_second": 6.384, "step": 45 }, { "epoch": 4.0, "grad_norm": 0.9102729558944702, "learning_rate": 0.00092, "loss": 0.6412, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6260914206504822, "eval_runtime": 0.2816, "eval_samples_per_second": 1449.033, "eval_steps_per_second": 7.103, "step": 60 }, { "epoch": 5.0, "grad_norm": 0.8048895001411438, "learning_rate": 0.0009000000000000001, "loss": 0.6318, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6250861883163452, "eval_runtime": 0.2775, "eval_samples_per_second": 1470.21, "eval_steps_per_second": 7.207, "step": 75 }, { "epoch": 6.0, "grad_norm": 0.975186288356781, "learning_rate": 0.00088, "loss": 0.6352, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6246122717857361, "eval_runtime": 0.2709, "eval_samples_per_second": 1506.067, "eval_steps_per_second": 7.383, "step": 90 }, { "epoch": 7.0, "grad_norm": 0.4961640536785126, "learning_rate": 0.00086, "loss": 0.635, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6256270408630371, "eval_runtime": 0.2693, "eval_samples_per_second": 1515.101, "eval_steps_per_second": 7.427, "step": 105 }, { "epoch": 8.0, "grad_norm": 0.3314763903617859, "learning_rate": 0.00084, "loss": 0.634, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6243536472320557, "eval_runtime": 0.2744, "eval_samples_per_second": 1487.06, "eval_steps_per_second": 7.29, "step": 120 }, { "epoch": 9.0, "grad_norm": 0.5980406403541565, "learning_rate": 0.00082, "loss": 0.6297, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.634880542755127, "eval_runtime": 0.2751, "eval_samples_per_second": 1482.984, "eval_steps_per_second": 7.27, "step": 135 }, { "epoch": 10.0, "grad_norm": 0.4592650532722473, "learning_rate": 0.0008, "loss": 0.6357, "step": 150 }, { "epoch": 10.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6226160526275635, "eval_runtime": 0.2684, "eval_samples_per_second": 1520.307, "eval_steps_per_second": 7.452, "step": 150 }, { "epoch": 11.0, "grad_norm": 0.7528880834579468, "learning_rate": 0.0007800000000000001, "loss": 0.6341, "step": 165 }, { "epoch": 11.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6255601048469543, "eval_runtime": 0.2722, "eval_samples_per_second": 1499.113, "eval_steps_per_second": 7.349, "step": 165 }, { "epoch": 12.0, "grad_norm": 0.380775511264801, "learning_rate": 0.00076, "loss": 0.6333, "step": 180 }, { "epoch": 12.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6249569058418274, "eval_runtime": 0.2722, "eval_samples_per_second": 1498.681, "eval_steps_per_second": 7.346, "step": 180 }, { "epoch": 13.0, "grad_norm": 0.6090389490127563, "learning_rate": 0.00074, "loss": 0.6309, "step": 195 }, { "epoch": 13.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6240330338478088, "eval_runtime": 0.2747, "eval_samples_per_second": 1485.187, "eval_steps_per_second": 7.28, "step": 195 }, { "epoch": 14.0, "grad_norm": 1.031883716583252, "learning_rate": 0.0007199999999999999, "loss": 0.6343, "step": 210 }, { "epoch": 14.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6258903741836548, "eval_runtime": 0.2733, "eval_samples_per_second": 1492.679, "eval_steps_per_second": 7.317, "step": 210 }, { "epoch": 15.0, "grad_norm": 0.6238628029823303, "learning_rate": 0.0007, "loss": 0.6328, "step": 225 }, { "epoch": 15.0, "eval_accuracy": 0.6838235294117647, "eval_combined_score": 0.7480253018237863, "eval_f1": 0.8122270742358079, "eval_loss": 0.6254164576530457, "eval_runtime": 0.2698, "eval_samples_per_second": 1512.061, "eval_steps_per_second": 7.412, "step": 225 }, { "epoch": 15.0, "step": 225, "total_flos": 7238185132953600.0, "train_loss": 0.7287869644165039, "train_runtime": 125.4342, "train_samples_per_second": 1462.121, "train_steps_per_second": 5.979 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7238185132953600.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }