{ "best_metric": 0.7006444053895724, "best_model_checkpoint": "tiny-bert-sst2-distilled/run-7/checkpoint-642", "epoch": 3.0, "eval_steps": 500, "global_step": 642, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.4419636726379395, "learning_rate": 4.778212463646413e-05, "loss": 0.4867, "step": 214 }, { "epoch": 1.0, "eval_accuracy": 0.6649091974223784, "eval_f1": 0.01718213058419244, "eval_loss": 0.4545574486255646, "eval_mcc": 0.009529862152017439, "eval_precision": 0.38461538461538464, "eval_recall": 0.008787346221441126, "eval_runtime": 3.147, "eval_samples_per_second": 542.424, "eval_steps_per_second": 17.159, "step": 214 }, { "epoch": 2.0, "grad_norm": 2.8326942920684814, "learning_rate": 3.9818437197053446e-05, "loss": 0.4528, "step": 428 }, { "epoch": 2.0, "eval_accuracy": 0.6959578207381371, "eval_f1": 0.40955631399317405, "eval_loss": 0.44587329030036926, "eval_mcc": 0.24713473389349372, "eval_precision": 0.5806451612903226, "eval_recall": 0.3163444639718805, "eval_runtime": 3.2126, "eval_samples_per_second": 531.351, "eval_steps_per_second": 16.809, "step": 428 }, { "epoch": 3.0, "grad_norm": 2.2975902557373047, "learning_rate": 3.1854749757642754e-05, "loss": 0.4424, "step": 642 }, { "epoch": 3.0, "eval_accuracy": 0.7006444053895724, "eval_f1": 0.396694214876033, "eval_loss": 0.4441916346549988, "eval_mcc": 0.25354492903122977, "eval_precision": 0.60431654676259, "eval_recall": 0.29525483304042177, "eval_runtime": 3.1517, "eval_samples_per_second": 541.618, "eval_steps_per_second": 17.134, "step": 642 } ], "logging_steps": 500, "max_steps": 1498, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "total_flos": 1574326993320.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "alpha": 0.7570540466770024, "learning_rate": 5.574581207587482e-05, "num_train_epochs": 7, "temperature": 25 } }