{ "best_metric": 0.9620120357508649, "best_model_checkpoint": "rubert_classification/checkpoint-2000", "epoch": 4.566210045662101, "eval_steps": 500, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.14, "eval_accuracy": 0.882, "eval_f1": 0.8860252812063524, "eval_loss": 0.3335667848587036, "eval_precision": 0.9101706722396378, "eval_recall": 0.882, "eval_runtime": 7.9932, "eval_samples_per_second": 187.659, "eval_steps_per_second": 11.76, "step": 500 }, { "epoch": 1.37, "grad_norm": 54.402042388916016, "learning_rate": 4.871134020618557e-05, "loss": 0.3238, "step": 600 }, { "epoch": 2.28, "eval_accuracy": 0.952, "eval_f1": 0.952629531198857, "eval_loss": 0.22119292616844177, "eval_precision": 0.955250771109112, "eval_recall": 0.952, "eval_runtime": 7.4028, "eval_samples_per_second": 202.626, "eval_steps_per_second": 12.698, "step": 1000 }, { "epoch": 2.74, "grad_norm": 0.6686217188835144, "learning_rate": 4.097938144329897e-05, "loss": 0.1138, "step": 1200 }, { "epoch": 3.42, "eval_accuracy": 0.9586666666666667, "eval_f1": 0.9584488790426743, "eval_loss": 0.19180671870708466, "eval_precision": 0.9584916263909182, "eval_recall": 0.9586666666666667, "eval_runtime": 7.383, "eval_samples_per_second": 203.169, "eval_steps_per_second": 12.732, "step": 1500 }, { "epoch": 4.11, "grad_norm": 0.031083475798368454, "learning_rate": 3.3247422680412374e-05, "loss": 0.0394, "step": 1800 }, { "epoch": 4.57, "eval_accuracy": 0.962, "eval_f1": 0.9620120357508649, "eval_loss": 0.2063756138086319, "eval_precision": 0.9620250729762693, "eval_recall": 0.962, "eval_runtime": 7.384, "eval_samples_per_second": 203.142, "eval_steps_per_second": 12.73, "step": 2000 } ], "logging_steps": 600, "max_steps": 4380, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 936301011222528.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }