{ "best_metric": 0.48642775416374207, "best_model_checkpoint": "../../experiments_checkpoints/MAdAiLab/distilbert/distilbert_base_uncased_twitter/checkpoint-100", "epoch": 0.7352941176470589, "eval_steps": 50, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "grad_norm": 0.9765774607658386, "learning_rate": 1.950980392156863e-05, "loss": 0.6187, "step": 10 }, { "epoch": 0.15, "grad_norm": 1.5788111686706543, "learning_rate": 1.9019607843137255e-05, "loss": 0.5843, "step": 20 }, { "epoch": 0.22, "grad_norm": 3.5568511486053467, "learning_rate": 1.8529411764705884e-05, "loss": 0.5014, "step": 30 }, { "epoch": 0.29, "grad_norm": 1.6089941263198853, "learning_rate": 1.8039215686274513e-05, "loss": 0.4959, "step": 40 }, { "epoch": 0.37, "grad_norm": 2.2181787490844727, "learning_rate": 1.7549019607843138e-05, "loss": 0.4671, "step": 50 }, { "epoch": 0.37, "eval_accuracy": 0.7665441176470589, "eval_f1_macro": 0.7213912692811775, "eval_f1_micro": 0.7665441176470589, "eval_loss": 0.4990096390247345, "eval_runtime": 0.5393, "eval_samples_per_second": 2017.389, "eval_steps_per_second": 31.522, "step": 50 }, { "epoch": 0.44, "grad_norm": 1.884325623512268, "learning_rate": 1.7058823529411767e-05, "loss": 0.49, "step": 60 }, { "epoch": 0.51, "grad_norm": 2.515151262283325, "learning_rate": 1.6568627450980395e-05, "loss": 0.468, "step": 70 }, { "epoch": 0.59, "grad_norm": 1.46433424949646, "learning_rate": 1.607843137254902e-05, "loss": 0.4629, "step": 80 }, { "epoch": 0.66, "grad_norm": 3.789057731628418, "learning_rate": 1.558823529411765e-05, "loss": 0.4995, "step": 90 }, { "epoch": 0.74, "grad_norm": 1.6152119636535645, "learning_rate": 1.5098039215686276e-05, "loss": 0.4724, "step": 100 }, { "epoch": 0.74, "eval_accuracy": 0.7665441176470589, "eval_f1_macro": 0.7129836193609764, "eval_f1_micro": 0.7665441176470589, "eval_loss": 0.48642775416374207, "eval_runtime": 0.5851, "eval_samples_per_second": 1859.58, "eval_steps_per_second": 29.056, "step": 100 } ], "logging_steps": 10, "max_steps": 408, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 50, "total_flos": 211947828019200.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }