{ "best_metric": 0.8127490039840638, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-xcydzmg4/checkpoint-300", "epoch": 3.0, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 3.299391984939575, "learning_rate": 3.163832728034676e-05, "loss": 0.6312, "step": 10 }, { "epoch": 0.2, "grad_norm": 2.2213969230651855, "learning_rate": 3.0547350477576185e-05, "loss": 0.5159, "step": 20 }, { "epoch": 0.3, "grad_norm": 1.8511892557144165, "learning_rate": 2.9456373674805607e-05, "loss": 0.5112, "step": 30 }, { "epoch": 0.4, "grad_norm": 3.693004608154297, "learning_rate": 2.836539687203503e-05, "loss": 0.3914, "step": 40 }, { "epoch": 0.5, "grad_norm": 6.287681579589844, "learning_rate": 2.727442006926445e-05, "loss": 0.3439, "step": 50 }, { "epoch": 0.6, "grad_norm": 3.235572576522827, "learning_rate": 2.6183443266493874e-05, "loss": 0.4263, "step": 60 }, { "epoch": 0.7, "grad_norm": 9.508350372314453, "learning_rate": 2.5092466463723296e-05, "loss": 0.3324, "step": 70 }, { "epoch": 0.8, "grad_norm": 3.4181947708129883, "learning_rate": 2.4001489660952715e-05, "loss": 0.4191, "step": 80 }, { "epoch": 0.9, "grad_norm": 3.315913200378418, "learning_rate": 2.2910512858182137e-05, "loss": 0.3647, "step": 90 }, { "epoch": 1.0, "grad_norm": 3.4039485454559326, "learning_rate": 2.181953605541156e-05, "loss": 0.2834, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.87, "eval_f1": 0.792, "eval_loss": 0.31944602727890015, "eval_precision": 0.7795275590551181, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5401, "eval_samples_per_second": 259.717, "eval_steps_per_second": 16.232, "step": 100 }, { "epoch": 1.1, "grad_norm": 8.336113929748535, "learning_rate": 2.072855925264098e-05, "loss": 0.2983, "step": 110 }, { "epoch": 1.2, "grad_norm": 9.28201675415039, "learning_rate": 1.9637582449870403e-05, "loss": 0.3149, "step": 120 }, { "epoch": 1.3, "grad_norm": 6.901111125946045, "learning_rate": 1.8546605647099826e-05, "loss": 0.3158, "step": 130 }, { "epoch": 1.4, "grad_norm": 1.6393898725509644, "learning_rate": 1.7455628844329248e-05, "loss": 0.2477, "step": 140 }, { "epoch": 1.5, "grad_norm": 3.3284659385681152, "learning_rate": 1.636465204155867e-05, "loss": 0.1975, "step": 150 }, { "epoch": 1.6, "grad_norm": 12.852226257324219, "learning_rate": 1.5273675238788092e-05, "loss": 0.2251, "step": 160 }, { "epoch": 1.7, "grad_norm": 10.670600891113281, "learning_rate": 1.4182698436017515e-05, "loss": 0.263, "step": 170 }, { "epoch": 1.8, "grad_norm": 2.7927801609039307, "learning_rate": 1.3091721633246937e-05, "loss": 0.168, "step": 180 }, { "epoch": 1.9, "grad_norm": 1.2662396430969238, "learning_rate": 1.2000744830476357e-05, "loss": 0.2702, "step": 190 }, { "epoch": 2.0, "grad_norm": 7.965029239654541, "learning_rate": 1.090976802770578e-05, "loss": 0.2884, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.87, "eval_f1": 0.7815126050420168, "eval_loss": 0.3038490414619446, "eval_precision": 0.808695652173913, "eval_recall": 0.7560975609756098, "eval_runtime": 1.5371, "eval_samples_per_second": 260.227, "eval_steps_per_second": 16.264, "step": 200 }, { "epoch": 2.1, "grad_norm": 6.486904144287109, "learning_rate": 9.818791224935202e-06, "loss": 0.1707, "step": 210 }, { "epoch": 2.2, "grad_norm": 1.786522626876831, "learning_rate": 8.727814422164624e-06, "loss": 0.2129, "step": 220 }, { "epoch": 2.3, "grad_norm": 8.180079460144043, "learning_rate": 7.636837619394046e-06, "loss": 0.1224, "step": 230 }, { "epoch": 2.4, "grad_norm": 9.521204948425293, "learning_rate": 6.545860816623468e-06, "loss": 0.1735, "step": 240 }, { "epoch": 2.5, "grad_norm": 4.741390705108643, "learning_rate": 5.45488401385289e-06, "loss": 0.1549, "step": 250 }, { "epoch": 2.6, "grad_norm": 2.954021453857422, "learning_rate": 4.363907211082312e-06, "loss": 0.1712, "step": 260 }, { "epoch": 2.7, "grad_norm": 14.713294982910156, "learning_rate": 3.272930408311734e-06, "loss": 0.2493, "step": 270 }, { "epoch": 2.8, "grad_norm": 0.9680449366569519, "learning_rate": 2.181953605541156e-06, "loss": 0.1804, "step": 280 }, { "epoch": 2.9, "grad_norm": 14.325002670288086, "learning_rate": 1.090976802770578e-06, "loss": 0.2148, "step": 290 }, { "epoch": 3.0, "grad_norm": 1.2194037437438965, "learning_rate": 0.0, "loss": 0.1031, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.8825, "eval_f1": 0.8127490039840638, "eval_loss": 0.3291611075401306, "eval_precision": 0.796875, "eval_recall": 0.8292682926829268, "eval_runtime": 1.5817, "eval_samples_per_second": 252.886, "eval_steps_per_second": 15.805, "step": 300 } ], "logging_steps": 10, "max_steps": 300, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 635446111352832.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 3.272930408311734e-05, "metric": "eval/loss", "num_train_epochs": 3, "per_device_train_batch_size": 16, "seed": 40 } }