{ "best_metric": 0.7954545454545454, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-vdjsiuit/checkpoint-600", "epoch": 3.0, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.0975422859191895, "learning_rate": 5.939561377324845e-05, "loss": 0.5513, "step": 10 }, { "epoch": 0.1, "grad_norm": 8.386995315551758, "learning_rate": 5.83889084550578e-05, "loss": 0.5725, "step": 20 }, { "epoch": 0.15, "grad_norm": 2.8792715072631836, "learning_rate": 5.738220313686715e-05, "loss": 0.5265, "step": 30 }, { "epoch": 0.2, "grad_norm": 4.608365535736084, "learning_rate": 5.63754978186765e-05, "loss": 0.5106, "step": 40 }, { "epoch": 0.25, "grad_norm": 3.161665201187134, "learning_rate": 5.5368792500485845e-05, "loss": 0.4356, "step": 50 }, { "epoch": 0.3, "grad_norm": 2.6395349502563477, "learning_rate": 5.4362087182295196e-05, "loss": 0.5053, "step": 60 }, { "epoch": 0.35, "grad_norm": 2.152228832244873, "learning_rate": 5.335538186410454e-05, "loss": 0.3511, "step": 70 }, { "epoch": 0.4, "grad_norm": 22.35204315185547, "learning_rate": 5.234867654591389e-05, "loss": 0.5189, "step": 80 }, { "epoch": 0.45, "grad_norm": 3.6376490592956543, "learning_rate": 5.134197122772324e-05, "loss": 0.3371, "step": 90 }, { "epoch": 0.5, "grad_norm": 3.2198097705841064, "learning_rate": 5.033526590953259e-05, "loss": 0.4037, "step": 100 }, { "epoch": 0.55, "grad_norm": 7.86613130569458, "learning_rate": 4.9328560591341935e-05, "loss": 0.4833, "step": 110 }, { "epoch": 0.6, "grad_norm": 3.6074087619781494, "learning_rate": 4.8321855273151286e-05, "loss": 0.2857, "step": 120 }, { "epoch": 0.65, "grad_norm": 6.2979230880737305, "learning_rate": 4.731514995496063e-05, "loss": 0.4716, "step": 130 }, { "epoch": 0.7, "grad_norm": 8.780591011047363, "learning_rate": 4.630844463676999e-05, "loss": 0.4018, "step": 140 }, { "epoch": 0.75, "grad_norm": 21.96924591064453, "learning_rate": 4.530173931857933e-05, "loss": 0.412, "step": 150 }, { "epoch": 0.8, "grad_norm": 5.481058597564697, "learning_rate": 4.4295034000388675e-05, "loss": 0.5499, "step": 160 }, { "epoch": 0.85, "grad_norm": 6.030953884124756, "learning_rate": 4.3288328682198025e-05, "loss": 0.4422, "step": 170 }, { "epoch": 0.9, "grad_norm": 1.1474953889846802, "learning_rate": 4.228162336400737e-05, "loss": 0.2512, "step": 180 }, { "epoch": 0.95, "grad_norm": 13.46764087677002, "learning_rate": 4.127491804581672e-05, "loss": 0.4896, "step": 190 }, { "epoch": 1.0, "grad_norm": 6.764571666717529, "learning_rate": 4.026821272762607e-05, "loss": 0.3959, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8775, "eval_f1": 0.776255707762557, "eval_loss": 0.42286473512649536, "eval_precision": 0.8854166666666666, "eval_recall": 0.6910569105691057, "eval_runtime": 1.5336, "eval_samples_per_second": 260.823, "eval_steps_per_second": 16.301, "step": 200 }, { "epoch": 1.05, "grad_norm": 13.774749755859375, "learning_rate": 3.926150740943542e-05, "loss": 0.2466, "step": 210 }, { "epoch": 1.1, "grad_norm": 0.9035859704017639, "learning_rate": 3.8254802091244765e-05, "loss": 0.258, "step": 220 }, { "epoch": 1.15, "grad_norm": 10.26409912109375, "learning_rate": 3.7248096773054115e-05, "loss": 0.2796, "step": 230 }, { "epoch": 1.2, "grad_norm": 16.81245994567871, "learning_rate": 3.624139145486346e-05, "loss": 0.2748, "step": 240 }, { "epoch": 1.25, "grad_norm": 0.9752582311630249, "learning_rate": 3.5234686136672817e-05, "loss": 0.3794, "step": 250 }, { "epoch": 1.3, "grad_norm": 0.8559211492538452, "learning_rate": 3.422798081848216e-05, "loss": 0.3013, "step": 260 }, { "epoch": 1.35, "grad_norm": 0.20962999761104584, "learning_rate": 3.322127550029151e-05, "loss": 0.3029, "step": 270 }, { "epoch": 1.4, "grad_norm": 9.075166702270508, "learning_rate": 3.2214570182100855e-05, "loss": 0.3187, "step": 280 }, { "epoch": 1.45, "grad_norm": 0.8023856282234192, "learning_rate": 3.1207864863910205e-05, "loss": 0.4383, "step": 290 }, { "epoch": 1.5, "grad_norm": 8.567304611206055, "learning_rate": 3.0201159545719553e-05, "loss": 0.1829, "step": 300 }, { "epoch": 1.55, "grad_norm": 8.392529487609863, "learning_rate": 2.91944542275289e-05, "loss": 0.3928, "step": 310 }, { "epoch": 1.6, "grad_norm": 0.7387467622756958, "learning_rate": 2.818774890933825e-05, "loss": 0.1683, "step": 320 }, { "epoch": 1.65, "grad_norm": 8.625945091247559, "learning_rate": 2.7181043591147598e-05, "loss": 0.2039, "step": 330 }, { "epoch": 1.7, "grad_norm": 0.30814623832702637, "learning_rate": 2.6174338272956945e-05, "loss": 0.1959, "step": 340 }, { "epoch": 1.75, "grad_norm": 7.88859748840332, "learning_rate": 2.5167632954766296e-05, "loss": 0.4723, "step": 350 }, { "epoch": 1.8, "grad_norm": 0.31379058957099915, "learning_rate": 2.4160927636575643e-05, "loss": 0.3442, "step": 360 }, { "epoch": 1.85, "grad_norm": 9.962308883666992, "learning_rate": 2.3154222318384993e-05, "loss": 0.4158, "step": 370 }, { "epoch": 1.9, "grad_norm": 3.3801684379577637, "learning_rate": 2.2147517000194337e-05, "loss": 0.2539, "step": 380 }, { "epoch": 1.95, "grad_norm": 2.923280954360962, "learning_rate": 2.1140811682003685e-05, "loss": 0.1824, "step": 390 }, { "epoch": 2.0, "grad_norm": 4.60498046875, "learning_rate": 2.0134106363813035e-05, "loss": 0.4522, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.8725, "eval_f1": 0.7792207792207793, "eval_loss": 0.318487286567688, "eval_precision": 0.8333333333333334, "eval_recall": 0.7317073170731707, "eval_runtime": 1.5288, "eval_samples_per_second": 261.647, "eval_steps_per_second": 16.353, "step": 400 }, { "epoch": 2.05, "grad_norm": 0.41749486327171326, "learning_rate": 1.9127401045622382e-05, "loss": 0.1156, "step": 410 }, { "epoch": 2.1, "grad_norm": 0.3070080876350403, "learning_rate": 1.812069572743173e-05, "loss": 0.0349, "step": 420 }, { "epoch": 2.15, "grad_norm": 2.974935293197632, "learning_rate": 1.711399040924108e-05, "loss": 0.133, "step": 430 }, { "epoch": 2.2, "grad_norm": 0.4423005282878876, "learning_rate": 1.6107285091050427e-05, "loss": 0.0806, "step": 440 }, { "epoch": 2.25, "grad_norm": 0.2689347267150879, "learning_rate": 1.5100579772859776e-05, "loss": 0.1243, "step": 450 }, { "epoch": 2.3, "grad_norm": 0.1341865360736847, "learning_rate": 1.4093874454669125e-05, "loss": 0.1945, "step": 460 }, { "epoch": 2.35, "grad_norm": 0.14216558635234833, "learning_rate": 1.3087169136478472e-05, "loss": 0.2593, "step": 470 }, { "epoch": 2.4, "grad_norm": 5.250720977783203, "learning_rate": 1.2080463818287821e-05, "loss": 0.3948, "step": 480 }, { "epoch": 2.45, "grad_norm": 18.877355575561523, "learning_rate": 1.1073758500097169e-05, "loss": 0.2856, "step": 490 }, { "epoch": 2.5, "grad_norm": 0.6348363161087036, "learning_rate": 1.0067053181906518e-05, "loss": 0.4102, "step": 500 }, { "epoch": 2.55, "grad_norm": 0.16438539326190948, "learning_rate": 9.060347863715865e-06, "loss": 0.2346, "step": 510 }, { "epoch": 2.6, "grad_norm": 0.23815393447875977, "learning_rate": 8.053642545525214e-06, "loss": 0.1046, "step": 520 }, { "epoch": 2.65, "grad_norm": 25.9287166595459, "learning_rate": 7.046937227334563e-06, "loss": 0.0799, "step": 530 }, { "epoch": 2.7, "grad_norm": 0.2302125245332718, "learning_rate": 6.040231909143911e-06, "loss": 0.1963, "step": 540 }, { "epoch": 2.75, "grad_norm": 0.14851589500904083, "learning_rate": 5.033526590953259e-06, "loss": 0.1668, "step": 550 }, { "epoch": 2.8, "grad_norm": 13.211697578430176, "learning_rate": 4.026821272762607e-06, "loss": 0.3321, "step": 560 }, { "epoch": 2.85, "grad_norm": 0.20074860751628876, "learning_rate": 3.0201159545719554e-06, "loss": 0.1587, "step": 570 }, { "epoch": 2.9, "grad_norm": 1.0804275274276733, "learning_rate": 2.0134106363813034e-06, "loss": 0.1044, "step": 580 }, { "epoch": 2.95, "grad_norm": 0.2024952471256256, "learning_rate": 1.0067053181906517e-06, "loss": 0.3374, "step": 590 }, { "epoch": 3.0, "grad_norm": 0.07060264050960541, "learning_rate": 0.0, "loss": 0.0739, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.865, "eval_f1": 0.7954545454545454, "eval_loss": 0.45124685764312744, "eval_precision": 0.7446808510638298, "eval_recall": 0.8536585365853658, "eval_runtime": 1.5828, "eval_samples_per_second": 252.719, "eval_steps_per_second": 15.795, "step": 600 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 635446111352832.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 6.0402319091439105e-05, "metric": "eval/loss", "num_train_epochs": 3, "per_device_train_batch_size": 8, "seed": 33 } }