{ "best_metric": 0.8816452026367188, "best_model_checkpoint": "autotrain-df80/checkpoint-35", "epoch": 5.0, "eval_steps": 500, "global_step": 35, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14285714285714285, "grad_norm": null, "learning_rate": 0.0, "loss": 3.1053, "step": 1 }, { "epoch": 0.2857142857142857, "grad_norm": null, "learning_rate": 0.0, "loss": 2.972, "step": 2 }, { "epoch": 0.42857142857142855, "grad_norm": null, "learning_rate": 0.0, "loss": 3.6741, "step": 3 }, { "epoch": 0.5714285714285714, "grad_norm": 70.11737060546875, "learning_rate": 1.25e-05, "loss": 2.1635, "step": 4 }, { "epoch": 0.7142857142857143, "grad_norm": 67.73457336425781, "learning_rate": 2.5e-05, "loss": 2.6355, "step": 5 }, { "epoch": 0.8571428571428571, "grad_norm": 71.76345825195312, "learning_rate": 3.7500000000000003e-05, "loss": 3.7052, "step": 6 }, { "epoch": 1.0, "grad_norm": 21.13036346435547, "learning_rate": 5e-05, "loss": 1.9736, "step": 7 }, { "epoch": 1.0, "eval_accuracy": 0.5416666666666666, "eval_f1_macro": 0.23423423423423426, "eval_f1_micro": 0.5416666666666666, "eval_f1_weighted": 0.40990990990990994, "eval_loss": 2.0108728408813477, "eval_precision_macro": 0.18840579710144925, "eval_precision_micro": 0.5416666666666666, "eval_precision_weighted": 0.3297101449275362, "eval_recall_macro": 0.30952380952380953, "eval_recall_micro": 0.5416666666666666, "eval_recall_weighted": 0.5416666666666666, "eval_runtime": 0.0836, "eval_samples_per_second": 287.125, "eval_steps_per_second": 23.927, "step": 7 }, { "epoch": 1.1428571428571428, "grad_norm": 37.35652542114258, "learning_rate": 4.8387096774193554e-05, "loss": 1.4028, "step": 8 }, { "epoch": 1.2857142857142856, "grad_norm": 72.43930053710938, "learning_rate": 4.67741935483871e-05, "loss": 0.5302, "step": 9 }, { "epoch": 1.4285714285714286, "grad_norm": 10.876357078552246, "learning_rate": 4.516129032258064e-05, "loss": 0.8016, 
"step": 10 }, { "epoch": 1.5714285714285714, "grad_norm": 26.06935691833496, "learning_rate": 4.3548387096774194e-05, "loss": 2.2303, "step": 11 }, { "epoch": 1.7142857142857144, "grad_norm": null, "learning_rate": 4.3548387096774194e-05, "loss": 1.7161, "step": 12 }, { "epoch": 1.8571428571428572, "grad_norm": 21.18684959411621, "learning_rate": 4.1935483870967746e-05, "loss": 1.4677, "step": 13 }, { "epoch": 2.0, "grad_norm": 13.552129745483398, "learning_rate": 4.032258064516129e-05, "loss": 0.6474, "step": 14 }, { "epoch": 2.0, "eval_accuracy": 0.5416666666666666, "eval_f1_macro": 0.3638583638583639, "eval_f1_micro": 0.5416666666666666, "eval_f1_weighted": 0.5375457875457875, "eval_loss": 1.3520864248275757, "eval_precision_macro": 0.3611111111111111, "eval_precision_micro": 0.5416666666666666, "eval_precision_weighted": 0.5451388888888888, "eval_recall_macro": 0.37566137566137564, "eval_recall_micro": 0.5416666666666666, "eval_recall_weighted": 0.5416666666666666, "eval_runtime": 0.0825, "eval_samples_per_second": 291.009, "eval_steps_per_second": 24.251, "step": 14 }, { "epoch": 2.142857142857143, "grad_norm": 11.4788236618042, "learning_rate": 3.870967741935484e-05, "loss": 0.5563, "step": 15 }, { "epoch": 2.2857142857142856, "grad_norm": 36.51395034790039, "learning_rate": 3.7096774193548386e-05, "loss": 0.4919, "step": 16 }, { "epoch": 2.4285714285714284, "grad_norm": 64.92756652832031, "learning_rate": 3.548387096774194e-05, "loss": 1.3394, "step": 17 }, { "epoch": 2.571428571428571, "grad_norm": 58.25400924682617, "learning_rate": 3.387096774193548e-05, "loss": 1.069, "step": 18 }, { "epoch": 2.7142857142857144, "grad_norm": 33.65617370605469, "learning_rate": 3.2258064516129034e-05, "loss": 0.474, "step": 19 }, { "epoch": 2.857142857142857, "grad_norm": 24.85539436340332, "learning_rate": 3.0645161290322585e-05, "loss": 0.8503, "step": 20 }, { "epoch": 3.0, "grad_norm": 9.094024658203125, "learning_rate": 2.9032258064516133e-05, "loss": 0.8696, 
"step": 21 }, { "epoch": 3.0, "eval_accuracy": 0.5833333333333334, "eval_f1_macro": 0.3013468013468013, "eval_f1_micro": 0.5833333333333334, "eval_f1_weighted": 0.48947811447811446, "eval_loss": 0.96966552734375, "eval_precision_macro": 0.3636363636363636, "eval_precision_micro": 0.5833333333333334, "eval_precision_weighted": 0.5321969696969697, "eval_recall_macro": 0.34656084656084657, "eval_recall_micro": 0.5833333333333334, "eval_recall_weighted": 0.5833333333333334, "eval_runtime": 0.0817, "eval_samples_per_second": 293.685, "eval_steps_per_second": 24.474, "step": 21 }, { "epoch": 3.142857142857143, "grad_norm": 9.66805362701416, "learning_rate": 2.7419354838709678e-05, "loss": 0.52, "step": 22 }, { "epoch": 3.2857142857142856, "grad_norm": 9.8311767578125, "learning_rate": 2.5806451612903226e-05, "loss": 0.51, "step": 23 }, { "epoch": 3.4285714285714284, "grad_norm": 11.982104301452637, "learning_rate": 2.4193548387096777e-05, "loss": 0.4699, "step": 24 }, { "epoch": 3.571428571428571, "grad_norm": 14.817587852478027, "learning_rate": 2.258064516129032e-05, "loss": 1.1386, "step": 25 }, { "epoch": 3.7142857142857144, "grad_norm": 3.237607955932617, "learning_rate": 2.0967741935483873e-05, "loss": 0.3454, "step": 26 }, { "epoch": 3.857142857142857, "grad_norm": 26.063825607299805, "learning_rate": 1.935483870967742e-05, "loss": 0.918, "step": 27 }, { "epoch": 4.0, "grad_norm": 22.578527450561523, "learning_rate": 1.774193548387097e-05, "loss": 0.5343, "step": 28 }, { "epoch": 4.0, "eval_accuracy": 0.5833333333333334, "eval_f1_macro": 0.3013468013468013, "eval_f1_micro": 0.5833333333333334, "eval_f1_weighted": 0.48947811447811446, "eval_loss": 0.9342803955078125, "eval_precision_macro": 0.3636363636363636, "eval_precision_micro": 0.5833333333333334, "eval_precision_weighted": 0.5321969696969697, "eval_recall_macro": 0.34656084656084657, "eval_recall_micro": 0.5833333333333334, "eval_recall_weighted": 0.5833333333333334, "eval_runtime": 0.08, 
"eval_samples_per_second": 300.024, "eval_steps_per_second": 25.002, "step": 28 }, { "epoch": 4.142857142857143, "grad_norm": 7.312303066253662, "learning_rate": 1.6129032258064517e-05, "loss": 0.5345, "step": 29 }, { "epoch": 4.285714285714286, "grad_norm": 5.162611484527588, "learning_rate": 1.4516129032258066e-05, "loss": 0.3541, "step": 30 }, { "epoch": 4.428571428571429, "grad_norm": 6.131501197814941, "learning_rate": 1.2903225806451613e-05, "loss": 0.4523, "step": 31 }, { "epoch": 4.571428571428571, "grad_norm": 14.81253433227539, "learning_rate": 1.129032258064516e-05, "loss": 0.5316, "step": 32 }, { "epoch": 4.714285714285714, "grad_norm": 10.304619789123535, "learning_rate": 9.67741935483871e-06, "loss": 0.4244, "step": 33 }, { "epoch": 4.857142857142857, "grad_norm": 7.296845436096191, "learning_rate": 8.064516129032258e-06, "loss": 0.6376, "step": 34 }, { "epoch": 5.0, "grad_norm": 6.571065425872803, "learning_rate": 6.451612903225806e-06, "loss": 0.2192, "step": 35 }, { "epoch": 5.0, "eval_accuracy": 0.75, "eval_f1_macro": 0.5019607843137255, "eval_f1_micro": 0.75, "eval_f1_weighted": 0.7313725490196079, "eval_loss": 0.8816452026367188, "eval_precision_macro": 0.5, "eval_precision_micro": 0.75, "eval_precision_weighted": 0.71875, "eval_recall_macro": 0.5079365079365079, "eval_recall_micro": 0.75, "eval_recall_weighted": 0.75, "eval_runtime": 0.0927, "eval_samples_per_second": 258.881, "eval_steps_per_second": 21.573, "step": 35 } ], "logging_steps": 1, "max_steps": 35, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 18417939240960.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }