{ "best_metric": 0.18285594880580902, "best_model_checkpoint": "autotrain-6doma-5m8vf/checkpoint-1107", "epoch": 3.0, "eval_steps": 500, "global_step": 1107, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06775067750677506, "grad_norm": 16.566085815429688, "learning_rate": 9.90990990990991e-06, "loss": 1.5667, "step": 25 }, { "epoch": 0.13550135501355012, "grad_norm": 25.27834129333496, "learning_rate": 2.117117117117117e-05, "loss": 1.2958, "step": 50 }, { "epoch": 0.2032520325203252, "grad_norm": 14.882951736450195, "learning_rate": 3.2432432432432436e-05, "loss": 1.0021, "step": 75 }, { "epoch": 0.27100271002710025, "grad_norm": 12.05569839477539, "learning_rate": 4.369369369369369e-05, "loss": 0.7635, "step": 100 }, { "epoch": 0.33875338753387535, "grad_norm": 15.701233863830566, "learning_rate": 4.944779116465864e-05, "loss": 0.572, "step": 125 }, { "epoch": 0.4065040650406504, "grad_norm": 45.54197692871094, "learning_rate": 4.8242971887550205e-05, "loss": 0.4778, "step": 150 }, { "epoch": 0.4742547425474255, "grad_norm": 20.10284996032715, "learning_rate": 4.698795180722892e-05, "loss": 0.4213, "step": 175 }, { "epoch": 0.5420054200542005, "grad_norm": 52.795291900634766, "learning_rate": 4.573293172690764e-05, "loss": 0.4171, "step": 200 }, { "epoch": 0.6097560975609756, "grad_norm": 32.24135208129883, "learning_rate": 4.447791164658635e-05, "loss": 0.4031, "step": 225 }, { "epoch": 0.6775067750677507, "grad_norm": 48.17521286010742, "learning_rate": 4.3222891566265064e-05, "loss": 0.3206, "step": 250 }, { "epoch": 0.7452574525745257, "grad_norm": 27.5257511138916, "learning_rate": 4.196787148594378e-05, "loss": 0.3616, "step": 275 }, { "epoch": 0.8130081300813008, "grad_norm": 15.912370681762695, "learning_rate": 4.071285140562249e-05, "loss": 0.508, "step": 300 }, { "epoch": 0.8807588075880759, "grad_norm": 12.763589859008789, "learning_rate": 3.9508032128514064e-05, "loss": 0.3685, "step": 325 }, { "epoch": 0.948509485094851, "grad_norm": 20.36044692993164, "learning_rate": 3.8253012048192774e-05, "loss": 0.393, "step": 350 }, { "epoch": 1.0, "eval_accuracy": 0.9189005768578216, "eval_f1_macro": 0.8913236764060113, "eval_f1_micro": 0.9189005768578216, "eval_f1_weighted": 0.9195807716070247, "eval_loss": 0.2322985827922821, "eval_precision_macro": 0.8992556342366311, "eval_precision_micro": 0.9189005768578216, "eval_precision_weighted": 0.9235420876186199, "eval_recall_macro": 0.8887207219589304, "eval_recall_micro": 0.9189005768578216, "eval_recall_weighted": 0.9189005768578216, "eval_runtime": 19.8492, "eval_samples_per_second": 148.469, "eval_steps_per_second": 9.32, "step": 369 }, { "epoch": 1.016260162601626, "grad_norm": 14.446638107299805, "learning_rate": 3.699799196787149e-05, "loss": 0.2934, "step": 375 }, { "epoch": 1.084010840108401, "grad_norm": 29.589651107788086, "learning_rate": 3.57429718875502e-05, "loss": 0.3801, "step": 400 }, { "epoch": 1.151761517615176, "grad_norm": 23.76276397705078, "learning_rate": 3.4487951807228916e-05, "loss": 0.2276, "step": 425 }, { "epoch": 1.2195121951219512, "grad_norm": 33.06072998046875, "learning_rate": 3.323293172690763e-05, "loss": 0.2623, "step": 450 }, { "epoch": 1.2872628726287263, "grad_norm": 31.562694549560547, "learning_rate": 3.197791164658634e-05, "loss": 0.3324, "step": 475 }, { "epoch": 1.3550135501355014, "grad_norm": 25.050046920776367, "learning_rate": 3.072289156626506e-05, "loss": 0.3613, "step": 500 }, { "epoch": 1.4227642276422765, "grad_norm": 5.65738582611084, "learning_rate": 2.9467871485943778e-05, "loss": 0.3689, "step": 525 }, { "epoch": 1.4905149051490514, "grad_norm": 30.50360870361328, "learning_rate": 2.821285140562249e-05, "loss": 0.2128, "step": 550 }, { "epoch": 1.5582655826558267, "grad_norm": 31.306838989257812, "learning_rate": 2.6957831325301207e-05, "loss": 0.3329, "step": 575 }, { "epoch": 1.6260162601626016, "grad_norm": 5.569540023803711, "learning_rate": 2.570281124497992e-05, "loss": 0.3934, "step": 600 }, { "epoch": 1.6937669376693767, "grad_norm": 79.83793640136719, "learning_rate": 2.4447791164658633e-05, "loss": 0.3329, "step": 625 }, { "epoch": 1.7615176151761518, "grad_norm": 11.711432456970215, "learning_rate": 2.319277108433735e-05, "loss": 0.3065, "step": 650 }, { "epoch": 1.8292682926829267, "grad_norm": 27.71021842956543, "learning_rate": 2.1937751004016066e-05, "loss": 0.3361, "step": 675 }, { "epoch": 1.897018970189702, "grad_norm": 24.346481323242188, "learning_rate": 2.068273092369478e-05, "loss": 0.3967, "step": 700 }, { "epoch": 1.9647696476964769, "grad_norm": 7.5306549072265625, "learning_rate": 1.9427710843373495e-05, "loss": 0.3304, "step": 725 }, { "epoch": 2.0, "eval_accuracy": 0.9284017645062775, "eval_f1_macro": 0.903605865288441, "eval_f1_micro": 0.9284017645062775, "eval_f1_weighted": 0.9277100982185731, "eval_loss": 0.2046061009168625, "eval_precision_macro": 0.9095877174004062, "eval_precision_micro": 0.9284017645062775, "eval_precision_weighted": 0.9281331331487362, "eval_recall_macro": 0.8989112570392443, "eval_recall_micro": 0.9284017645062775, "eval_recall_weighted": 0.9284017645062775, "eval_runtime": 19.9432, "eval_samples_per_second": 147.769, "eval_steps_per_second": 9.276, "step": 738 }, { "epoch": 2.032520325203252, "grad_norm": 28.1395206451416, "learning_rate": 1.822289156626506e-05, "loss": 0.332, "step": 750 }, { "epoch": 2.100271002710027, "grad_norm": 7.682183265686035, "learning_rate": 1.6967871485943776e-05, "loss": 0.2995, "step": 775 }, { "epoch": 2.168021680216802, "grad_norm": 23.640390396118164, "learning_rate": 1.5712851405622492e-05, "loss": 0.3089, "step": 800 }, { "epoch": 2.2357723577235773, "grad_norm": 3.6244945526123047, "learning_rate": 1.4457831325301205e-05, "loss": 0.2557, "step": 825 }, { "epoch": 2.303523035230352, "grad_norm": 109.68293762207031, "learning_rate": 1.3202811244979921e-05, "loss": 0.3734, "step": 850 }, { "epoch": 2.3712737127371275, "grad_norm": 28.19609832763672, "learning_rate": 1.1947791164658636e-05, "loss": 0.3144, "step": 875 }, { "epoch": 2.4390243902439024, "grad_norm": 17.588850021362305, "learning_rate": 1.069277108433735e-05, "loss": 0.4061, "step": 900 }, { "epoch": 2.5067750677506773, "grad_norm": 21.384654998779297, "learning_rate": 9.437751004016063e-06, "loss": 0.2626, "step": 925 }, { "epoch": 2.5745257452574526, "grad_norm": 0.35269397497177124, "learning_rate": 8.18273092369478e-06, "loss": 0.2822, "step": 950 }, { "epoch": 2.642276422764228, "grad_norm": 21.37306785583496, "learning_rate": 6.927710843373494e-06, "loss": 0.4436, "step": 975 }, { "epoch": 2.710027100271003, "grad_norm": 17.97796630859375, "learning_rate": 5.672690763052209e-06, "loss": 0.2517, "step": 1000 }, { "epoch": 2.7777777777777777, "grad_norm": 9.19117259979248, "learning_rate": 4.417670682730924e-06, "loss": 0.2395, "step": 1025 }, { "epoch": 2.845528455284553, "grad_norm": 64.22978210449219, "learning_rate": 3.1626506024096387e-06, "loss": 0.3387, "step": 1050 }, { "epoch": 2.913279132791328, "grad_norm": 1.4875394105911255, "learning_rate": 1.9076305220883537e-06, "loss": 0.2559, "step": 1075 }, { "epoch": 2.9810298102981028, "grad_norm": 0.5281310677528381, "learning_rate": 6.526104417670682e-07, "loss": 0.3605, "step": 1100 }, { "epoch": 3.0, "eval_accuracy": 0.9365456396335257, "eval_f1_macro": 0.9148614413559308, "eval_f1_micro": 0.9365456396335257, "eval_f1_weighted": 0.9364564915178187, "eval_loss": 0.18285594880580902, "eval_precision_macro": 0.9159613318061458, "eval_precision_micro": 0.9365456396335257, "eval_precision_weighted": 0.9365041505044936, "eval_recall_macro": 0.9139276800740521, "eval_recall_micro": 0.9365456396335257, "eval_recall_weighted": 0.9365456396335257, "eval_runtime": 19.7723, "eval_samples_per_second": 149.047, "eval_steps_per_second": 9.357, "step": 1107 } ], "logging_steps": 25, "max_steps": 1107, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2.225421168402862e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }