{ "best_metric": 0.19515299797058105, "best_model_checkpoint": "autotrain-rtvyh-y5ben/checkpoint-1146", "epoch": 3.0, "eval_steps": 500, "global_step": 1146, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06544502617801047, "grad_norm": 27.387327194213867, "learning_rate": 9.130434782608697e-06, "loss": 1.9398, "step": 25 }, { "epoch": 0.13089005235602094, "grad_norm": 22.39668846130371, "learning_rate": 2e-05, "loss": 1.7177, "step": 50 }, { "epoch": 0.19633507853403143, "grad_norm": 17.08791160583496, "learning_rate": 3.086956521739131e-05, "loss": 1.3264, "step": 75 }, { "epoch": 0.2617801047120419, "grad_norm": 14.287014961242676, "learning_rate": 4.1739130434782605e-05, "loss": 0.9999, "step": 100 }, { "epoch": 0.32722513089005234, "grad_norm": 5.287769794464111, "learning_rate": 4.97090203685742e-05, "loss": 0.7594, "step": 125 }, { "epoch": 0.39267015706806285, "grad_norm": 37.07988739013672, "learning_rate": 4.849660523763337e-05, "loss": 0.5061, "step": 150 }, { "epoch": 0.4581151832460733, "grad_norm": 2.10866641998291, "learning_rate": 4.728419010669253e-05, "loss": 0.4617, "step": 175 }, { "epoch": 0.5235602094240838, "grad_norm": 37.698177337646484, "learning_rate": 4.6071774975751696e-05, "loss": 0.3837, "step": 200 }, { "epoch": 0.5890052356020943, "grad_norm": 20.22072410583496, "learning_rate": 4.485935984481087e-05, "loss": 0.4478, "step": 225 }, { "epoch": 0.6544502617801047, "grad_norm": 37.976768493652344, "learning_rate": 4.364694471387003e-05, "loss": 0.4318, "step": 250 }, { "epoch": 0.7198952879581152, "grad_norm": 25.226699829101562, "learning_rate": 4.2434529582929193e-05, "loss": 0.3503, "step": 275 }, { "epoch": 0.7853403141361257, "grad_norm": 38.52054977416992, "learning_rate": 4.1270611057225994e-05, "loss": 0.4691, "step": 300 }, { "epoch": 0.8507853403141361, "grad_norm": 36.57845687866211, "learning_rate": 4.005819592628517e-05, "loss": 0.3103, "step": 325 }, { "epoch": 0.9162303664921466, "grad_norm": 21.468103408813477, "learning_rate": 3.8845780795344326e-05, "loss": 0.4161, "step": 350 }, { "epoch": 0.981675392670157, "grad_norm": 1.5287563800811768, "learning_rate": 3.763336566440349e-05, "loss": 0.2762, "step": 375 }, { "epoch": 1.0, "eval_accuracy": 0.912303664921466, "eval_f1_macro": 0.8881661676824378, "eval_f1_micro": 0.912303664921466, "eval_f1_weighted": 0.9139427487256797, "eval_loss": 0.24220755696296692, "eval_precision_macro": 0.878941728706775, "eval_precision_micro": 0.912303664921466, "eval_precision_weighted": 0.9193888936099365, "eval_recall_macro": 0.903916491565807, "eval_recall_micro": 0.912303664921466, "eval_recall_weighted": 0.912303664921466, "eval_runtime": 18.3017, "eval_samples_per_second": 166.979, "eval_steps_per_second": 10.436, "step": 382 }, { "epoch": 1.0471204188481675, "grad_norm": 82.7063980102539, "learning_rate": 3.6420950533462664e-05, "loss": 0.4792, "step": 400 }, { "epoch": 1.112565445026178, "grad_norm": 1.9939672946929932, "learning_rate": 3.520853540252182e-05, "loss": 0.3641, "step": 425 }, { "epoch": 1.1780104712041886, "grad_norm": 2.1117889881134033, "learning_rate": 3.399612027158099e-05, "loss": 0.3835, "step": 450 }, { "epoch": 1.243455497382199, "grad_norm": 12.421843528747559, "learning_rate": 3.278370514064016e-05, "loss": 0.3868, "step": 475 }, { "epoch": 1.3089005235602094, "grad_norm": 4.019728183746338, "learning_rate": 3.157129000969932e-05, "loss": 0.3204, "step": 500 }, { "epoch": 1.3743455497382198, "grad_norm": 23.721004486083984, "learning_rate": 3.0358874878758486e-05, "loss": 0.2277, "step": 525 }, { "epoch": 1.4397905759162304, "grad_norm": 37.32318878173828, "learning_rate": 2.9146459747817655e-05, "loss": 0.4918, "step": 550 }, { "epoch": 1.5052356020942408, "grad_norm": 0.5763813257217407, "learning_rate": 2.793404461687682e-05, "loss": 0.2702, "step": 575 }, { "epoch": 1.5706806282722514, "grad_norm": 25.39264488220215, "learning_rate": 2.6721629485935983e-05, "loss": 0.2803, "step": 600 }, { "epoch": 1.6361256544502618, "grad_norm": 48.61098861694336, "learning_rate": 2.5509214354995155e-05, "loss": 0.3219, "step": 625 }, { "epoch": 1.7015706806282722, "grad_norm": 19.207382202148438, "learning_rate": 2.4296799224054317e-05, "loss": 0.4093, "step": 650 }, { "epoch": 1.7670157068062826, "grad_norm": 19.071319580078125, "learning_rate": 2.3084384093113483e-05, "loss": 0.3342, "step": 675 }, { "epoch": 1.8324607329842932, "grad_norm": 24.700105667114258, "learning_rate": 2.187196896217265e-05, "loss": 0.3489, "step": 700 }, { "epoch": 1.8979057591623036, "grad_norm": 25.030014038085938, "learning_rate": 2.0659553831231815e-05, "loss": 0.3604, "step": 725 }, { "epoch": 1.9633507853403143, "grad_norm": 51.47434616088867, "learning_rate": 1.944713870029098e-05, "loss": 0.2948, "step": 750 }, { "epoch": 2.0, "eval_accuracy": 0.9195026178010471, "eval_f1_macro": 0.8825867245438248, "eval_f1_micro": 0.9195026178010471, "eval_f1_weighted": 0.9191139958840788, "eval_loss": 0.22944672405719757, "eval_precision_macro": 0.8833301888913111, "eval_precision_micro": 0.9195026178010471, "eval_precision_weighted": 0.9239632346357177, "eval_recall_macro": 0.8933703672169516, "eval_recall_micro": 0.9195026178010471, "eval_recall_weighted": 0.9195026178010471, "eval_runtime": 18.2799, "eval_samples_per_second": 167.178, "eval_steps_per_second": 10.449, "step": 764 }, { "epoch": 2.0287958115183247, "grad_norm": 13.710552215576172, "learning_rate": 1.8234723569350146e-05, "loss": 0.2163, "step": 775 }, { "epoch": 2.094240837696335, "grad_norm": 14.117116928100586, "learning_rate": 1.702230843840931e-05, "loss": 0.286, "step": 800 }, { "epoch": 2.1596858638743455, "grad_norm": 3.677631378173828, "learning_rate": 1.5809893307468477e-05, "loss": 0.421, "step": 825 }, { "epoch": 2.225130890052356, "grad_norm": 19.017574310302734, "learning_rate": 1.4597478176527643e-05, "loss": 0.3262, "step": 850 }, { "epoch": 2.2905759162303667, "grad_norm": 4.1396331787109375, "learning_rate": 1.338506304558681e-05, "loss": 0.3032, "step": 875 }, { "epoch": 2.356020942408377, "grad_norm": 14.411179542541504, "learning_rate": 1.2172647914645975e-05, "loss": 0.3508, "step": 900 }, { "epoch": 2.4214659685863875, "grad_norm": 3.900756597518921, "learning_rate": 1.096023278370514e-05, "loss": 0.4431, "step": 925 }, { "epoch": 2.486910994764398, "grad_norm": 3.200969696044922, "learning_rate": 9.747817652764308e-06, "loss": 0.2815, "step": 950 }, { "epoch": 2.5523560209424083, "grad_norm": 17.05501365661621, "learning_rate": 8.535402521823473e-06, "loss": 0.3593, "step": 975 }, { "epoch": 2.6178010471204187, "grad_norm": 15.708697319030762, "learning_rate": 7.322987390882638e-06, "loss": 0.2373, "step": 1000 }, { "epoch": 2.683246073298429, "grad_norm": 30.304662704467773, "learning_rate": 6.159068865179437e-06, "loss": 0.3447, "step": 1025 }, { "epoch": 2.7486910994764395, "grad_norm": 18.696701049804688, "learning_rate": 4.946653734238604e-06, "loss": 0.225, "step": 1050 }, { "epoch": 2.8141361256544504, "grad_norm": 8.534065246582031, "learning_rate": 3.734238603297769e-06, "loss": 0.2218, "step": 1075 }, { "epoch": 2.8795811518324608, "grad_norm": 2.1658194065093994, "learning_rate": 2.521823472356935e-06, "loss": 0.2223, "step": 1100 }, { "epoch": 2.945026178010471, "grad_norm": 61.290462493896484, "learning_rate": 1.309408341416101e-06, "loss": 0.3373, "step": 1125 }, { "epoch": 3.0, "eval_accuracy": 0.931282722513089, "eval_f1_macro": 0.9028776359481251, "eval_f1_micro": 0.931282722513089, "eval_f1_weighted": 0.9312850320245419, "eval_loss": 0.19515299797058105, "eval_precision_macro": 0.8955979809260896, "eval_precision_micro": 0.931282722513089, "eval_precision_weighted": 0.9330235887739936, "eval_recall_macro": 0.9159046117460236, "eval_recall_micro": 0.931282722513089, "eval_recall_weighted": 0.931282722513089, "eval_runtime": 18.4024, "eval_samples_per_second": 166.066, "eval_steps_per_second": 10.379, "step": 1146 } ], "logging_steps": 25, "max_steps": 1146, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3077959759396864e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }