{ "best_metric": 1.0650867223739624, "best_model_checkpoint": "autotrain-byt8e-zygc3/checkpoint-1082", "epoch": 1.0, "eval_steps": 500, "global_step": 1082, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02310536044362292, "grad_norm": 3.5127339363098145, "learning_rate": 3.846153846153846e-05, "loss": 1.0872, "step": 25 }, { "epoch": 0.04621072088724584, "grad_norm": 6.642369270324707, "learning_rate": 7.692307692307693e-05, "loss": 1.1215, "step": 50 }, { "epoch": 0.06931608133086876, "grad_norm": 6.217954158782959, "learning_rate": 0.0001153846153846154, "loss": 1.0733, "step": 75 }, { "epoch": 0.09242144177449169, "grad_norm": 2.9232001304626465, "learning_rate": 0.00015384615384615385, "loss": 1.1347, "step": 100 }, { "epoch": 0.11552680221811461, "grad_norm": 3.7540340423583984, "learning_rate": 0.00019230769230769233, "loss": 1.3273, "step": 125 }, { "epoch": 0.13863216266173753, "grad_norm": 4.9391679763793945, "learning_rate": 0.0002307692307692308, "loss": 1.1507, "step": 150 }, { "epoch": 0.16173752310536044, "grad_norm": 1.5326274633407593, "learning_rate": 0.0002692307692307692, "loss": 1.1118, "step": 175 }, { "epoch": 0.18484288354898337, "grad_norm": 1.3186924457550049, "learning_rate": 0.0003076923076923077, "loss": 1.1426, "step": 200 }, { "epoch": 0.20794824399260628, "grad_norm": 3.582869052886963, "learning_rate": 0.00034615384615384613, "loss": 1.175, "step": 225 }, { "epoch": 0.23105360443622922, "grad_norm": 2.4438865184783936, "learning_rate": 0.00038461538461538467, "loss": 1.0483, "step": 250 }, { "epoch": 0.2541589648798521, "grad_norm": 6.818902492523193, "learning_rate": 0.0004230769230769231, "loss": 1.0661, "step": 275 }, { "epoch": 0.27726432532347506, "grad_norm": 1.4846845865249634, "learning_rate": 0.0004615384615384616, "loss": 1.1412, "step": 300 }, { "epoch": 0.300369685767098, "grad_norm": 1.0014742612838745, "learning_rate": 0.0005, "loss": 1.0968, "step": 325 }, { "epoch": 0.3234750462107209, "grad_norm": 0.8473600745201111, "learning_rate": 0.0004957206436152003, "loss": 1.1779, "step": 350 }, { "epoch": 0.3465804066543438, "grad_norm": 3.419077157974243, "learning_rate": 0.0004914412872304005, "loss": 1.1943, "step": 375 }, { "epoch": 0.36968576709796674, "grad_norm": 0.8151220083236694, "learning_rate": 0.00048716193084560086, "loss": 1.0639, "step": 400 }, { "epoch": 0.3927911275415896, "grad_norm": 1.3580329418182373, "learning_rate": 0.0004828825744608011, "loss": 1.2678, "step": 425 }, { "epoch": 0.41589648798521256, "grad_norm": 0.5541791319847107, "learning_rate": 0.0004786032180760014, "loss": 1.1612, "step": 450 }, { "epoch": 0.4390018484288355, "grad_norm": 1.164920449256897, "learning_rate": 0.0004743238616912017, "loss": 1.1855, "step": 475 }, { "epoch": 0.46210720887245843, "grad_norm": 4.518599987030029, "learning_rate": 0.0004700445053064019, "loss": 1.1385, "step": 500 }, { "epoch": 0.4852125693160813, "grad_norm": 0.9030762910842896, "learning_rate": 0.0004657651489216022, "loss": 1.1228, "step": 525 }, { "epoch": 0.5083179297597042, "grad_norm": 1.1489505767822266, "learning_rate": 0.00046148579253680244, "loss": 1.145, "step": 550 }, { "epoch": 0.5314232902033271, "grad_norm": 0.9008183479309082, "learning_rate": 0.00045720643615200274, "loss": 1.1385, "step": 575 }, { "epoch": 0.5545286506469501, "grad_norm": 0.9687894582748413, "learning_rate": 0.00045292707976720304, "loss": 1.1424, "step": 600 }, { "epoch": 0.577634011090573, "grad_norm": 0.43966954946517944, "learning_rate": 0.0004486477233824033, "loss": 1.1123, "step": 625 }, { "epoch": 0.600739371534196, "grad_norm": 0.9648946523666382, "learning_rate": 0.0004443683669976036, "loss": 0.9658, "step": 650 }, { "epoch": 0.6238447319778189, "grad_norm": 1.1458590030670166, "learning_rate": 0.00044008901061280383, "loss": 1.1779, "step": 675 }, { "epoch": 0.6469500924214417, "grad_norm": 1.6918909549713135, "learning_rate": 0.00043580965422800413, "loss": 1.0564, "step": 700 }, { "epoch": 0.6700554528650647, "grad_norm": 0.38883867859840393, "learning_rate": 0.00043153029784320443, "loss": 1.1518, "step": 725 }, { "epoch": 0.6931608133086876, "grad_norm": 1.109215497970581, "learning_rate": 0.0004272509414584047, "loss": 1.0493, "step": 750 }, { "epoch": 0.7162661737523105, "grad_norm": 4.412217140197754, "learning_rate": 0.000422971585073605, "loss": 1.0994, "step": 775 }, { "epoch": 0.7393715341959335, "grad_norm": 0.6967116594314575, "learning_rate": 0.00041869222868880517, "loss": 1.1258, "step": 800 }, { "epoch": 0.7624768946395564, "grad_norm": 1.0759273767471313, "learning_rate": 0.00041441287230400547, "loss": 1.0622, "step": 825 }, { "epoch": 0.7855822550831792, "grad_norm": 1.6998566389083862, "learning_rate": 0.00041013351591920577, "loss": 1.0942, "step": 850 }, { "epoch": 0.8086876155268022, "grad_norm": 1.0458375215530396, "learning_rate": 0.000405854159534406, "loss": 1.0698, "step": 875 }, { "epoch": 0.8317929759704251, "grad_norm": 1.579179048538208, "learning_rate": 0.0004015748031496063, "loss": 0.9733, "step": 900 }, { "epoch": 0.8548983364140481, "grad_norm": 1.5911906957626343, "learning_rate": 0.00039729544676480656, "loss": 1.1809, "step": 925 }, { "epoch": 0.878003696857671, "grad_norm": 0.30177101492881775, "learning_rate": 0.00039301609038000686, "loss": 1.0625, "step": 950 }, { "epoch": 0.9011090573012939, "grad_norm": 0.9385190606117249, "learning_rate": 0.00038873673399520716, "loss": 1.0929, "step": 975 }, { "epoch": 0.9242144177449169, "grad_norm": 1.7940095663070679, "learning_rate": 0.0003844573776104074, "loss": 1.1022, "step": 1000 }, { "epoch": 0.9473197781885397, "grad_norm": 0.8840867280960083, "learning_rate": 0.0003801780212256077, "loss": 1.0802, "step": 1025 }, { "epoch": 0.9704251386321626, "grad_norm": 0.9084434509277344, "learning_rate": 0.00037589866484080795, "loss": 1.1098, "step": 1050 }, { "epoch": 0.9935304990757856, "grad_norm": 1.4320096969604492, "learning_rate": 0.00037161930845600825, "loss": 1.1118, "step": 1075 }, { "epoch": 1.0, "eval_accuracy": 0.4584103512014787, "eval_f1_macro": 0.2095479509928179, "eval_f1_micro": 0.4584103512014787, "eval_f1_weighted": 0.2881768494245037, "eval_loss": 1.0650867223739624, "eval_precision_macro": 0.1528034504004929, "eval_precision_micro": 0.4584103512014787, "eval_precision_weighted": 0.21014005008866307, "eval_recall_macro": 0.3333333333333333, "eval_recall_micro": 0.4584103512014787, "eval_recall_weighted": 0.4584103512014787, "eval_runtime": 190.8807, "eval_samples_per_second": 5.668, "eval_steps_per_second": 0.712, "step": 1082 } ], "logging_steps": 25, "max_steps": 3246, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 284688717981696.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }