|
{ |
|
"best_metric": 1.0650867223739624, |
|
"best_model_checkpoint": "autotrain-byt8e-zygc3/checkpoint-1082", |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 1082, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02310536044362292, |
|
"grad_norm": 3.5127339363098145, |
|
"learning_rate": 3.846153846153846e-05, |
|
"loss": 1.0872, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04621072088724584, |
|
"grad_norm": 6.642369270324707, |
|
"learning_rate": 7.692307692307693e-05, |
|
"loss": 1.1215, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06931608133086876, |
|
"grad_norm": 6.217954158782959, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 1.0733, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09242144177449169, |
|
"grad_norm": 2.9232001304626465, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 1.1347, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11552680221811461, |
|
"grad_norm": 3.7540340423583984, |
|
"learning_rate": 0.00019230769230769233, |
|
"loss": 1.3273, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.13863216266173753, |
|
"grad_norm": 4.9391679763793945, |
|
"learning_rate": 0.0002307692307692308, |
|
"loss": 1.1507, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16173752310536044, |
|
"grad_norm": 1.5326274633407593, |
|
"learning_rate": 0.0002692307692307692, |
|
"loss": 1.1118, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.18484288354898337, |
|
"grad_norm": 1.3186924457550049, |
|
"learning_rate": 0.0003076923076923077, |
|
"loss": 1.1426, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20794824399260628, |
|
"grad_norm": 3.582869052886963, |
|
"learning_rate": 0.00034615384615384613, |
|
"loss": 1.175, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.23105360443622922, |
|
"grad_norm": 2.4438865184783936, |
|
"learning_rate": 0.00038461538461538467, |
|
"loss": 1.0483, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2541589648798521, |
|
"grad_norm": 6.818902492523193, |
|
"learning_rate": 0.0004230769230769231, |
|
"loss": 1.0661, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.27726432532347506, |
|
"grad_norm": 1.4846845865249634, |
|
"learning_rate": 0.0004615384615384616, |
|
"loss": 1.1412, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.300369685767098, |
|
"grad_norm": 1.0014742612838745, |
|
"learning_rate": 0.0005, |
|
"loss": 1.0968, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.3234750462107209, |
|
"grad_norm": 0.8473600745201111, |
|
"learning_rate": 0.0004957206436152003, |
|
"loss": 1.1779, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3465804066543438, |
|
"grad_norm": 3.419077157974243, |
|
"learning_rate": 0.0004914412872304005, |
|
"loss": 1.1943, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.36968576709796674, |
|
"grad_norm": 0.8151220083236694, |
|
"learning_rate": 0.00048716193084560086, |
|
"loss": 1.0639, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3927911275415896, |
|
"grad_norm": 1.3580329418182373, |
|
"learning_rate": 0.0004828825744608011, |
|
"loss": 1.2678, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.41589648798521256, |
|
"grad_norm": 0.5541791319847107, |
|
"learning_rate": 0.0004786032180760014, |
|
"loss": 1.1612, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4390018484288355, |
|
"grad_norm": 1.164920449256897, |
|
"learning_rate": 0.0004743238616912017, |
|
"loss": 1.1855, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.46210720887245843, |
|
"grad_norm": 4.518599987030029, |
|
"learning_rate": 0.0004700445053064019, |
|
"loss": 1.1385, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4852125693160813, |
|
"grad_norm": 0.9030762910842896, |
|
"learning_rate": 0.0004657651489216022, |
|
"loss": 1.1228, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5083179297597042, |
|
"grad_norm": 1.1489505767822266, |
|
"learning_rate": 0.00046148579253680244, |
|
"loss": 1.145, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5314232902033271, |
|
"grad_norm": 0.9008183479309082, |
|
"learning_rate": 0.00045720643615200274, |
|
"loss": 1.1385, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5545286506469501, |
|
"grad_norm": 0.9687894582748413, |
|
"learning_rate": 0.00045292707976720304, |
|
"loss": 1.1424, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.577634011090573, |
|
"grad_norm": 0.43966954946517944, |
|
"learning_rate": 0.0004486477233824033, |
|
"loss": 1.1123, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.600739371534196, |
|
"grad_norm": 0.9648946523666382, |
|
"learning_rate": 0.0004443683669976036, |
|
"loss": 0.9658, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6238447319778189, |
|
"grad_norm": 1.1458590030670166, |
|
"learning_rate": 0.00044008901061280383, |
|
"loss": 1.1779, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6469500924214417, |
|
"grad_norm": 1.6918909549713135, |
|
"learning_rate": 0.00043580965422800413, |
|
"loss": 1.0564, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.6700554528650647, |
|
"grad_norm": 0.38883867859840393, |
|
"learning_rate": 0.00043153029784320443, |
|
"loss": 1.1518, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.6931608133086876, |
|
"grad_norm": 1.109215497970581, |
|
"learning_rate": 0.0004272509414584047, |
|
"loss": 1.0493, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7162661737523105, |
|
"grad_norm": 4.412217140197754, |
|
"learning_rate": 0.000422971585073605, |
|
"loss": 1.0994, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.7393715341959335, |
|
"grad_norm": 0.6967116594314575, |
|
"learning_rate": 0.00041869222868880517, |
|
"loss": 1.1258, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7624768946395564, |
|
"grad_norm": 1.0759273767471313, |
|
"learning_rate": 0.00041441287230400547, |
|
"loss": 1.0622, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.7855822550831792, |
|
"grad_norm": 1.6998566389083862, |
|
"learning_rate": 0.00041013351591920577, |
|
"loss": 1.0942, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8086876155268022, |
|
"grad_norm": 1.0458375215530396, |
|
"learning_rate": 0.000405854159534406, |
|
"loss": 1.0698, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8317929759704251, |
|
"grad_norm": 1.579179048538208, |
|
"learning_rate": 0.0004015748031496063, |
|
"loss": 0.9733, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8548983364140481, |
|
"grad_norm": 1.5911906957626343, |
|
"learning_rate": 0.00039729544676480656, |
|
"loss": 1.1809, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.878003696857671, |
|
"grad_norm": 0.30177101492881775, |
|
"learning_rate": 0.00039301609038000686, |
|
"loss": 1.0625, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9011090573012939, |
|
"grad_norm": 0.9385190606117249, |
|
"learning_rate": 0.00038873673399520716, |
|
"loss": 1.0929, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9242144177449169, |
|
"grad_norm": 1.7940095663070679, |
|
"learning_rate": 0.0003844573776104074, |
|
"loss": 1.1022, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.9473197781885397, |
|
"grad_norm": 0.8840867280960083, |
|
"learning_rate": 0.0003801780212256077, |
|
"loss": 1.0802, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9704251386321626, |
|
"grad_norm": 0.9084434509277344, |
|
"learning_rate": 0.00037589866484080795, |
|
"loss": 1.1098, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.9935304990757856, |
|
"grad_norm": 1.4320096969604492, |
|
"learning_rate": 0.00037161930845600825, |
|
"loss": 1.1118, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4584103512014787, |
|
"eval_f1_macro": 0.2095479509928179, |
|
"eval_f1_micro": 0.4584103512014787, |
|
"eval_f1_weighted": 0.2881768494245037, |
|
"eval_loss": 1.0650867223739624, |
|
"eval_precision_macro": 0.1528034504004929, |
|
"eval_precision_micro": 0.4584103512014787, |
|
"eval_precision_weighted": 0.21014005008866307, |
|
"eval_recall_macro": 0.3333333333333333, |
|
"eval_recall_micro": 0.4584103512014787, |
|
"eval_recall_weighted": 0.4584103512014787, |
|
"eval_runtime": 190.8807, |
|
"eval_samples_per_second": 5.668, |
|
"eval_steps_per_second": 0.712, |
|
"step": 1082 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 3246, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 284688717981696.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|