|
{ |
|
"best_metric": 0.9948348999023438, |
|
"best_model_checkpoint": "xblock-social-screenshots-2/checkpoint-1737", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 1737, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.944127082824707, |
|
"learning_rate": 7.4712643678160925e-06, |
|
"loss": 2.5595, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 10.593692779541016, |
|
"learning_rate": 1.5517241379310346e-05, |
|
"loss": 2.1472, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.828369140625, |
|
"learning_rate": 2.327586206896552e-05, |
|
"loss": 1.7244, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.050122261047363, |
|
"learning_rate": 3.132183908045977e-05, |
|
"loss": 1.5718, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 7.566161155700684, |
|
"learning_rate": 3.936781609195402e-05, |
|
"loss": 1.4015, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 8.954880714416504, |
|
"learning_rate": 4.741379310344828e-05, |
|
"loss": 1.3705, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 8.316436767578125, |
|
"learning_rate": 4.9392194497760715e-05, |
|
"loss": 1.3727, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.580837726593018, |
|
"learning_rate": 4.8496481126039675e-05, |
|
"loss": 1.2824, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 12.312744140625, |
|
"learning_rate": 4.760076775431862e-05, |
|
"loss": 1.3119, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 10.076079368591309, |
|
"learning_rate": 4.670505438259757e-05, |
|
"loss": 1.252, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.073230743408203, |
|
"learning_rate": 4.580934101087652e-05, |
|
"loss": 1.3154, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 9.829570770263672, |
|
"learning_rate": 4.491362763915547e-05, |
|
"loss": 1.3688, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 7.165198802947998, |
|
"learning_rate": 4.4017914267434426e-05, |
|
"loss": 1.4376, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.911142349243164, |
|
"learning_rate": 4.312220089571337e-05, |
|
"loss": 1.3533, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 10.684996604919434, |
|
"learning_rate": 4.2226487523992326e-05, |
|
"loss": 1.2825, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 10.35905647277832, |
|
"learning_rate": 4.133077415227127e-05, |
|
"loss": 1.3382, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 9.137449264526367, |
|
"learning_rate": 4.0435060780550225e-05, |
|
"loss": 1.3054, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 6.174429416656494, |
|
"learning_rate": 3.953934740882918e-05, |
|
"loss": 1.4294, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.742380142211914, |
|
"learning_rate": 3.864363403710813e-05, |
|
"loss": 1.2502, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 7.022644519805908, |
|
"learning_rate": 3.774792066538708e-05, |
|
"loss": 1.2926, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5591026747195859, |
|
"eval_f1_macro": 0.17116779008569874, |
|
"eval_f1_micro": 0.5591026747195859, |
|
"eval_f1_weighted": 0.47555675422704724, |
|
"eval_loss": 1.1608415842056274, |
|
"eval_precision_macro": 0.17146675898922856, |
|
"eval_precision_micro": 0.5591026747195859, |
|
"eval_precision_weighted": 0.4635728502463956, |
|
"eval_recall_macro": 0.1928392780592314, |
|
"eval_recall_micro": 0.5591026747195859, |
|
"eval_recall_weighted": 0.5591026747195859, |
|
"eval_runtime": 47.359, |
|
"eval_samples_per_second": 24.473, |
|
"eval_steps_per_second": 1.541, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 7.127846717834473, |
|
"learning_rate": 3.685220729366603e-05, |
|
"loss": 1.161, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 6.150296211242676, |
|
"learning_rate": 3.5956493921944976e-05, |
|
"loss": 1.1594, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 6.171022415161133, |
|
"learning_rate": 3.506078055022393e-05, |
|
"loss": 1.1253, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 12.212858200073242, |
|
"learning_rate": 3.416506717850288e-05, |
|
"loss": 1.2149, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 8.228123664855957, |
|
"learning_rate": 3.3269353806781835e-05, |
|
"loss": 1.1905, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"grad_norm": 6.095876693725586, |
|
"learning_rate": 3.237364043506078e-05, |
|
"loss": 1.1387, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"grad_norm": 10.43623161315918, |
|
"learning_rate": 3.1477927063339734e-05, |
|
"loss": 1.2243, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 6.94878625869751, |
|
"learning_rate": 3.058221369161868e-05, |
|
"loss": 1.0566, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 10.651735305786133, |
|
"learning_rate": 2.9686500319897637e-05, |
|
"loss": 1.1425, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 8.426492691040039, |
|
"learning_rate": 2.8790786948176586e-05, |
|
"loss": 1.173, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 7.550307273864746, |
|
"learning_rate": 2.789507357645554e-05, |
|
"loss": 1.2052, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 9.533821105957031, |
|
"learning_rate": 2.6999360204734485e-05, |
|
"loss": 1.2043, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 7.776467323303223, |
|
"learning_rate": 2.6103646833013435e-05, |
|
"loss": 1.0199, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 9.101223945617676, |
|
"learning_rate": 2.5207933461292388e-05, |
|
"loss": 1.1681, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 7.713497161865234, |
|
"learning_rate": 2.4312220089571338e-05, |
|
"loss": 1.1177, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 7.732503414154053, |
|
"learning_rate": 2.341650671785029e-05, |
|
"loss": 1.1099, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 9.252326965332031, |
|
"learning_rate": 2.252079334612924e-05, |
|
"loss": 1.2187, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 7.301546096801758, |
|
"learning_rate": 2.162507997440819e-05, |
|
"loss": 1.0414, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 8.406270027160645, |
|
"learning_rate": 2.0729366602687143e-05, |
|
"loss": 1.0479, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 7.392611980438232, |
|
"learning_rate": 1.9833653230966092e-05, |
|
"loss": 1.2113, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"grad_norm": 7.991569995880127, |
|
"learning_rate": 1.8937939859245045e-05, |
|
"loss": 1.1279, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.548748921484038, |
|
"eval_f1_macro": 0.23354766712716346, |
|
"eval_f1_micro": 0.548748921484038, |
|
"eval_f1_weighted": 0.5206705245021934, |
|
"eval_loss": 1.076399326324463, |
|
"eval_precision_macro": 0.2591242803710448, |
|
"eval_precision_micro": 0.548748921484038, |
|
"eval_precision_weighted": 0.5086032820438354, |
|
"eval_recall_macro": 0.2392300811955355, |
|
"eval_recall_micro": 0.548748921484038, |
|
"eval_recall_weighted": 0.548748921484038, |
|
"eval_runtime": 45.7915, |
|
"eval_samples_per_second": 25.31, |
|
"eval_steps_per_second": 1.594, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 8.247747421264648, |
|
"learning_rate": 1.8042226487523995e-05, |
|
"loss": 1.0308, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 5.560178279876709, |
|
"learning_rate": 1.7146513115802944e-05, |
|
"loss": 1.0764, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 6.093762397766113, |
|
"learning_rate": 1.6250799744081894e-05, |
|
"loss": 1.1365, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 4.060959815979004, |
|
"learning_rate": 1.5355086372360844e-05, |
|
"loss": 1.0535, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 7.9812822341918945, |
|
"learning_rate": 1.4459373000639795e-05, |
|
"loss": 1.0241, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 5.996217250823975, |
|
"learning_rate": 1.3563659628918746e-05, |
|
"loss": 1.0137, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 10.663773536682129, |
|
"learning_rate": 1.2667946257197696e-05, |
|
"loss": 1.0274, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 9.829933166503906, |
|
"learning_rate": 1.1772232885476649e-05, |
|
"loss": 0.9893, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 9.35245418548584, |
|
"learning_rate": 1.0876519513755598e-05, |
|
"loss": 1.0497, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 9.116876602172852, |
|
"learning_rate": 9.98080614203455e-06, |
|
"loss": 0.972, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 8.650375366210938, |
|
"learning_rate": 9.085092770313499e-06, |
|
"loss": 1.0058, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 9.76453971862793, |
|
"learning_rate": 8.18937939859245e-06, |
|
"loss": 1.0451, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"grad_norm": 5.405215263366699, |
|
"learning_rate": 7.293666026871402e-06, |
|
"loss": 0.9628, |
|
"step": 1512 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 10.716626167297363, |
|
"learning_rate": 6.397952655150352e-06, |
|
"loss": 1.1384, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"grad_norm": 4.47056770324707, |
|
"learning_rate": 5.502239283429303e-06, |
|
"loss": 0.9374, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 7.823177814483643, |
|
"learning_rate": 4.606525911708254e-06, |
|
"loss": 1.019, |
|
"step": 1596 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 10.679405212402344, |
|
"learning_rate": 3.7108125399872046e-06, |
|
"loss": 1.0615, |
|
"step": 1624 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 16.2053165435791, |
|
"learning_rate": 2.815099168266155e-06, |
|
"loss": 0.959, |
|
"step": 1652 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 8.753532409667969, |
|
"learning_rate": 1.9193857965451054e-06, |
|
"loss": 1.0041, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 7.493074417114258, |
|
"learning_rate": 1.0236724248240563e-06, |
|
"loss": 0.998, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 7.273109436035156, |
|
"learning_rate": 1.2795905310300704e-07, |
|
"loss": 0.9851, |
|
"step": 1736 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5823986194995686, |
|
"eval_f1_macro": 0.23526955685803672, |
|
"eval_f1_micro": 0.5823986194995686, |
|
"eval_f1_weighted": 0.5233223208450835, |
|
"eval_loss": 0.9948348999023438, |
|
"eval_precision_macro": 0.2973178657776674, |
|
"eval_precision_micro": 0.5823986194995686, |
|
"eval_precision_weighted": 0.5462756602202016, |
|
"eval_recall_macro": 0.24224828041633692, |
|
"eval_recall_micro": 0.5823986194995686, |
|
"eval_recall_weighted": 0.5823986194995686, |
|
"eval_runtime": 46.2253, |
|
"eval_samples_per_second": 25.073, |
|
"eval_steps_per_second": 1.579, |
|
"step": 1737 |
|
} |
|
], |
|
"logging_steps": 28, |
|
"max_steps": 1737, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 1.0764602068237517e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|