{
"best_metric": 0.9948348999023438,
"best_model_checkpoint": "xblock-social-screenshots-2/checkpoint-1737",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 1737,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 12.944127082824707,
"learning_rate": 7.4712643678160925e-06,
"loss": 2.5595,
"step": 28
},
{
"epoch": 0.1,
"grad_norm": 10.593692779541016,
"learning_rate": 1.5517241379310346e-05,
"loss": 2.1472,
"step": 56
},
{
"epoch": 0.15,
"grad_norm": 9.828369140625,
"learning_rate": 2.327586206896552e-05,
"loss": 1.7244,
"step": 84
},
{
"epoch": 0.19,
"grad_norm": 8.050122261047363,
"learning_rate": 3.132183908045977e-05,
"loss": 1.5718,
"step": 112
},
{
"epoch": 0.24,
"grad_norm": 7.566161155700684,
"learning_rate": 3.936781609195402e-05,
"loss": 1.4015,
"step": 140
},
{
"epoch": 0.29,
"grad_norm": 8.954880714416504,
"learning_rate": 4.741379310344828e-05,
"loss": 1.3705,
"step": 168
},
{
"epoch": 0.34,
"grad_norm": 8.316436767578125,
"learning_rate": 4.9392194497760715e-05,
"loss": 1.3727,
"step": 196
},
{
"epoch": 0.39,
"grad_norm": 4.580837726593018,
"learning_rate": 4.8496481126039675e-05,
"loss": 1.2824,
"step": 224
},
{
"epoch": 0.44,
"grad_norm": 12.312744140625,
"learning_rate": 4.760076775431862e-05,
"loss": 1.3119,
"step": 252
},
{
"epoch": 0.48,
"grad_norm": 10.076079368591309,
"learning_rate": 4.670505438259757e-05,
"loss": 1.252,
"step": 280
},
{
"epoch": 0.53,
"grad_norm": 9.073230743408203,
"learning_rate": 4.580934101087652e-05,
"loss": 1.3154,
"step": 308
},
{
"epoch": 0.58,
"grad_norm": 9.829570770263672,
"learning_rate": 4.491362763915547e-05,
"loss": 1.3688,
"step": 336
},
{
"epoch": 0.63,
"grad_norm": 7.165198802947998,
"learning_rate": 4.4017914267434426e-05,
"loss": 1.4376,
"step": 364
},
{
"epoch": 0.68,
"grad_norm": 8.911142349243164,
"learning_rate": 4.312220089571337e-05,
"loss": 1.3533,
"step": 392
},
{
"epoch": 0.73,
"grad_norm": 10.684996604919434,
"learning_rate": 4.2226487523992326e-05,
"loss": 1.2825,
"step": 420
},
{
"epoch": 0.77,
"grad_norm": 10.35905647277832,
"learning_rate": 4.133077415227127e-05,
"loss": 1.3382,
"step": 448
},
{
"epoch": 0.82,
"grad_norm": 9.137449264526367,
"learning_rate": 4.0435060780550225e-05,
"loss": 1.3054,
"step": 476
},
{
"epoch": 0.87,
"grad_norm": 6.174429416656494,
"learning_rate": 3.953934740882918e-05,
"loss": 1.4294,
"step": 504
},
{
"epoch": 0.92,
"grad_norm": 9.742380142211914,
"learning_rate": 3.864363403710813e-05,
"loss": 1.2502,
"step": 532
},
{
"epoch": 0.97,
"grad_norm": 7.022644519805908,
"learning_rate": 3.774792066538708e-05,
"loss": 1.2926,
"step": 560
},
{
"epoch": 1.0,
"eval_accuracy": 0.5591026747195859,
"eval_f1_macro": 0.17116779008569874,
"eval_f1_micro": 0.5591026747195859,
"eval_f1_weighted": 0.47555675422704724,
"eval_loss": 1.1608415842056274,
"eval_precision_macro": 0.17146675898922856,
"eval_precision_micro": 0.5591026747195859,
"eval_precision_weighted": 0.4635728502463956,
"eval_recall_macro": 0.1928392780592314,
"eval_recall_micro": 0.5591026747195859,
"eval_recall_weighted": 0.5591026747195859,
"eval_runtime": 47.359,
"eval_samples_per_second": 24.473,
"eval_steps_per_second": 1.541,
"step": 579
},
{
"epoch": 1.02,
"grad_norm": 7.127846717834473,
"learning_rate": 3.685220729366603e-05,
"loss": 1.161,
"step": 588
},
{
"epoch": 1.06,
"grad_norm": 6.150296211242676,
"learning_rate": 3.5956493921944976e-05,
"loss": 1.1594,
"step": 616
},
{
"epoch": 1.11,
"grad_norm": 6.171022415161133,
"learning_rate": 3.506078055022393e-05,
"loss": 1.1253,
"step": 644
},
{
"epoch": 1.16,
"grad_norm": 12.212858200073242,
"learning_rate": 3.416506717850288e-05,
"loss": 1.2149,
"step": 672
},
{
"epoch": 1.21,
"grad_norm": 8.228123664855957,
"learning_rate": 3.3269353806781835e-05,
"loss": 1.1905,
"step": 700
},
{
"epoch": 1.26,
"grad_norm": 6.095876693725586,
"learning_rate": 3.237364043506078e-05,
"loss": 1.1387,
"step": 728
},
{
"epoch": 1.31,
"grad_norm": 10.43623161315918,
"learning_rate": 3.1477927063339734e-05,
"loss": 1.2243,
"step": 756
},
{
"epoch": 1.35,
"grad_norm": 6.94878625869751,
"learning_rate": 3.058221369161868e-05,
"loss": 1.0566,
"step": 784
},
{
"epoch": 1.4,
"grad_norm": 10.651735305786133,
"learning_rate": 2.9686500319897637e-05,
"loss": 1.1425,
"step": 812
},
{
"epoch": 1.45,
"grad_norm": 8.426492691040039,
"learning_rate": 2.8790786948176586e-05,
"loss": 1.173,
"step": 840
},
{
"epoch": 1.5,
"grad_norm": 7.550307273864746,
"learning_rate": 2.789507357645554e-05,
"loss": 1.2052,
"step": 868
},
{
"epoch": 1.55,
"grad_norm": 9.533821105957031,
"learning_rate": 2.6999360204734485e-05,
"loss": 1.2043,
"step": 896
},
{
"epoch": 1.6,
"grad_norm": 7.776467323303223,
"learning_rate": 2.6103646833013435e-05,
"loss": 1.0199,
"step": 924
},
{
"epoch": 1.64,
"grad_norm": 9.101223945617676,
"learning_rate": 2.5207933461292388e-05,
"loss": 1.1681,
"step": 952
},
{
"epoch": 1.69,
"grad_norm": 7.713497161865234,
"learning_rate": 2.4312220089571338e-05,
"loss": 1.1177,
"step": 980
},
{
"epoch": 1.74,
"grad_norm": 7.732503414154053,
"learning_rate": 2.341650671785029e-05,
"loss": 1.1099,
"step": 1008
},
{
"epoch": 1.79,
"grad_norm": 9.252326965332031,
"learning_rate": 2.252079334612924e-05,
"loss": 1.2187,
"step": 1036
},
{
"epoch": 1.84,
"grad_norm": 7.301546096801758,
"learning_rate": 2.162507997440819e-05,
"loss": 1.0414,
"step": 1064
},
{
"epoch": 1.89,
"grad_norm": 8.406270027160645,
"learning_rate": 2.0729366602687143e-05,
"loss": 1.0479,
"step": 1092
},
{
"epoch": 1.93,
"grad_norm": 7.392611980438232,
"learning_rate": 1.9833653230966092e-05,
"loss": 1.2113,
"step": 1120
},
{
"epoch": 1.98,
"grad_norm": 7.991569995880127,
"learning_rate": 1.8937939859245045e-05,
"loss": 1.1279,
"step": 1148
},
{
"epoch": 2.0,
"eval_accuracy": 0.548748921484038,
"eval_f1_macro": 0.23354766712716346,
"eval_f1_micro": 0.548748921484038,
"eval_f1_weighted": 0.5206705245021934,
"eval_loss": 1.076399326324463,
"eval_precision_macro": 0.2591242803710448,
"eval_precision_micro": 0.548748921484038,
"eval_precision_weighted": 0.5086032820438354,
"eval_recall_macro": 0.2392300811955355,
"eval_recall_micro": 0.548748921484038,
"eval_recall_weighted": 0.548748921484038,
"eval_runtime": 45.7915,
"eval_samples_per_second": 25.31,
"eval_steps_per_second": 1.594,
"step": 1158
},
{
"epoch": 2.03,
"grad_norm": 8.247747421264648,
"learning_rate": 1.8042226487523995e-05,
"loss": 1.0308,
"step": 1176
},
{
"epoch": 2.08,
"grad_norm": 5.560178279876709,
"learning_rate": 1.7146513115802944e-05,
"loss": 1.0764,
"step": 1204
},
{
"epoch": 2.13,
"grad_norm": 6.093762397766113,
"learning_rate": 1.6250799744081894e-05,
"loss": 1.1365,
"step": 1232
},
{
"epoch": 2.18,
"grad_norm": 4.060959815979004,
"learning_rate": 1.5355086372360844e-05,
"loss": 1.0535,
"step": 1260
},
{
"epoch": 2.22,
"grad_norm": 7.9812822341918945,
"learning_rate": 1.4459373000639795e-05,
"loss": 1.0241,
"step": 1288
},
{
"epoch": 2.27,
"grad_norm": 5.996217250823975,
"learning_rate": 1.3563659628918746e-05,
"loss": 1.0137,
"step": 1316
},
{
"epoch": 2.32,
"grad_norm": 10.663773536682129,
"learning_rate": 1.2667946257197696e-05,
"loss": 1.0274,
"step": 1344
},
{
"epoch": 2.37,
"grad_norm": 9.829933166503906,
"learning_rate": 1.1772232885476649e-05,
"loss": 0.9893,
"step": 1372
},
{
"epoch": 2.42,
"grad_norm": 9.35245418548584,
"learning_rate": 1.0876519513755598e-05,
"loss": 1.0497,
"step": 1400
},
{
"epoch": 2.47,
"grad_norm": 9.116876602172852,
"learning_rate": 9.98080614203455e-06,
"loss": 0.972,
"step": 1428
},
{
"epoch": 2.51,
"grad_norm": 8.650375366210938,
"learning_rate": 9.085092770313499e-06,
"loss": 1.0058,
"step": 1456
},
{
"epoch": 2.56,
"grad_norm": 9.76453971862793,
"learning_rate": 8.18937939859245e-06,
"loss": 1.0451,
"step": 1484
},
{
"epoch": 2.61,
"grad_norm": 5.405215263366699,
"learning_rate": 7.293666026871402e-06,
"loss": 0.9628,
"step": 1512
},
{
"epoch": 2.66,
"grad_norm": 10.716626167297363,
"learning_rate": 6.397952655150352e-06,
"loss": 1.1384,
"step": 1540
},
{
"epoch": 2.71,
"grad_norm": 4.47056770324707,
"learning_rate": 5.502239283429303e-06,
"loss": 0.9374,
"step": 1568
},
{
"epoch": 2.76,
"grad_norm": 7.823177814483643,
"learning_rate": 4.606525911708254e-06,
"loss": 1.019,
"step": 1596
},
{
"epoch": 2.8,
"grad_norm": 10.679405212402344,
"learning_rate": 3.7108125399872046e-06,
"loss": 1.0615,
"step": 1624
},
{
"epoch": 2.85,
"grad_norm": 16.2053165435791,
"learning_rate": 2.815099168266155e-06,
"loss": 0.959,
"step": 1652
},
{
"epoch": 2.9,
"grad_norm": 8.753532409667969,
"learning_rate": 1.9193857965451054e-06,
"loss": 1.0041,
"step": 1680
},
{
"epoch": 2.95,
"grad_norm": 7.493074417114258,
"learning_rate": 1.0236724248240563e-06,
"loss": 0.998,
"step": 1708
},
{
"epoch": 3.0,
"grad_norm": 7.273109436035156,
"learning_rate": 1.2795905310300704e-07,
"loss": 0.9851,
"step": 1736
},
{
"epoch": 3.0,
"eval_accuracy": 0.5823986194995686,
"eval_f1_macro": 0.23526955685803672,
"eval_f1_micro": 0.5823986194995686,
"eval_f1_weighted": 0.5233223208450835,
"eval_loss": 0.9948348999023438,
"eval_precision_macro": 0.2973178657776674,
"eval_precision_micro": 0.5823986194995686,
"eval_precision_weighted": 0.5462756602202016,
"eval_recall_macro": 0.24224828041633692,
"eval_recall_micro": 0.5823986194995686,
"eval_recall_weighted": 0.5823986194995686,
"eval_runtime": 46.2253,
"eval_samples_per_second": 25.073,
"eval_steps_per_second": 1.579,
"step": 1737
}
],
"logging_steps": 28,
"max_steps": 1737,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 1.0764602068237517e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}