|
{ |
|
"best_metric": 0.5015928149223328, |
|
"best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-42__lstm/checkpoint-1550", |
|
"epoch": 2.6586620926243567, |
|
"global_step": 1550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9783362218370885e-05, |
|
"loss": 43.0786, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.956672443674177e-05, |
|
"loss": 38.7276, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_distillation_accuracy_counterfactual": 0.413801949717804, |
|
"eval_distillation_accuracy_factual": 0.7095946639302206, |
|
"eval_distillation_f1_counterfactual": 0.3810686639321449, |
|
"eval_distillation_f1_factual": 0.6957955686304648, |
|
"eval_groundtruth_accuracy_counterfactual": 0.3301693175987686, |
|
"eval_groundtruth_f1_counterfactual": 0.3009236688640019, |
|
"eval_groundtruth_f1_factual": 0.4899999707848249, |
|
"eval_icace_cosine": 0.7151280045509338, |
|
"eval_icace_l2": 0.7787489891052246, |
|
"eval_icace_normdiff": 0.5948067307472229, |
|
"eval_loss": 20.85917091369629, |
|
"eval_runtime": 4.2958, |
|
"eval_samples_per_second": 907.39, |
|
"eval_steps_per_second": 7.216, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.935008665511265e-05, |
|
"loss": 35.9292, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.913344887348354e-05, |
|
"loss": 32.7092, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.891681109185442e-05, |
|
"loss": 28.9099, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_distillation_accuracy_counterfactual": 0.44971780400205236, |
|
"eval_distillation_accuracy_factual": 0.7447408927655208, |
|
"eval_distillation_f1_counterfactual": 0.4113364356485835, |
|
"eval_distillation_f1_factual": 0.7313179359518789, |
|
"eval_groundtruth_accuracy_counterfactual": 0.3763468445356593, |
|
"eval_groundtruth_f1_counterfactual": 0.340596840887459, |
|
"eval_groundtruth_f1_factual": 0.5410055670992968, |
|
"eval_icace_cosine": 0.5891422629356384, |
|
"eval_icace_l2": 0.708361029624939, |
|
"eval_icace_normdiff": 0.5622901916503906, |
|
"eval_loss": 15.46045207977295, |
|
"eval_runtime": 4.856, |
|
"eval_samples_per_second": 802.712, |
|
"eval_steps_per_second": 6.384, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8700173310225307e-05, |
|
"loss": 23.7929, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.848353552859619e-05, |
|
"loss": 19.9476, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_distillation_accuracy_counterfactual": 0.4807593637762955, |
|
"eval_distillation_accuracy_factual": 0.6631605951770139, |
|
"eval_distillation_f1_counterfactual": 0.4198039457002024, |
|
"eval_distillation_f1_factual": 0.6325010855821678, |
|
"eval_groundtruth_accuracy_counterfactual": 0.439712673165726, |
|
"eval_groundtruth_f1_counterfactual": 0.38279540235888865, |
|
"eval_groundtruth_f1_factual": 0.49479750987904253, |
|
"eval_icace_cosine": 0.5389060378074646, |
|
"eval_icace_l2": 0.6715835332870483, |
|
"eval_icace_normdiff": 0.43847718834877014, |
|
"eval_loss": 12.59195613861084, |
|
"eval_runtime": 4.5282, |
|
"eval_samples_per_second": 860.827, |
|
"eval_steps_per_second": 6.846, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.826689774696707e-05, |
|
"loss": 18.2834, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8050259965337955e-05, |
|
"loss": 16.7884, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.7833622183708845e-05, |
|
"loss": 16.1986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_distillation_accuracy_counterfactual": 0.5100051308363264, |
|
"eval_distillation_accuracy_factual": 0.7208825038481272, |
|
"eval_distillation_f1_counterfactual": 0.44399257347283927, |
|
"eval_distillation_f1_factual": 0.6985538094507235, |
|
"eval_groundtruth_accuracy_counterfactual": 0.469728065674705, |
|
"eval_groundtruth_f1_counterfactual": 0.4066620855800582, |
|
"eval_groundtruth_f1_factual": 0.5453203881049435, |
|
"eval_icace_cosine": 0.5247439742088318, |
|
"eval_icace_l2": 0.6397933959960938, |
|
"eval_icace_normdiff": 0.42328375577926636, |
|
"eval_loss": 11.142946243286133, |
|
"eval_runtime": 4.7099, |
|
"eval_samples_per_second": 827.623, |
|
"eval_steps_per_second": 6.582, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.761698440207972e-05, |
|
"loss": 16.3317, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.740034662045061e-05, |
|
"loss": 15.6449, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_distillation_accuracy_counterfactual": 0.5177013853258081, |
|
"eval_distillation_accuracy_factual": 0.7614161108260646, |
|
"eval_distillation_f1_counterfactual": 0.43564292507442814, |
|
"eval_distillation_f1_factual": 0.7349550078954596, |
|
"eval_groundtruth_accuracy_counterfactual": 0.48255515649050795, |
|
"eval_groundtruth_f1_counterfactual": 0.40504065429982916, |
|
"eval_groundtruth_f1_factual": 0.5535077645302229, |
|
"eval_icace_cosine": 0.516343355178833, |
|
"eval_icace_l2": 0.6303517818450928, |
|
"eval_icace_normdiff": 0.4126652777194977, |
|
"eval_loss": 10.60521411895752, |
|
"eval_runtime": 4.617, |
|
"eval_samples_per_second": 844.277, |
|
"eval_steps_per_second": 6.714, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7183708838821494e-05, |
|
"loss": 15.2981, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6967071057192376e-05, |
|
"loss": 15.1658, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.675043327556326e-05, |
|
"loss": 15.0967, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_distillation_accuracy_counterfactual": 0.5151359671626475, |
|
"eval_distillation_accuracy_factual": 0.7678296562339662, |
|
"eval_distillation_f1_counterfactual": 0.4566628678247276, |
|
"eval_distillation_f1_factual": 0.7434229464297083, |
|
"eval_groundtruth_accuracy_counterfactual": 0.4856336582863007, |
|
"eval_groundtruth_f1_counterfactual": 0.43108046151644946, |
|
"eval_groundtruth_f1_factual": 0.5711126744289745, |
|
"eval_icace_cosine": 0.5121855735778809, |
|
"eval_icace_l2": 0.636752188205719, |
|
"eval_icace_normdiff": 0.41414302587509155, |
|
"eval_loss": 10.544304847717285, |
|
"eval_runtime": 4.8047, |
|
"eval_samples_per_second": 811.294, |
|
"eval_steps_per_second": 6.452, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.653379549393415e-05, |
|
"loss": 14.9139, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6317157712305025e-05, |
|
"loss": 14.7833, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_distillation_accuracy_counterfactual": 0.525910723447922, |
|
"eval_distillation_accuracy_factual": 0.7939969214982042, |
|
"eval_distillation_f1_counterfactual": 0.4562979855469525, |
|
"eval_distillation_f1_factual": 0.7748803987234086, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5010261672652643, |
|
"eval_groundtruth_f1_counterfactual": 0.43277911965474775, |
|
"eval_groundtruth_f1_factual": 0.5673297128547324, |
|
"eval_icace_cosine": 0.5058079957962036, |
|
"eval_icace_l2": 0.6291938424110413, |
|
"eval_icace_normdiff": 0.41149842739105225, |
|
"eval_loss": 10.096619606018066, |
|
"eval_runtime": 4.807, |
|
"eval_samples_per_second": 810.901, |
|
"eval_steps_per_second": 6.449, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6100519930675915e-05, |
|
"loss": 14.3751, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.58838821490468e-05, |
|
"loss": 14.1618, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.566724436741768e-05, |
|
"loss": 14.1163, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_distillation_accuracy_counterfactual": 0.5110312981015905, |
|
"eval_distillation_accuracy_factual": 0.8142637249871729, |
|
"eval_distillation_f1_counterfactual": 0.42866472439070946, |
|
"eval_distillation_f1_factual": 0.7995337396770525, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5002565418163161, |
|
"eval_groundtruth_f1_counterfactual": 0.4196447531444125, |
|
"eval_groundtruth_f1_factual": 0.570939084196488, |
|
"eval_icace_cosine": 0.5057737827301025, |
|
"eval_icace_l2": 0.6323451399803162, |
|
"eval_icace_normdiff": 0.4111088812351227, |
|
"eval_loss": 9.999621391296387, |
|
"eval_runtime": 4.6656, |
|
"eval_samples_per_second": 835.478, |
|
"eval_steps_per_second": 6.644, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.5450606585788563e-05, |
|
"loss": 14.1471, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.5233968804159446e-05, |
|
"loss": 14.0263, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_distillation_accuracy_counterfactual": 0.5094920472036942, |
|
"eval_distillation_accuracy_factual": 0.8224730631092868, |
|
"eval_distillation_f1_counterfactual": 0.4472075955335491, |
|
"eval_distillation_f1_factual": 0.8071684604194267, |
|
"eval_groundtruth_accuracy_counterfactual": 0.49640841457157514, |
|
"eval_groundtruth_f1_counterfactual": 0.43563795723268167, |
|
"eval_groundtruth_f1_factual": 0.5741319757982798, |
|
"eval_icace_cosine": 0.5053402185440063, |
|
"eval_icace_l2": 0.6384401917457581, |
|
"eval_icace_normdiff": 0.41610294580459595, |
|
"eval_loss": 9.992120742797852, |
|
"eval_runtime": 4.6252, |
|
"eval_samples_per_second": 842.767, |
|
"eval_steps_per_second": 6.702, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.501733102253033e-05, |
|
"loss": 13.777, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.480069324090121e-05, |
|
"loss": 13.7366, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.45840554592721e-05, |
|
"loss": 13.645, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_distillation_accuracy_counterfactual": 0.5187275525910724, |
|
"eval_distillation_accuracy_factual": 0.8206772703950744, |
|
"eval_distillation_f1_counterfactual": 0.44479298662465616, |
|
"eval_distillation_f1_factual": 0.8084170023738808, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5107747562852745, |
|
"eval_groundtruth_f1_counterfactual": 0.43819547536098524, |
|
"eval_groundtruth_f1_factual": 0.5804064040286399, |
|
"eval_icace_cosine": 0.5102166533470154, |
|
"eval_icace_l2": 0.6428956985473633, |
|
"eval_icace_normdiff": 0.4216759502887726, |
|
"eval_loss": 10.014728546142578, |
|
"eval_runtime": 4.6422, |
|
"eval_samples_per_second": 839.683, |
|
"eval_steps_per_second": 6.678, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.436741767764298e-05, |
|
"loss": 13.6366, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.415077989601387e-05, |
|
"loss": 13.5142, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_distillation_accuracy_counterfactual": 0.521292970754233, |
|
"eval_distillation_accuracy_factual": 0.8222165212929707, |
|
"eval_distillation_f1_counterfactual": 0.44615115849333264, |
|
"eval_distillation_f1_factual": 0.8098912953108461, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5138532580810672, |
|
"eval_groundtruth_f1_counterfactual": 0.4412909861048945, |
|
"eval_groundtruth_f1_factual": 0.5759446881541332, |
|
"eval_icace_cosine": 0.5050559043884277, |
|
"eval_icace_l2": 0.6343655586242676, |
|
"eval_icace_normdiff": 0.4120153784751892, |
|
"eval_loss": 9.914525032043457, |
|
"eval_runtime": 4.5769, |
|
"eval_samples_per_second": 851.659, |
|
"eval_steps_per_second": 6.773, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.393414211438475e-05, |
|
"loss": 13.516, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.371750433275563e-05, |
|
"loss": 13.2886, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.3500866551126516e-05, |
|
"loss": 13.0863, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_distillation_accuracy_counterfactual": 0.5225756798358132, |
|
"eval_distillation_accuracy_factual": 0.8322216521292971, |
|
"eval_distillation_f1_counterfactual": 0.44257027446874675, |
|
"eval_distillation_f1_factual": 0.8222712281483833, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5159055926115957, |
|
"eval_groundtruth_f1_counterfactual": 0.43868456910889764, |
|
"eval_groundtruth_f1_factual": 0.5752420543125346, |
|
"eval_icace_cosine": 0.5034614205360413, |
|
"eval_icace_l2": 0.6326570510864258, |
|
"eval_icace_normdiff": 0.4119584262371063, |
|
"eval_loss": 9.747785568237305, |
|
"eval_runtime": 4.5826, |
|
"eval_samples_per_second": 850.614, |
|
"eval_steps_per_second": 6.765, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.3284228769497406e-05, |
|
"loss": 12.9339, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.306759098786828e-05, |
|
"loss": 13.0796, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_distillation_accuracy_counterfactual": 0.5110312981015905, |
|
"eval_distillation_accuracy_factual": 0.8273473576192919, |
|
"eval_distillation_f1_counterfactual": 0.43220510255327216, |
|
"eval_distillation_f1_factual": 0.8195939186393728, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5094920472036942, |
|
"eval_groundtruth_f1_counterfactual": 0.43438648520235124, |
|
"eval_groundtruth_f1_factual": 0.5765313785081062, |
|
"eval_icace_cosine": 0.5092682242393494, |
|
"eval_icace_l2": 0.6488710045814514, |
|
"eval_icace_normdiff": 0.42473679780960083, |
|
"eval_loss": 10.03084659576416, |
|
"eval_runtime": 4.5757, |
|
"eval_samples_per_second": 851.883, |
|
"eval_steps_per_second": 6.775, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.285095320623917e-05, |
|
"loss": 13.3328, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.2634315424610055e-05, |
|
"loss": 13.2103, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.241767764298094e-05, |
|
"loss": 12.8642, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_distillation_accuracy_counterfactual": 0.5202668034889687, |
|
"eval_distillation_accuracy_factual": 0.8227296049256029, |
|
"eval_distillation_f1_counterfactual": 0.44726878993383945, |
|
"eval_distillation_f1_factual": 0.8144807445603238, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5146228835300154, |
|
"eval_groundtruth_f1_counterfactual": 0.44463257796889355, |
|
"eval_groundtruth_f1_factual": 0.5825450226028441, |
|
"eval_icace_cosine": 0.5063404440879822, |
|
"eval_icace_l2": 0.6389003396034241, |
|
"eval_icace_normdiff": 0.41857677698135376, |
|
"eval_loss": 9.888192176818848, |
|
"eval_runtime": 4.6226, |
|
"eval_samples_per_second": 843.25, |
|
"eval_steps_per_second": 6.706, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.220103986135182e-05, |
|
"loss": 12.5476, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.198440207972271e-05, |
|
"loss": 13.1858, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_distillation_accuracy_counterfactual": 0.5243714725500257, |
|
"eval_distillation_accuracy_factual": 0.8327347357619292, |
|
"eval_distillation_f1_counterfactual": 0.4460710855321577, |
|
"eval_distillation_f1_factual": 0.8222063551634736, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5182144689584403, |
|
"eval_groundtruth_f1_counterfactual": 0.4434489043121113, |
|
"eval_groundtruth_f1_factual": 0.5879251850701077, |
|
"eval_icace_cosine": 0.5027849674224854, |
|
"eval_icace_l2": 0.6359062790870667, |
|
"eval_icace_normdiff": 0.41075506806373596, |
|
"eval_loss": 9.917060852050781, |
|
"eval_runtime": 4.7653, |
|
"eval_samples_per_second": 817.997, |
|
"eval_steps_per_second": 6.505, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.1767764298093586e-05, |
|
"loss": 12.7591, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.1551126516464476e-05, |
|
"loss": 12.8519, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.133448873483536e-05, |
|
"loss": 12.7851, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_distillation_accuracy_counterfactual": 0.5118009235505387, |
|
"eval_distillation_accuracy_factual": 0.8137506413545408, |
|
"eval_distillation_f1_counterfactual": 0.43914802457549407, |
|
"eval_distillation_f1_factual": 0.8049341794937319, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5079527963057978, |
|
"eval_groundtruth_f1_counterfactual": 0.4407143465132375, |
|
"eval_groundtruth_f1_factual": 0.5835148341351625, |
|
"eval_icace_cosine": 0.5061963796615601, |
|
"eval_icace_l2": 0.6484118103981018, |
|
"eval_icace_normdiff": 0.42115285992622375, |
|
"eval_loss": 10.143532752990723, |
|
"eval_runtime": 4.6865, |
|
"eval_samples_per_second": 831.75, |
|
"eval_steps_per_second": 6.615, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.111785095320624e-05, |
|
"loss": 12.8646, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.0901213171577124e-05, |
|
"loss": 12.558, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_distillation_accuracy_counterfactual": 0.5307850179579271, |
|
"eval_distillation_accuracy_factual": 0.831965110312981, |
|
"eval_distillation_f1_counterfactual": 0.4484519611985586, |
|
"eval_distillation_f1_factual": 0.822168247394948, |
|
"eval_groundtruth_accuracy_counterfactual": 0.517444843509492, |
|
"eval_groundtruth_f1_counterfactual": 0.43872302047817147, |
|
"eval_groundtruth_f1_factual": 0.5800644441761371, |
|
"eval_icace_cosine": 0.5030389428138733, |
|
"eval_icace_l2": 0.632723867893219, |
|
"eval_icace_normdiff": 0.4054679870605469, |
|
"eval_loss": 9.84372615814209, |
|
"eval_runtime": 5.0033, |
|
"eval_samples_per_second": 779.083, |
|
"eval_steps_per_second": 6.196, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.068457538994801e-05, |
|
"loss": 12.828, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 4.04679376083189e-05, |
|
"loss": 12.3635, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.025129982668977e-05, |
|
"loss": 12.6661, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_distillation_accuracy_counterfactual": 0.5253976398152899, |
|
"eval_distillation_accuracy_factual": 0.8365828630066701, |
|
"eval_distillation_f1_counterfactual": 0.450312280094982, |
|
"eval_distillation_f1_factual": 0.8262625380550788, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5189840944073884, |
|
"eval_groundtruth_f1_counterfactual": 0.44867654499260745, |
|
"eval_groundtruth_f1_factual": 0.5750955137521894, |
|
"eval_icace_cosine": 0.5052414536476135, |
|
"eval_icace_l2": 0.6440022587776184, |
|
"eval_icace_normdiff": 0.41912850737571716, |
|
"eval_loss": 9.941903114318848, |
|
"eval_runtime": 4.5789, |
|
"eval_samples_per_second": 851.288, |
|
"eval_steps_per_second": 6.77, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.003466204506066e-05, |
|
"loss": 12.9309, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.9818024263431546e-05, |
|
"loss": 12.463, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_distillation_accuracy_counterfactual": 0.513340174448435, |
|
"eval_distillation_accuracy_factual": 0.8099025141097999, |
|
"eval_distillation_f1_counterfactual": 0.4451801537186667, |
|
"eval_distillation_f1_factual": 0.8018083383301378, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5135967162647511, |
|
"eval_groundtruth_f1_counterfactual": 0.45122773824298, |
|
"eval_groundtruth_f1_factual": 0.582459738995094, |
|
"eval_icace_cosine": 0.5057380199432373, |
|
"eval_icace_l2": 0.6424580216407776, |
|
"eval_icace_normdiff": 0.4136655926704407, |
|
"eval_loss": 10.070512771606445, |
|
"eval_runtime": 4.5868, |
|
"eval_samples_per_second": 849.823, |
|
"eval_steps_per_second": 6.758, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.960138648180243e-05, |
|
"loss": 12.6212, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.938474870017331e-05, |
|
"loss": 12.6221, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.91681109185442e-05, |
|
"loss": 12.5131, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_distillation_accuracy_counterfactual": 0.5192406362237044, |
|
"eval_distillation_accuracy_factual": 0.8127244740892765, |
|
"eval_distillation_f1_counterfactual": 0.4342093180083893, |
|
"eval_distillation_f1_factual": 0.8020837833036161, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5156490507952797, |
|
"eval_groundtruth_f1_counterfactual": 0.4371545167935388, |
|
"eval_groundtruth_f1_factual": 0.5865676169660375, |
|
"eval_icace_cosine": 0.5021520256996155, |
|
"eval_icace_l2": 0.6322320699691772, |
|
"eval_icace_normdiff": 0.40523087978363037, |
|
"eval_loss": 9.895723342895508, |
|
"eval_runtime": 4.6904, |
|
"eval_samples_per_second": 831.068, |
|
"eval_steps_per_second": 6.609, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.895147313691508e-05, |
|
"loss": 12.5027, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.873483535528597e-05, |
|
"loss": 12.7083, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_distillation_accuracy_counterfactual": 0.5192406362237044, |
|
"eval_distillation_accuracy_factual": 0.8114417650076963, |
|
"eval_distillation_f1_counterfactual": 0.4263386972710933, |
|
"eval_distillation_f1_factual": 0.7994083878081278, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5192406362237044, |
|
"eval_groundtruth_f1_counterfactual": 0.4329553891077026, |
|
"eval_groundtruth_f1_factual": 0.5912280932648739, |
|
"eval_icace_cosine": 0.5038776397705078, |
|
"eval_icace_l2": 0.6317997574806213, |
|
"eval_icace_normdiff": 0.39978867769241333, |
|
"eval_loss": 10.077560424804688, |
|
"eval_runtime": 5.0771, |
|
"eval_samples_per_second": 767.767, |
|
"eval_steps_per_second": 6.106, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.851819757365685e-05, |
|
"loss": 12.3779, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 3.830155979202773e-05, |
|
"loss": 12.1297, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.8084922010398616e-05, |
|
"loss": 12.2778, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_distillation_accuracy_counterfactual": 0.5076962544894817, |
|
"eval_distillation_accuracy_factual": 0.8093894304771678, |
|
"eval_distillation_f1_counterfactual": 0.42915135801638715, |
|
"eval_distillation_f1_factual": 0.7988387328754103, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5074397126731657, |
|
"eval_groundtruth_f1_counterfactual": 0.43467229220494497, |
|
"eval_groundtruth_f1_factual": 0.5863614417021632, |
|
"eval_icace_cosine": 0.5077196955680847, |
|
"eval_icace_l2": 0.6488126516342163, |
|
"eval_icace_normdiff": 0.4152773916721344, |
|
"eval_loss": 10.29930591583252, |
|
"eval_runtime": 4.7917, |
|
"eval_samples_per_second": 813.496, |
|
"eval_steps_per_second": 6.47, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.78682842287695e-05, |
|
"loss": 12.391, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.765164644714038e-05, |
|
"loss": 12.1049, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_distillation_accuracy_counterfactual": 0.5277065161621344, |
|
"eval_distillation_accuracy_factual": 0.8114417650076963, |
|
"eval_distillation_f1_counterfactual": 0.4379402413535344, |
|
"eval_distillation_f1_factual": 0.8009596923401426, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5184710107747563, |
|
"eval_groundtruth_f1_counterfactual": 0.4351158243705179, |
|
"eval_groundtruth_f1_factual": 0.5809362022910343, |
|
"eval_icace_cosine": 0.5041907429695129, |
|
"eval_icace_l2": 0.6319468021392822, |
|
"eval_icace_normdiff": 0.4021805226802826, |
|
"eval_loss": 10.031487464904785, |
|
"eval_runtime": 4.6011, |
|
"eval_samples_per_second": 847.186, |
|
"eval_steps_per_second": 6.737, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.7435008665511264e-05, |
|
"loss": 12.3362, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.7218370883882154e-05, |
|
"loss": 12.3522, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.700173310225303e-05, |
|
"loss": 12.2852, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_distillation_accuracy_counterfactual": 0.5089789635710621, |
|
"eval_distillation_accuracy_factual": 0.8093894304771678, |
|
"eval_distillation_f1_counterfactual": 0.4327873004366438, |
|
"eval_distillation_f1_factual": 0.7978534612810504, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5094920472036942, |
|
"eval_groundtruth_f1_counterfactual": 0.44147489438032766, |
|
"eval_groundtruth_f1_factual": 0.5875097166633956, |
|
"eval_icace_cosine": 0.5080219507217407, |
|
"eval_icace_l2": 0.6420192122459412, |
|
"eval_icace_normdiff": 0.40897807478904724, |
|
"eval_loss": 10.132452011108398, |
|
"eval_runtime": 4.7301, |
|
"eval_samples_per_second": 824.088, |
|
"eval_steps_per_second": 6.554, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.678509532062392e-05, |
|
"loss": 12.2919, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.65684575389948e-05, |
|
"loss": 12.2076, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_distillation_accuracy_counterfactual": 0.5082093381221139, |
|
"eval_distillation_accuracy_factual": 0.8052847614161108, |
|
"eval_distillation_f1_counterfactual": 0.421327115393484, |
|
"eval_distillation_f1_factual": 0.7938972956790581, |
|
"eval_groundtruth_accuracy_counterfactual": 0.508722421754746, |
|
"eval_groundtruth_f1_counterfactual": 0.4317767637232814, |
|
"eval_groundtruth_f1_factual": 0.5884182312452589, |
|
"eval_icace_cosine": 0.5102863907814026, |
|
"eval_icace_l2": 0.6493218541145325, |
|
"eval_icace_normdiff": 0.41481640934944153, |
|
"eval_loss": 10.397825241088867, |
|
"eval_runtime": 4.5642, |
|
"eval_samples_per_second": 854.035, |
|
"eval_steps_per_second": 6.792, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.6351819757365686e-05, |
|
"loss": 12.1793, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.613518197573657e-05, |
|
"loss": 11.9955, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.591854419410746e-05, |
|
"loss": 12.0414, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_distillation_accuracy_counterfactual": 0.5159055926115957, |
|
"eval_distillation_accuracy_factual": 0.8088763468445357, |
|
"eval_distillation_f1_counterfactual": 0.43928335663414925, |
|
"eval_distillation_f1_factual": 0.798007092912107, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5192406362237044, |
|
"eval_groundtruth_f1_counterfactual": 0.45071084200597555, |
|
"eval_groundtruth_f1_factual": 0.5881302910281572, |
|
"eval_icace_cosine": 0.5018934607505798, |
|
"eval_icace_l2": 0.6314676403999329, |
|
"eval_icace_normdiff": 0.4030630588531494, |
|
"eval_loss": 10.04751968383789, |
|
"eval_runtime": 4.5717, |
|
"eval_samples_per_second": 852.63, |
|
"eval_steps_per_second": 6.781, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.5701906412478334e-05, |
|
"loss": 12.0019, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.5485268630849224e-05, |
|
"loss": 11.8994, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_distillation_accuracy_counterfactual": 0.5120574653668548, |
|
"eval_distillation_accuracy_factual": 0.8001539250897897, |
|
"eval_distillation_f1_counterfactual": 0.43648655334155206, |
|
"eval_distillation_f1_factual": 0.791237363619484, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5035915854284249, |
|
"eval_groundtruth_f1_counterfactual": 0.4365541899982916, |
|
"eval_groundtruth_f1_factual": 0.5892108143561166, |
|
"eval_icace_cosine": 0.5103155970573425, |
|
"eval_icace_l2": 0.648665726184845, |
|
"eval_icace_normdiff": 0.4152573347091675, |
|
"eval_loss": 10.379837989807129, |
|
"eval_runtime": 4.5419, |
|
"eval_samples_per_second": 858.233, |
|
"eval_steps_per_second": 6.825, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.526863084922011e-05, |
|
"loss": 12.1835, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.505199306759099e-05, |
|
"loss": 12.1258, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.483535528596187e-05, |
|
"loss": 11.8758, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_distillation_accuracy_counterfactual": 0.5092355053873782, |
|
"eval_distillation_accuracy_factual": 0.8055413032324269, |
|
"eval_distillation_f1_counterfactual": 0.43572512517349055, |
|
"eval_distillation_f1_factual": 0.7959903986995227, |
|
"eval_groundtruth_accuracy_counterfactual": 0.512827090815803, |
|
"eval_groundtruth_f1_counterfactual": 0.44567177435185246, |
|
"eval_groundtruth_f1_factual": 0.5944081922689601, |
|
"eval_icace_cosine": 0.504929780960083, |
|
"eval_icace_l2": 0.6384508013725281, |
|
"eval_icace_normdiff": 0.404530793428421, |
|
"eval_loss": 10.173859596252441, |
|
"eval_runtime": 4.614, |
|
"eval_samples_per_second": 844.817, |
|
"eval_steps_per_second": 6.719, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.461871750433276e-05, |
|
"loss": 12.0309, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.440207972270364e-05, |
|
"loss": 11.738, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_distillation_accuracy_counterfactual": 0.5151359671626475, |
|
"eval_distillation_accuracy_factual": 0.8065674704976912, |
|
"eval_distillation_f1_counterfactual": 0.42557736492521486, |
|
"eval_distillation_f1_factual": 0.7962593487138783, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5207798871216008, |
|
"eval_groundtruth_f1_counterfactual": 0.4400972677897436, |
|
"eval_groundtruth_f1_factual": 0.5898699262629978, |
|
"eval_icace_cosine": 0.504587709903717, |
|
"eval_icace_l2": 0.6404756903648376, |
|
"eval_icace_normdiff": 0.4062090516090393, |
|
"eval_loss": 10.268754005432129, |
|
"eval_runtime": 4.5043, |
|
"eval_samples_per_second": 865.395, |
|
"eval_steps_per_second": 6.882, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.418544194107453e-05, |
|
"loss": 11.847, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.396880415944541e-05, |
|
"loss": 11.7855, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.3752166377816294e-05, |
|
"loss": 12.2395, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_distillation_accuracy_counterfactual": 0.5076962544894817, |
|
"eval_distillation_accuracy_factual": 0.793227296049256, |
|
"eval_distillation_f1_counterfactual": 0.42559353759463275, |
|
"eval_distillation_f1_factual": 0.7832657828168742, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5105182144689584, |
|
"eval_groundtruth_f1_counterfactual": 0.4380208778877491, |
|
"eval_groundtruth_f1_factual": 0.5829268254157943, |
|
"eval_icace_cosine": 0.508986234664917, |
|
"eval_icace_l2": 0.6547452211380005, |
|
"eval_icace_normdiff": 0.4193916618824005, |
|
"eval_loss": 10.556840896606445, |
|
"eval_runtime": 4.5426, |
|
"eval_samples_per_second": 858.107, |
|
"eval_steps_per_second": 6.824, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 3.353552859618718e-05, |
|
"loss": 12.2207, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.331889081455806e-05, |
|
"loss": 12.2085, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_distillation_accuracy_counterfactual": 0.5177013853258081, |
|
"eval_distillation_accuracy_factual": 0.7978450487429452, |
|
"eval_distillation_f1_counterfactual": 0.4343412617259876, |
|
"eval_distillation_f1_factual": 0.7885736470487629, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5241149307337096, |
|
"eval_groundtruth_f1_counterfactual": 0.44795498771470293, |
|
"eval_groundtruth_f1_factual": 0.5848176602353289, |
|
"eval_icace_cosine": 0.5015928149223328, |
|
"eval_icace_l2": 0.6313996911048889, |
|
"eval_icace_normdiff": 0.400736927986145, |
|
"eval_loss": 10.176055908203125, |
|
"eval_runtime": 6.0094, |
|
"eval_samples_per_second": 648.649, |
|
"eval_steps_per_second": 5.159, |
|
"step": 1550 |
|
} |
|
], |
|
"max_steps": 4616, |
|
"num_train_epochs": 8, |
|
"total_flos": 234153553766400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|