|
{ |
|
"best_metric": 0.49332907795906067, |
|
"best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-66__lstm/checkpoint-650", |
|
"epoch": 1.1149228130360207, |
|
"global_step": 650, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9783362218370885e-05, |
|
"loss": 43.289, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.956672443674177e-05, |
|
"loss": 39.7742, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_distillation_accuracy_counterfactual": 0.3876346844535659, |
|
"eval_distillation_accuracy_factual": 0.7116469984607491, |
|
"eval_distillation_f1_counterfactual": 0.36789273743176754, |
|
"eval_distillation_f1_factual": 0.706355718089049, |
|
"eval_groundtruth_accuracy_counterfactual": 0.3170856849666496, |
|
"eval_groundtruth_f1_counterfactual": 0.30061734733599244, |
|
"eval_groundtruth_f1_factual": 0.5309938705776336, |
|
"eval_icace_cosine": 0.7443665266036987, |
|
"eval_icace_l2": 0.8025880455970764, |
|
"eval_icace_normdiff": 0.6179075837135315, |
|
"eval_loss": 21.722654342651367, |
|
"eval_runtime": 4.6356, |
|
"eval_samples_per_second": 840.887, |
|
"eval_steps_per_second": 6.687, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.935008665511265e-05, |
|
"loss": 36.1211, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.913344887348354e-05, |
|
"loss": 33.0051, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.891681109185442e-05, |
|
"loss": 29.0866, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_distillation_accuracy_counterfactual": 0.4179066187788609, |
|
"eval_distillation_accuracy_factual": 0.7213955874807594, |
|
"eval_distillation_f1_counterfactual": 0.4011779697256409, |
|
"eval_distillation_f1_factual": 0.7130234220148569, |
|
"eval_groundtruth_accuracy_counterfactual": 0.36198050282196, |
|
"eval_groundtruth_f1_counterfactual": 0.34119540585580543, |
|
"eval_groundtruth_f1_factual": 0.533506870286257, |
|
"eval_icace_cosine": 0.6045525074005127, |
|
"eval_icace_l2": 0.7391046285629272, |
|
"eval_icace_normdiff": 0.5667964220046997, |
|
"eval_loss": 15.981736183166504, |
|
"eval_runtime": 4.6279, |
|
"eval_samples_per_second": 842.278, |
|
"eval_steps_per_second": 6.698, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.8700173310225307e-05, |
|
"loss": 24.2452, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.848353552859619e-05, |
|
"loss": 20.9734, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_distillation_accuracy_counterfactual": 0.4415084658799384, |
|
"eval_distillation_accuracy_factual": 0.6688045151359672, |
|
"eval_distillation_f1_counterfactual": 0.39743600677420876, |
|
"eval_distillation_f1_factual": 0.6386265115556637, |
|
"eval_groundtruth_accuracy_counterfactual": 0.42432016418676244, |
|
"eval_groundtruth_f1_counterfactual": 0.37094685868809796, |
|
"eval_groundtruth_f1_factual": 0.49392886445649786, |
|
"eval_icace_cosine": 0.5455384254455566, |
|
"eval_icace_l2": 0.6967277526855469, |
|
"eval_icace_normdiff": 0.46749597787857056, |
|
"eval_loss": 12.647016525268555, |
|
"eval_runtime": 4.5013, |
|
"eval_samples_per_second": 865.969, |
|
"eval_steps_per_second": 6.887, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.826689774696707e-05, |
|
"loss": 19.1703, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8050259965337955e-05, |
|
"loss": 17.6579, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.7833622183708845e-05, |
|
"loss": 17.1812, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_distillation_accuracy_counterfactual": 0.46587993842996406, |
|
"eval_distillation_accuracy_factual": 0.7129297075423294, |
|
"eval_distillation_f1_counterfactual": 0.42585340396044125, |
|
"eval_distillation_f1_factual": 0.6801681890460928, |
|
"eval_groundtruth_accuracy_counterfactual": 0.44766546947152386, |
|
"eval_groundtruth_f1_counterfactual": 0.4011001985251402, |
|
"eval_groundtruth_f1_factual": 0.5200271047795778, |
|
"eval_icace_cosine": 0.5150377154350281, |
|
"eval_icace_l2": 0.6690810322761536, |
|
"eval_icace_normdiff": 0.4451918601989746, |
|
"eval_loss": 11.362848281860352, |
|
"eval_runtime": 4.5371, |
|
"eval_samples_per_second": 859.147, |
|
"eval_steps_per_second": 6.833, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.761698440207972e-05, |
|
"loss": 16.2793, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.740034662045061e-05, |
|
"loss": 15.8969, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_distillation_accuracy_counterfactual": 0.48255515649050795, |
|
"eval_distillation_accuracy_factual": 0.7508978963571062, |
|
"eval_distillation_f1_counterfactual": 0.4392245827288441, |
|
"eval_distillation_f1_factual": 0.7287650814209791, |
|
"eval_groundtruth_accuracy_counterfactual": 0.4574140584915341, |
|
"eval_groundtruth_f1_counterfactual": 0.4058681312407287, |
|
"eval_groundtruth_f1_factual": 0.5340988634733079, |
|
"eval_icace_cosine": 0.5094192028045654, |
|
"eval_icace_l2": 0.6640841960906982, |
|
"eval_icace_normdiff": 0.43985918164253235, |
|
"eval_loss": 11.062640190124512, |
|
"eval_runtime": 4.5851, |
|
"eval_samples_per_second": 850.14, |
|
"eval_steps_per_second": 6.761, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.7183708838821494e-05, |
|
"loss": 15.567, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.6967071057192376e-05, |
|
"loss": 15.5899, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.675043327556326e-05, |
|
"loss": 15.4491, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_distillation_accuracy_counterfactual": 0.49589533093894306, |
|
"eval_distillation_accuracy_factual": 0.7701385325808107, |
|
"eval_distillation_f1_counterfactual": 0.4490615827265524, |
|
"eval_distillation_f1_factual": 0.7488952781611599, |
|
"eval_groundtruth_accuracy_counterfactual": 0.47614161108260644, |
|
"eval_groundtruth_f1_counterfactual": 0.4213178039314829, |
|
"eval_groundtruth_f1_factual": 0.5320832145764172, |
|
"eval_icace_cosine": 0.5026495456695557, |
|
"eval_icace_l2": 0.6473169922828674, |
|
"eval_icace_normdiff": 0.4321483075618744, |
|
"eval_loss": 10.44105339050293, |
|
"eval_runtime": 4.6207, |
|
"eval_samples_per_second": 843.602, |
|
"eval_steps_per_second": 6.709, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.653379549393415e-05, |
|
"loss": 15.2629, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.6317157712305025e-05, |
|
"loss": 14.8008, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_distillation_accuracy_counterfactual": 0.50846587993843, |
|
"eval_distillation_accuracy_factual": 0.7747562852744997, |
|
"eval_distillation_f1_counterfactual": 0.45682754564452965, |
|
"eval_distillation_f1_factual": 0.7559415583306467, |
|
"eval_groundtruth_accuracy_counterfactual": 0.49461262185736277, |
|
"eval_groundtruth_f1_counterfactual": 0.4326301439786258, |
|
"eval_groundtruth_f1_factual": 0.5477637349101722, |
|
"eval_icace_cosine": 0.5030019283294678, |
|
"eval_icace_l2": 0.6434506177902222, |
|
"eval_icace_normdiff": 0.4238852560520172, |
|
"eval_loss": 10.315159797668457, |
|
"eval_runtime": 4.5363, |
|
"eval_samples_per_second": 859.299, |
|
"eval_steps_per_second": 6.834, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6100519930675915e-05, |
|
"loss": 14.6876, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.58838821490468e-05, |
|
"loss": 14.8699, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.566724436741768e-05, |
|
"loss": 14.3057, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_distillation_accuracy_counterfactual": 0.5179579271421242, |
|
"eval_distillation_accuracy_factual": 0.7955361723961005, |
|
"eval_distillation_f1_counterfactual": 0.4694720290698628, |
|
"eval_distillation_f1_factual": 0.7787731275919787, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5007696254489482, |
|
"eval_groundtruth_f1_counterfactual": 0.44241001562042664, |
|
"eval_groundtruth_f1_factual": 0.5479747801171813, |
|
"eval_icace_cosine": 0.4990290403366089, |
|
"eval_icace_l2": 0.6423947811126709, |
|
"eval_icace_normdiff": 0.4277788996696472, |
|
"eval_loss": 10.05081558227539, |
|
"eval_runtime": 4.5234, |
|
"eval_samples_per_second": 861.75, |
|
"eval_steps_per_second": 6.853, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.5450606585788563e-05, |
|
"loss": 14.2025, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.5233968804159446e-05, |
|
"loss": 14.1203, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_distillation_accuracy_counterfactual": 0.5023088763468445, |
|
"eval_distillation_accuracy_factual": 0.8127244740892765, |
|
"eval_distillation_f1_counterfactual": 0.4656964220693382, |
|
"eval_distillation_f1_factual": 0.8006616317977919, |
|
"eval_groundtruth_accuracy_counterfactual": 0.4789635710620831, |
|
"eval_groundtruth_f1_counterfactual": 0.4346768246991469, |
|
"eval_groundtruth_f1_factual": 0.5734933636976969, |
|
"eval_icace_cosine": 0.5007656216621399, |
|
"eval_icace_l2": 0.6549262404441833, |
|
"eval_icace_normdiff": 0.4307872951030731, |
|
"eval_loss": 10.223078727722168, |
|
"eval_runtime": 4.7741, |
|
"eval_samples_per_second": 816.492, |
|
"eval_steps_per_second": 6.493, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.501733102253033e-05, |
|
"loss": 14.0055, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.480069324090121e-05, |
|
"loss": 14.1732, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.45840554592721e-05, |
|
"loss": 13.7579, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_distillation_accuracy_counterfactual": 0.5017957927142124, |
|
"eval_distillation_accuracy_factual": 0.8217034376603386, |
|
"eval_distillation_f1_counterfactual": 0.45110748338345036, |
|
"eval_distillation_f1_factual": 0.8120033725947984, |
|
"eval_groundtruth_accuracy_counterfactual": 0.491277578245254, |
|
"eval_groundtruth_f1_counterfactual": 0.4306026913746316, |
|
"eval_groundtruth_f1_factual": 0.5751269063395339, |
|
"eval_icace_cosine": 0.5004109144210815, |
|
"eval_icace_l2": 0.6552226543426514, |
|
"eval_icace_normdiff": 0.43052297830581665, |
|
"eval_loss": 10.176661491394043, |
|
"eval_runtime": 8.7551, |
|
"eval_samples_per_second": 445.225, |
|
"eval_steps_per_second": 3.541, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.436741767764298e-05, |
|
"loss": 13.774, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.415077989601387e-05, |
|
"loss": 13.8, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_distillation_accuracy_counterfactual": 0.5059004617752694, |
|
"eval_distillation_accuracy_factual": 0.8234992303745511, |
|
"eval_distillation_f1_counterfactual": 0.4512025853019249, |
|
"eval_distillation_f1_factual": 0.8135629436467913, |
|
"eval_groundtruth_accuracy_counterfactual": 0.49461262185736277, |
|
"eval_groundtruth_f1_counterfactual": 0.43348852199044147, |
|
"eval_groundtruth_f1_factual": 0.5789128560578634, |
|
"eval_icace_cosine": 0.498872309923172, |
|
"eval_icace_l2": 0.6550716757774353, |
|
"eval_icace_normdiff": 0.42937180399894714, |
|
"eval_loss": 10.230228424072266, |
|
"eval_runtime": 7.836, |
|
"eval_samples_per_second": 497.446, |
|
"eval_steps_per_second": 3.956, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.393414211438475e-05, |
|
"loss": 13.5074, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.371750433275563e-05, |
|
"loss": 13.6928, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.3500866551126516e-05, |
|
"loss": 13.7141, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_distillation_accuracy_counterfactual": 0.5179579271421242, |
|
"eval_distillation_accuracy_factual": 0.8306824012314007, |
|
"eval_distillation_f1_counterfactual": 0.45833699531570044, |
|
"eval_distillation_f1_factual": 0.8222217849303248, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5033350436121088, |
|
"eval_groundtruth_f1_counterfactual": 0.433570321967066, |
|
"eval_groundtruth_f1_factual": 0.5763197755988125, |
|
"eval_icace_cosine": 0.49790823459625244, |
|
"eval_icace_l2": 0.6532407402992249, |
|
"eval_icace_normdiff": 0.4311390519142151, |
|
"eval_loss": 10.064295768737793, |
|
"eval_runtime": 4.6211, |
|
"eval_samples_per_second": 843.531, |
|
"eval_steps_per_second": 6.708, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.3284228769497406e-05, |
|
"loss": 13.3673, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.306759098786828e-05, |
|
"loss": 13.4799, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_distillation_accuracy_counterfactual": 0.5187275525910724, |
|
"eval_distillation_accuracy_factual": 0.8283735248845562, |
|
"eval_distillation_f1_counterfactual": 0.46382143672624687, |
|
"eval_distillation_f1_factual": 0.8207720593293605, |
|
"eval_groundtruth_accuracy_counterfactual": 0.5064135454079015, |
|
"eval_groundtruth_f1_counterfactual": 0.4409147466060839, |
|
"eval_groundtruth_f1_factual": 0.5772188705192105, |
|
"eval_icace_cosine": 0.49332907795906067, |
|
"eval_icace_l2": 0.6394243240356445, |
|
"eval_icace_normdiff": 0.4169619083404541, |
|
"eval_loss": 9.886384963989258, |
|
"eval_runtime": 4.5468, |
|
"eval_samples_per_second": 857.313, |
|
"eval_steps_per_second": 6.818, |
|
"step": 650 |
|
} |
|
], |
|
"max_steps": 4616, |
|
"num_train_epochs": 8, |
|
"total_flos": 98183903155200.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|