{ "best_metric": 0.4927329123020172, "best_model_checkpoint": "model_output/e2e_opentable_5_way__approximate__0-shot__seed-77__lstm/checkpoint-900", "epoch": 1.5437392795883362, "global_step": 900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 4.9783362218370885e-05, "loss": 44.1562, "step": 20 }, { "epoch": 0.07, "learning_rate": 4.956672443674177e-05, "loss": 40.0981, "step": 40 }, { "epoch": 0.09, "eval_distillation_accuracy_counterfactual": 0.40533606977937403, "eval_distillation_accuracy_factual": 0.7136993329912775, "eval_distillation_f1_counterfactual": 0.37584526793260814, "eval_distillation_f1_factual": 0.6903688842294228, "eval_groundtruth_accuracy_counterfactual": 0.33119548486403283, "eval_groundtruth_f1_counterfactual": 0.30050094417395845, "eval_groundtruth_f1_factual": 0.5105855093093573, "eval_icace_cosine": 0.7006840109825134, "eval_icace_l2": 0.7841421961784363, "eval_icace_normdiff": 0.6143233180046082, "eval_loss": 21.405780792236328, "eval_runtime": 4.5232, "eval_samples_per_second": 861.775, "eval_steps_per_second": 6.854, "step": 50 }, { "epoch": 0.1, "learning_rate": 4.935008665511265e-05, "loss": 36.4728, "step": 60 }, { "epoch": 0.14, "learning_rate": 4.913344887348354e-05, "loss": 33.5128, "step": 80 }, { "epoch": 0.17, "learning_rate": 4.891681109185442e-05, "loss": 29.1246, "step": 100 }, { "epoch": 0.17, "eval_distillation_accuracy_counterfactual": 0.4304771677783479, "eval_distillation_accuracy_factual": 0.7060030785017958, "eval_distillation_f1_counterfactual": 0.3874499036502149, "eval_distillation_f1_factual": 0.672147939569431, "eval_groundtruth_accuracy_counterfactual": 0.3599281682914315, "eval_groundtruth_f1_counterfactual": 0.317370542081978, "eval_groundtruth_f1_factual": 0.5138084441251398, "eval_icace_cosine": 0.5940763354301453, "eval_icace_l2": 0.7375399470329285, "eval_icace_normdiff": 0.5838260650634766, "eval_loss": 15.98747730255127, "eval_runtime": 4.638, "eval_samples_per_second": 840.44, "eval_steps_per_second": 6.684, "step": 100 }, { "epoch": 0.21, "learning_rate": 4.8700173310225307e-05, "loss": 25.2302, "step": 120 }, { "epoch": 0.24, "learning_rate": 4.848353552859619e-05, "loss": 21.5776, "step": 140 }, { "epoch": 0.26, "eval_distillation_accuracy_counterfactual": 0.4620318111852232, "eval_distillation_accuracy_factual": 0.6246793227296049, "eval_distillation_f1_counterfactual": 0.3998229577563853, "eval_distillation_f1_factual": 0.5809545209677998, "eval_groundtruth_accuracy_counterfactual": 0.4220112878399179, "eval_groundtruth_f1_counterfactual": 0.3560556229162239, "eval_groundtruth_f1_factual": 0.4660399182176905, "eval_icace_cosine": 0.541000485420227, "eval_icace_l2": 0.6904007792472839, "eval_icace_normdiff": 0.4991605877876282, "eval_loss": 12.918840408325195, "eval_runtime": 4.7423, "eval_samples_per_second": 821.967, "eval_steps_per_second": 6.537, "step": 150 }, { "epoch": 0.27, "learning_rate": 4.826689774696707e-05, "loss": 19.3343, "step": 160 }, { "epoch": 0.31, "learning_rate": 4.8050259965337955e-05, "loss": 17.9944, "step": 180 }, { "epoch": 0.34, "learning_rate": 4.7833622183708845e-05, "loss": 17.4913, "step": 200 }, { "epoch": 0.34, "eval_distillation_accuracy_counterfactual": 0.48512057465366853, "eval_distillation_accuracy_factual": 0.6765007696254489, "eval_distillation_f1_counterfactual": 0.4355374039109995, "eval_distillation_f1_factual": 0.648085081022865, "eval_groundtruth_accuracy_counterfactual": 0.45459209851205745, "eval_groundtruth_f1_counterfactual": 0.3992732299014231, "eval_groundtruth_f1_factual": 0.5315401786684271, "eval_icace_cosine": 0.5196343660354614, "eval_icace_l2": 0.6643721461296082, "eval_icace_normdiff": 0.46649259328842163, "eval_loss": 11.92587947845459, "eval_runtime": 4.7274, "eval_samples_per_second": 824.557, "eval_steps_per_second": 6.558, "step": 200 }, { "epoch": 0.38, "learning_rate": 4.761698440207972e-05, "loss": 16.9263, "step": 220 }, { "epoch": 0.41, "learning_rate": 4.740034662045061e-05, "loss": 16.4577, "step": 240 }, { "epoch": 0.43, "eval_distillation_accuracy_counterfactual": 0.4953822473063109, "eval_distillation_accuracy_factual": 0.6990764494612622, "eval_distillation_f1_counterfactual": 0.4349405670052812, "eval_distillation_f1_factual": 0.6754484776025494, "eval_groundtruth_accuracy_counterfactual": 0.46587993842996406, "eval_groundtruth_f1_counterfactual": 0.4007888276500159, "eval_groundtruth_f1_factual": 0.537290774599058, "eval_icace_cosine": 0.5181905627250671, "eval_icace_l2": 0.6551650166511536, "eval_icace_normdiff": 0.4527762532234192, "eval_loss": 11.470577239990234, "eval_runtime": 4.5537, "eval_samples_per_second": 856.014, "eval_steps_per_second": 6.808, "step": 250 }, { "epoch": 0.45, "learning_rate": 4.7183708838821494e-05, "loss": 16.175, "step": 260 }, { "epoch": 0.48, "learning_rate": 4.6967071057192376e-05, "loss": 16.2998, "step": 280 }, { "epoch": 0.51, "learning_rate": 4.675043327556326e-05, "loss": 15.9865, "step": 300 }, { "epoch": 0.51, "eval_distillation_accuracy_counterfactual": 0.49384299640841456, "eval_distillation_accuracy_factual": 0.7421754746023602, "eval_distillation_f1_counterfactual": 0.43651743845854823, "eval_distillation_f1_factual": 0.7241733737284982, "eval_groundtruth_accuracy_counterfactual": 0.4674191893278604, "eval_groundtruth_f1_counterfactual": 0.4069323134051392, "eval_groundtruth_f1_factual": 0.5441331322978117, "eval_icace_cosine": 0.5107740163803101, "eval_icace_l2": 0.652337908744812, "eval_icace_normdiff": 0.44736814498901367, "eval_loss": 11.233296394348145, "eval_runtime": 7.2912, "eval_samples_per_second": 534.616, "eval_steps_per_second": 4.252, "step": 300 }, { "epoch": 0.55, "learning_rate": 4.653379549393415e-05, "loss": 15.9314, "step": 320 }, { "epoch": 0.58, "learning_rate": 4.6317157712305025e-05, "loss": 15.505, "step": 340 }, { "epoch": 0.6, "eval_distillation_accuracy_counterfactual": 0.5, "eval_distillation_accuracy_factual": 0.7637249871729092, "eval_distillation_f1_counterfactual": 0.44117078829235296, "eval_distillation_f1_factual": 0.7500667273075408, "eval_groundtruth_accuracy_counterfactual": 0.4879425346331452, "eval_groundtruth_f1_counterfactual": 0.42134689982561896, "eval_groundtruth_f1_factual": 0.562706144496911, "eval_icace_cosine": 0.5107312798500061, "eval_icace_l2": 0.6565550565719604, "eval_icace_normdiff": 0.4413979947566986, "eval_loss": 11.069193840026855, "eval_runtime": 6.2756, "eval_samples_per_second": 621.137, "eval_steps_per_second": 4.94, "step": 350 }, { "epoch": 0.62, "learning_rate": 4.6100519930675915e-05, "loss": 15.3031, "step": 360 }, { "epoch": 0.65, "learning_rate": 4.58838821490468e-05, "loss": 15.4206, "step": 380 }, { "epoch": 0.69, "learning_rate": 4.566724436741768e-05, "loss": 14.8304, "step": 400 }, { "epoch": 0.69, "eval_distillation_accuracy_counterfactual": 0.508722421754746, "eval_distillation_accuracy_factual": 0.771421241662391, "eval_distillation_f1_counterfactual": 0.44238377528971906, "eval_distillation_f1_factual": 0.7522621473444253, "eval_groundtruth_accuracy_counterfactual": 0.49923037455105185, "eval_groundtruth_f1_counterfactual": 0.4273841467316483, "eval_groundtruth_f1_factual": 0.5609154619878947, "eval_icace_cosine": 0.5019481182098389, "eval_icace_l2": 0.6451992988586426, "eval_icace_normdiff": 0.4310374855995178, "eval_loss": 10.662566184997559, "eval_runtime": 4.5814, "eval_samples_per_second": 850.836, "eval_steps_per_second": 6.767, "step": 400 }, { "epoch": 0.72, "learning_rate": 4.5450606585788563e-05, "loss": 14.9398, "step": 420 }, { "epoch": 0.75, "learning_rate": 4.5233968804159446e-05, "loss": 14.8756, "step": 440 }, { "epoch": 0.77, "eval_distillation_accuracy_counterfactual": 0.5071831708568497, "eval_distillation_accuracy_factual": 0.7806567470497691, "eval_distillation_f1_counterfactual": 0.4509914585662811, "eval_distillation_f1_factual": 0.7726447146201695, "eval_groundtruth_accuracy_counterfactual": 0.49640841457157514, "eval_groundtruth_f1_counterfactual": 0.4369358063962636, "eval_groundtruth_f1_factual": 0.5701105764409996, "eval_icace_cosine": 0.505915641784668, "eval_icace_l2": 0.6462938189506531, "eval_icace_normdiff": 0.4340927302837372, "eval_loss": 10.489557266235352, "eval_runtime": 4.6204, "eval_samples_per_second": 843.645, "eval_steps_per_second": 6.709, "step": 450 }, { "epoch": 0.79, "learning_rate": 4.501733102253033e-05, "loss": 14.6303, "step": 460 }, { "epoch": 0.82, "learning_rate": 4.480069324090121e-05, "loss": 14.6318, "step": 480 }, { "epoch": 0.86, "learning_rate": 4.45840554592721e-05, "loss": 14.3026, "step": 500 }, { "epoch": 0.86, "eval_distillation_accuracy_counterfactual": 0.512827090815803, "eval_distillation_accuracy_factual": 0.8009235505387378, "eval_distillation_f1_counterfactual": 0.45566955055093156, "eval_distillation_f1_factual": 0.7939347571605715, "eval_groundtruth_accuracy_counterfactual": 0.4987172909184197, "eval_groundtruth_f1_counterfactual": 0.4364792426591563, "eval_groundtruth_f1_factual": 0.5727123146480367, "eval_icace_cosine": 0.4989969730377197, "eval_icace_l2": 0.6415925025939941, "eval_icace_normdiff": 0.4321776330471039, "eval_loss": 10.163902282714844, "eval_runtime": 4.5879, "eval_samples_per_second": 849.631, "eval_steps_per_second": 6.757, "step": 500 }, { "epoch": 0.89, "learning_rate": 4.436741767764298e-05, "loss": 14.249, "step": 520 }, { "epoch": 0.93, "learning_rate": 4.415077989601387e-05, "loss": 14.2543, "step": 540 }, { "epoch": 0.94, "eval_distillation_accuracy_counterfactual": 0.5005130836326321, "eval_distillation_accuracy_factual": 0.793227296049256, "eval_distillation_f1_counterfactual": 0.4392802659881442, "eval_distillation_f1_factual": 0.7820837225108551, "eval_groundtruth_accuracy_counterfactual": 0.4935864545920985, "eval_groundtruth_f1_counterfactual": 0.4259921068619771, "eval_groundtruth_f1_factual": 0.568994158851058, "eval_icace_cosine": 0.5032749772071838, "eval_icace_l2": 0.6510319709777832, "eval_icace_normdiff": 0.4337131381034851, "eval_loss": 10.366103172302246, "eval_runtime": 4.5676, "eval_samples_per_second": 853.4, "eval_steps_per_second": 6.787, "step": 550 }, { "epoch": 0.96, "learning_rate": 4.393414211438475e-05, "loss": 14.1474, "step": 560 }, { "epoch": 0.99, "learning_rate": 4.371750433275563e-05, "loss": 13.8257, "step": 580 }, { "epoch": 1.03, "learning_rate": 4.3500866551126516e-05, "loss": 13.6754, "step": 600 }, { "epoch": 1.03, "eval_distillation_accuracy_counterfactual": 0.5151359671626475, "eval_distillation_accuracy_factual": 0.8099025141097999, "eval_distillation_f1_counterfactual": 0.4455005046191058, "eval_distillation_f1_factual": 0.8010898247683294, "eval_groundtruth_accuracy_counterfactual": 0.5064135454079015, "eval_groundtruth_f1_counterfactual": 0.43123052906317394, "eval_groundtruth_f1_factual": 0.5698126916133328, "eval_icace_cosine": 0.500103235244751, "eval_icace_l2": 0.6384768486022949, "eval_icace_normdiff": 0.42454108595848083, "eval_loss": 10.087526321411133, "eval_runtime": 4.6814, "eval_samples_per_second": 832.665, "eval_steps_per_second": 6.622, "step": 600 }, { "epoch": 1.06, "learning_rate": 4.3284228769497406e-05, "loss": 13.7083, "step": 620 }, { "epoch": 1.1, "learning_rate": 4.306759098786828e-05, "loss": 13.479, "step": 640 }, { "epoch": 1.11, "eval_distillation_accuracy_counterfactual": 0.5071831708568497, "eval_distillation_accuracy_factual": 0.8114417650076963, "eval_distillation_f1_counterfactual": 0.4452735168036456, "eval_distillation_f1_factual": 0.8041390027772044, "eval_groundtruth_accuracy_counterfactual": 0.5023088763468445, "eval_groundtruth_f1_counterfactual": 0.43961184729665465, "eval_groundtruth_f1_factual": 0.5743904963335431, "eval_icace_cosine": 0.4984728991985321, "eval_icace_l2": 0.6454970240592957, "eval_icace_normdiff": 0.4303584694862366, "eval_loss": 10.136197090148926, "eval_runtime": 4.5364, "eval_samples_per_second": 859.262, "eval_steps_per_second": 6.834, "step": 650 }, { "epoch": 1.13, "learning_rate": 4.285095320623917e-05, "loss": 13.5985, "step": 660 }, { "epoch": 1.17, "learning_rate": 4.2634315424610055e-05, "loss": 13.5509, "step": 680 }, { "epoch": 1.2, "learning_rate": 4.241767764298094e-05, "loss": 13.5415, "step": 700 }, { "epoch": 1.2, "eval_distillation_accuracy_counterfactual": 0.508722421754746, "eval_distillation_accuracy_factual": 0.8052847614161108, "eval_distillation_f1_counterfactual": 0.442105002572385, "eval_distillation_f1_factual": 0.7987637622171455, "eval_groundtruth_accuracy_counterfactual": 0.5030785017957927, "eval_groundtruth_f1_counterfactual": 0.43495956928902413, "eval_groundtruth_f1_factual": 0.5712315287508367, "eval_icace_cosine": 0.49780699610710144, "eval_icace_l2": 0.6452174782752991, "eval_icace_normdiff": 0.43159618973731995, "eval_loss": 10.0033597946167, "eval_runtime": 4.6939, "eval_samples_per_second": 830.434, "eval_steps_per_second": 6.604, "step": 700 }, { "epoch": 1.23, "learning_rate": 4.220103986135182e-05, "loss": 13.4033, "step": 720 }, { "epoch": 1.27, "learning_rate": 4.198440207972271e-05, "loss": 13.1605, "step": 740 }, { "epoch": 1.29, "eval_distillation_accuracy_counterfactual": 0.5135967162647511, "eval_distillation_accuracy_factual": 0.8155464340687532, "eval_distillation_f1_counterfactual": 0.44334782704039266, "eval_distillation_f1_factual": 0.8087207034484185, "eval_groundtruth_accuracy_counterfactual": 0.5100051308363264, "eval_groundtruth_f1_counterfactual": 0.43833016787486423, "eval_groundtruth_f1_factual": 0.5747378526464983, "eval_icace_cosine": 0.49590805172920227, "eval_icace_l2": 0.6382647752761841, "eval_icace_normdiff": 0.4257577061653137, "eval_loss": 9.923479080200195, "eval_runtime": 4.7429, "eval_samples_per_second": 821.861, "eval_steps_per_second": 6.536, "step": 750 }, { "epoch": 1.3, "learning_rate": 4.1767764298093586e-05, "loss": 13.2256, "step": 760 }, { "epoch": 1.34, "learning_rate": 4.1551126516464476e-05, "loss": 13.2255, "step": 780 }, { "epoch": 1.37, "learning_rate": 4.133448873483536e-05, "loss": 13.3642, "step": 800 }, { "epoch": 1.37, "eval_distillation_accuracy_counterfactual": 0.5110312981015905, "eval_distillation_accuracy_factual": 0.8170856849666496, "eval_distillation_f1_counterfactual": 0.450487562425549, "eval_distillation_f1_factual": 0.8126073239205303, "eval_groundtruth_accuracy_counterfactual": 0.5107747562852745, "eval_groundtruth_f1_counterfactual": 0.4463667818812921, "eval_groundtruth_f1_factual": 0.5773838596268437, "eval_icace_cosine": 0.4975035488605499, "eval_icace_l2": 0.6422339081764221, "eval_icace_normdiff": 0.4249454140663147, "eval_loss": 10.02309799194336, "eval_runtime": 4.6282, "eval_samples_per_second": 842.226, "eval_steps_per_second": 6.698, "step": 800 }, { "epoch": 1.41, "learning_rate": 4.111785095320624e-05, "loss": 13.4977, "step": 820 }, { "epoch": 1.44, "learning_rate": 4.0901213171577124e-05, "loss": 13.244, "step": 840 }, { "epoch": 1.46, "eval_distillation_accuracy_counterfactual": 0.5118009235505387, "eval_distillation_accuracy_factual": 0.8129810159055926, "eval_distillation_f1_counterfactual": 0.44921794601695686, "eval_distillation_f1_factual": 0.8068472467220802, "eval_groundtruth_accuracy_counterfactual": 0.504617752693689, "eval_groundtruth_f1_counterfactual": 0.4373446550684491, "eval_groundtruth_f1_factual": 0.5739812608335962, "eval_icace_cosine": 0.49849507212638855, "eval_icace_l2": 0.6440720558166504, "eval_icace_normdiff": 0.4274795949459076, "eval_loss": 10.06081485748291, "eval_runtime": 4.5658, "eval_samples_per_second": 853.743, "eval_steps_per_second": 6.79, "step": 850 }, { "epoch": 1.48, "learning_rate": 4.068457538994801e-05, "loss": 12.8572, "step": 860 }, { "epoch": 1.51, "learning_rate": 4.04679376083189e-05, "loss": 13.1029, "step": 880 }, { "epoch": 1.54, "learning_rate": 4.025129982668977e-05, "loss": 13.132, "step": 900 }, { "epoch": 1.54, "eval_distillation_accuracy_counterfactual": 0.5189840944073884, "eval_distillation_accuracy_factual": 0.8196511031298102, "eval_distillation_f1_counterfactual": 0.4513931354514297, "eval_distillation_f1_factual": 0.815584477795333, "eval_groundtruth_accuracy_counterfactual": 0.513083632632119, "eval_groundtruth_f1_counterfactual": 0.4442783591730798, "eval_groundtruth_f1_factual": 0.5677589854844752, "eval_icace_cosine": 0.4927329123020172, "eval_icace_l2": 0.6366254091262817, "eval_icace_normdiff": 0.42217281460762024, "eval_loss": 9.926761627197266, "eval_runtime": 4.5591, "eval_samples_per_second": 854.999, "eval_steps_per_second": 6.8, "step": 900 } ], "max_steps": 4616, "num_train_epochs": 8, "total_flos": 135969650611200.0, "trial_name": null, "trial_params": null }