{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.99819298879653, "global_step": 41500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression_loss": 0.0, "distillation_loss": 10.801469802856445, "epoch": 0.0, "learning_rate": 6.999996751507258e-05, "loss": 12.3405, "step": 10, "task_loss": 3.04644775390625 }, { "compression_loss": 0.0, "distillation_loss": 5.152049541473389, "epoch": 0.01, "learning_rate": 6.999976899629009e-05, "loss": 7.0823, "step": 20, "task_loss": 2.0041749477386475 }, { "compression_loss": 0.0, "distillation_loss": 3.413750171661377, "epoch": 0.01, "learning_rate": 6.999939000692937e-05, "loss": 3.5252, "step": 30, "task_loss": 2.1580514907836914 }, { "compression_loss": 0.0, "distillation_loss": 2.6377995014190674, "epoch": 0.01, "learning_rate": 6.999883054894466e-05, "loss": 2.9633, "step": 40, "task_loss": 1.704897165298462 }, { "compression_loss": 0.0, "distillation_loss": 2.8139700889587402, "epoch": 0.02, "learning_rate": 6.999809062522068e-05, "loss": 2.4806, "step": 50, "task_loss": 1.2986035346984863 }, { "compression_loss": 0.0, "distillation_loss": 2.0872642993927, "epoch": 0.02, "learning_rate": 6.999717023957274e-05, "loss": 2.3172, "step": 60, "task_loss": 1.510830044746399 }, { "compression_loss": 0.0, "distillation_loss": 2.322617530822754, "epoch": 0.03, "learning_rate": 6.999606939674666e-05, "loss": 2.0705, "step": 70, "task_loss": 1.6173977851867676 }, { "compression_loss": 0.0, "distillation_loss": 1.930269718170166, "epoch": 0.03, "learning_rate": 6.999478810241875e-05, "loss": 1.8671, "step": 80, "task_loss": 1.3637044429779053 }, { "compression_loss": 0.0, "distillation_loss": 2.235046863555908, "epoch": 0.03, "learning_rate": 6.999332636319578e-05, "loss": 1.9769, "step": 90, "task_loss": 1.294555425643921 }, { "compression_loss": 0.0, "distillation_loss": 2.0402920246124268, "epoch": 0.04, "learning_rate": 6.999168418661498e-05, "loss": 1.9396, "step": 100, "task_loss": 1.5622482299804688 }, { "compression_loss": 0.0, "distillation_loss": 1.7098126411437988, "epoch": 0.04, "learning_rate": 6.9989861581144e-05, "loss": 1.8838, "step": 110, "task_loss": 1.0829225778579712 }, { "compression_loss": 0.0, "distillation_loss": 1.186718225479126, "epoch": 0.04, "learning_rate": 6.998785855618076e-05, "loss": 1.7517, "step": 120, "task_loss": 0.6862260699272156 }, { "compression_loss": 0.0, "distillation_loss": 2.135342597961426, "epoch": 0.05, "learning_rate": 6.998567512205353e-05, "loss": 1.8173, "step": 130, "task_loss": 1.4869736433029175 }, { "compression_loss": 0.0, "distillation_loss": 1.74942946434021, "epoch": 0.05, "learning_rate": 6.998331129002086e-05, "loss": 1.962, "step": 140, "task_loss": 1.1613945960998535 }, { "compression_loss": 0.0, "distillation_loss": 1.7254133224487305, "epoch": 0.05, "learning_rate": 6.998076707227144e-05, "loss": 1.7815, "step": 150, "task_loss": 1.527439832687378 }, { "compression_loss": 0.0, "distillation_loss": 2.164252758026123, "epoch": 0.06, "learning_rate": 6.997804248192412e-05, "loss": 1.7949, "step": 160, "task_loss": 1.0921921730041504 }, { "compression_loss": 0.0, "distillation_loss": 2.0819597244262695, "epoch": 0.06, "learning_rate": 6.997513753302782e-05, "loss": 1.765, "step": 170, "task_loss": 1.229843020439148 }, { "compression_loss": 0.0, "distillation_loss": 1.4309144020080566, "epoch": 0.07, "learning_rate": 6.99720522405614e-05, "loss": 1.6999, "step": 180, "task_loss": 1.0473328828811646 }, { "compression_loss": 0.0, "distillation_loss": 1.8363723754882812, "epoch": 0.07, "learning_rate": 6.996878662043369e-05, "loss": 1.6825, "step": 190, "task_loss": 1.4055719375610352 }, { "compression_loss": 0.0, "distillation_loss": 1.8027558326721191, "epoch": 0.07, "learning_rate": 6.996534068948332e-05, "loss": 1.7122, "step": 200, "task_loss": 1.052909255027771 }, { "compression_loss": 0.0, "distillation_loss": 1.6521565914154053, "epoch": 0.08, "learning_rate": 6.996171446547867e-05, "loss": 1.7636, "step": 210, "task_loss": 0.9150123596191406 }, { "compression_loss": 0.0, "distillation_loss": 2.1441216468811035, "epoch": 0.08, "learning_rate": 6.995790796711777e-05, "loss": 1.6195, "step": 220, "task_loss": 1.9888694286346436 }, { "compression_loss": 0.0, "distillation_loss": 1.6174334287643433, "epoch": 0.08, "learning_rate": 6.99539212140282e-05, "loss": 1.5489, "step": 230, "task_loss": 1.0295372009277344 }, { "compression_loss": 0.0, "distillation_loss": 1.3089139461517334, "epoch": 0.09, "learning_rate": 6.994975422676701e-05, "loss": 1.5913, "step": 240, "task_loss": 1.1961848735809326 }, { "compression_loss": 0.0, "distillation_loss": 1.507627010345459, "epoch": 0.09, "learning_rate": 6.994540702682057e-05, "loss": 1.5598, "step": 250, "task_loss": 1.0574748516082764 }, { "epoch": 0.09, "eval_exact_match": 75.62913907284768, "eval_f1": 85.12845450683153, "step": 250 }, { "compression_loss": 0.0, "distillation_loss": 1.5679240226745605, "epoch": 0.09, "learning_rate": 6.994087963660452e-05, "loss": 1.5927, "step": 260, "task_loss": 0.7596392631530762 }, { "compression_loss": 0.0, "distillation_loss": 1.3486156463623047, "epoch": 0.1, "learning_rate": 6.99361720794636e-05, "loss": 1.5363, "step": 270, "task_loss": 0.8923729062080383 }, { "compression_loss": 0.0, "distillation_loss": 1.6968021392822266, "epoch": 0.1, "learning_rate": 6.993128437967155e-05, "loss": 1.5517, "step": 280, "task_loss": 1.2004411220550537 }, { "compression_loss": 0.0, "distillation_loss": 1.994243860244751, "epoch": 0.1, "learning_rate": 6.992621656243103e-05, "loss": 1.4897, "step": 290, "task_loss": 1.4071820974349976 }, { "compression_loss": 0.0, "distillation_loss": 1.4140949249267578, "epoch": 0.11, "learning_rate": 6.992096865387336e-05, "loss": 1.5032, "step": 300, "task_loss": 0.9100825786590576 }, { "compression_loss": 0.0, "distillation_loss": 1.4215054512023926, "epoch": 0.11, "learning_rate": 6.991554068105856e-05, "loss": 1.4617, "step": 310, "task_loss": 0.948255181312561 }, { "compression_loss": 0.0, "distillation_loss": 1.661365270614624, "epoch": 0.12, "learning_rate": 6.990993267197504e-05, "loss": 1.4116, "step": 320, "task_loss": 1.1182901859283447 }, { "compression_loss": 0.0, "distillation_loss": 1.2898075580596924, "epoch": 0.12, "learning_rate": 6.990414465553964e-05, "loss": 1.5381, "step": 330, "task_loss": 0.8843156695365906 }, { "compression_loss": 0.0, "distillation_loss": 1.2767791748046875, "epoch": 0.12, "learning_rate": 6.989817666159727e-05, "loss": 1.4393, "step": 340, "task_loss": 0.9783102869987488 }, { "compression_loss": 0.0, "distillation_loss": 1.4274914264678955, "epoch": 0.13, "learning_rate": 6.989202872092093e-05, "loss": 1.5174, "step": 350, "task_loss": 0.9928803443908691 }, { "compression_loss": 0.0, "distillation_loss": 1.2917709350585938, "epoch": 0.13, "learning_rate": 6.988570086521147e-05, "loss": 1.3964, "step": 360, "task_loss": 1.268720269203186 }, { "compression_loss": 0.0, "distillation_loss": 1.3270267248153687, "epoch": 0.13, "learning_rate": 6.987919312709744e-05, "loss": 1.4736, "step": 370, "task_loss": 1.111161708831787 }, { "compression_loss": 0.0, "distillation_loss": 1.4867007732391357, "epoch": 0.14, "learning_rate": 6.987250554013494e-05, "loss": 1.5575, "step": 380, "task_loss": 1.3485462665557861 }, { "compression_loss": 0.0, "distillation_loss": 1.7727158069610596, "epoch": 0.14, "learning_rate": 6.98656381388074e-05, "loss": 1.399, "step": 390, "task_loss": 1.4341893196105957 }, { "compression_loss": 0.0, "distillation_loss": 1.7645524740219116, "epoch": 0.14, "learning_rate": 6.985859095852546e-05, "loss": 1.4392, "step": 400, "task_loss": 0.8381137847900391 }, { "compression_loss": 0.0, "distillation_loss": 1.3841367959976196, "epoch": 0.15, "learning_rate": 6.985136403562674e-05, "loss": 1.4883, "step": 410, "task_loss": 0.6091371774673462 }, { "compression_loss": 0.0, "distillation_loss": 1.4980037212371826, "epoch": 0.15, "learning_rate": 6.984395740737568e-05, "loss": 1.4821, "step": 420, "task_loss": 0.7701343297958374 }, { "compression_loss": 0.0, "distillation_loss": 1.4909740686416626, "epoch": 0.16, "learning_rate": 6.983637111196338e-05, "loss": 1.4347, "step": 430, "task_loss": 1.2313339710235596 }, { "compression_loss": 0.0, "distillation_loss": 1.2218682765960693, "epoch": 0.16, "learning_rate": 6.982860518850731e-05, "loss": 1.355, "step": 440, "task_loss": 0.7704342603683472 }, { "compression_loss": 0.0, "distillation_loss": 0.997610330581665, "epoch": 0.16, "learning_rate": 6.982065967705115e-05, "loss": 1.3387, "step": 450, "task_loss": 0.5290695428848267 }, { "compression_loss": 0.0, "distillation_loss": 1.199681282043457, "epoch": 0.17, "learning_rate": 6.981253461856468e-05, "loss": 1.5446, "step": 460, "task_loss": 1.1124558448791504 }, { "compression_loss": 0.0, "distillation_loss": 1.501362681388855, "epoch": 0.17, "learning_rate": 6.980423005494342e-05, "loss": 1.3709, "step": 470, "task_loss": 0.956885814666748 }, { "compression_loss": 0.0, "distillation_loss": 1.6759693622589111, "epoch": 0.17, "learning_rate": 6.979574602900849e-05, "loss": 1.409, "step": 480, "task_loss": 1.1642181873321533 }, { "compression_loss": 0.0, "distillation_loss": 1.4492310285568237, "epoch": 0.18, "learning_rate": 6.978708258450635e-05, "loss": 1.4329, "step": 490, "task_loss": 1.134920358657837 }, { "compression_loss": 0.0, "distillation_loss": 1.5545307397842407, "epoch": 0.18, "learning_rate": 6.977823976610867e-05, "loss": 1.4856, "step": 500, "task_loss": 1.1181845664978027 }, { "epoch": 0.18, "eval_exact_match": 77.1050141911069, "eval_f1": 86.09609925606219, "step": 500 }, { "compression_loss": 0.0, "distillation_loss": 1.4672131538391113, "epoch": 0.18, "learning_rate": 6.9769217619412e-05, "loss": 1.3902, "step": 510, "task_loss": 0.6892313957214355 }, { "compression_loss": 0.0, "distillation_loss": 1.4067072868347168, "epoch": 0.19, "learning_rate": 6.976001619093752e-05, "loss": 1.3474, "step": 520, "task_loss": 0.6431139707565308 }, { "compression_loss": 0.0, "distillation_loss": 1.3734104633331299, "epoch": 0.19, "learning_rate": 6.975063552813094e-05, "loss": 1.3806, "step": 530, "task_loss": 0.8170199990272522 }, { "compression_loss": 0.0, "distillation_loss": 1.1358160972595215, "epoch": 0.2, "learning_rate": 6.974107567936206e-05, "loss": 1.3843, "step": 540, "task_loss": 0.5692929029464722 }, { "compression_loss": 0.0, "distillation_loss": 1.7749069929122925, "epoch": 0.2, "learning_rate": 6.973133669392473e-05, "loss": 1.4445, "step": 550, "task_loss": 1.323117971420288 }, { "compression_loss": 0.0, "distillation_loss": 1.689734697341919, "epoch": 0.2, "learning_rate": 6.97214186220364e-05, "loss": 1.3183, "step": 560, "task_loss": 1.1061980724334717 }, { "compression_loss": 0.0, "distillation_loss": 1.3462835550308228, "epoch": 0.21, "learning_rate": 6.971132151483798e-05, "loss": 1.2587, "step": 570, "task_loss": 0.793758749961853 }, { "compression_loss": 0.0, "distillation_loss": 1.4587080478668213, "epoch": 0.21, "learning_rate": 6.970104542439358e-05, "loss": 1.4055, "step": 580, "task_loss": 0.8984804153442383 }, { "compression_loss": 0.0, "distillation_loss": 1.171175241470337, "epoch": 0.21, "learning_rate": 6.969059040369014e-05, "loss": 1.2979, "step": 590, "task_loss": 0.8862461447715759 }, { "compression_loss": 0.0, "distillation_loss": 1.400400161743164, "epoch": 0.22, "learning_rate": 6.967995650663729e-05, "loss": 1.2604, "step": 600, "task_loss": 1.041499137878418 }, { "compression_loss": 0.0, "distillation_loss": 1.278060793876648, "epoch": 0.22, "learning_rate": 6.966914378806696e-05, "loss": 1.3942, "step": 610, "task_loss": 1.0624408721923828 }, { "compression_loss": 0.0, "distillation_loss": 0.905518651008606, "epoch": 0.22, "learning_rate": 6.965815230373319e-05, "loss": 1.3035, "step": 620, "task_loss": 0.7998524904251099 }, { "compression_loss": 0.0, "distillation_loss": 1.2518730163574219, "epoch": 0.23, "learning_rate": 6.964698211031174e-05, "loss": 1.3988, "step": 630, "task_loss": 1.4076586961746216 }, { "compression_loss": 0.0, "distillation_loss": 1.0938901901245117, "epoch": 0.23, "learning_rate": 6.963563326539988e-05, "loss": 1.3236, "step": 640, "task_loss": 0.5017621517181396 }, { "compression_loss": 0.0, "distillation_loss": 1.3720566034317017, "epoch": 0.23, "learning_rate": 6.962410582751611e-05, "loss": 1.3655, "step": 650, "task_loss": 1.3073033094406128 }, { "compression_loss": 0.0, "distillation_loss": 1.45675528049469, "epoch": 0.24, "learning_rate": 6.961239985609975e-05, "loss": 1.3654, "step": 660, "task_loss": 1.0983452796936035 }, { "compression_loss": 0.0, "distillation_loss": 1.594604730606079, "epoch": 0.24, "learning_rate": 6.960051541151072e-05, "loss": 1.2799, "step": 670, "task_loss": 1.1729819774627686 }, { "compression_loss": 0.0, "distillation_loss": 1.3936469554901123, "epoch": 0.25, "learning_rate": 6.958845255502922e-05, "loss": 1.2039, "step": 680, "task_loss": 0.9447198510169983 }, { "compression_loss": 0.0, "distillation_loss": 1.3353527784347534, "epoch": 0.25, "learning_rate": 6.95762113488554e-05, "loss": 1.2488, "step": 690, "task_loss": 1.1083095073699951 }, { "compression_loss": 0.0, "distillation_loss": 1.8204264640808105, "epoch": 0.25, "learning_rate": 6.956379185610903e-05, "loss": 1.3971, "step": 700, "task_loss": 1.080670714378357 }, { "compression_loss": 0.0, "distillation_loss": 1.3583054542541504, "epoch": 0.26, "learning_rate": 6.955119414082917e-05, "loss": 1.2583, "step": 710, "task_loss": 0.8659499883651733 }, { "compression_loss": 0.0, "distillation_loss": 1.4246025085449219, "epoch": 0.26, "learning_rate": 6.953841826797391e-05, "loss": 1.3295, "step": 720, "task_loss": 1.0010908842086792 }, { "compression_loss": 0.0, "distillation_loss": 1.0018075704574585, "epoch": 0.26, "learning_rate": 6.952546430341994e-05, "loss": 1.3176, "step": 730, "task_loss": 0.6832550764083862 }, { "compression_loss": 0.0, "distillation_loss": 1.1276683807373047, "epoch": 0.27, "learning_rate": 6.951233231396224e-05, "loss": 1.2458, "step": 740, "task_loss": 0.6161213517189026 }, { "compression_loss": 0.0, "distillation_loss": 1.3984005451202393, "epoch": 0.27, "learning_rate": 6.949902236731379e-05, "loss": 1.3199, "step": 750, "task_loss": 1.2446082830429077 }, { "epoch": 0.27, "eval_exact_match": 77.99432355723746, "eval_f1": 86.68746599929251, "step": 750 }, { "compression_loss": 0.0, "distillation_loss": 1.1631975173950195, "epoch": 0.27, "learning_rate": 6.948553453210512e-05, "loss": 1.2846, "step": 760, "task_loss": 0.5416216850280762 }, { "compression_loss": 0.0, "distillation_loss": 1.2278193235397339, "epoch": 0.28, "learning_rate": 6.947186887788406e-05, "loss": 1.1666, "step": 770, "task_loss": 0.6427184343338013 }, { "compression_loss": 0.0, "distillation_loss": 0.8423769474029541, "epoch": 0.28, "learning_rate": 6.945802547511531e-05, "loss": 1.1551, "step": 780, "task_loss": 0.5025490522384644 }, { "compression_loss": 0.0, "distillation_loss": 1.0702672004699707, "epoch": 0.29, "learning_rate": 6.944400439518014e-05, "loss": 1.3203, "step": 790, "task_loss": 0.5747840404510498 }, { "compression_loss": 0.0, "distillation_loss": 1.1548950672149658, "epoch": 0.29, "learning_rate": 6.94298057103759e-05, "loss": 1.325, "step": 800, "task_loss": 0.7011659741401672 }, { "compression_loss": 0.0, "distillation_loss": 1.390937089920044, "epoch": 0.29, "learning_rate": 6.941542949391584e-05, "loss": 1.3387, "step": 810, "task_loss": 0.6473915576934814 }, { "compression_loss": 0.0, "distillation_loss": 0.9621462821960449, "epoch": 0.3, "learning_rate": 6.940087581992852e-05, "loss": 1.3016, "step": 820, "task_loss": 0.5471967458724976 }, { "compression_loss": 0.0, "distillation_loss": 1.5039995908737183, "epoch": 0.3, "learning_rate": 6.93861447634576e-05, "loss": 1.3924, "step": 830, "task_loss": 0.7582854628562927 }, { "compression_loss": 0.0, "distillation_loss": 1.2158305644989014, "epoch": 0.3, "learning_rate": 6.937123640046135e-05, "loss": 1.339, "step": 840, "task_loss": 0.984902560710907 }, { "compression_loss": 0.0, "distillation_loss": 1.307792067527771, "epoch": 0.31, "learning_rate": 6.935615080781233e-05, "loss": 1.2595, "step": 850, "task_loss": 0.8387401103973389 }, { "compression_loss": 0.0, "distillation_loss": 1.1716396808624268, "epoch": 0.31, "learning_rate": 6.934088806329689e-05, "loss": 1.1884, "step": 860, "task_loss": 0.7160657644271851 }, { "compression_loss": 0.0, "distillation_loss": 1.267090082168579, "epoch": 0.31, "learning_rate": 6.932544824561489e-05, "loss": 1.2543, "step": 870, "task_loss": 0.8390096426010132 }, { "compression_loss": 0.0, "distillation_loss": 1.0325896739959717, "epoch": 0.32, "learning_rate": 6.930983143437924e-05, "loss": 1.245, "step": 880, "task_loss": 0.729494035243988 }, { "compression_loss": 0.0, "distillation_loss": 1.7409932613372803, "epoch": 0.32, "learning_rate": 6.929403771011544e-05, "loss": 1.2879, "step": 890, "task_loss": 1.1845016479492188 }, { "compression_loss": 0.0, "distillation_loss": 0.8987354636192322, "epoch": 0.33, "learning_rate": 6.927806715426126e-05, "loss": 1.1892, "step": 900, "task_loss": 0.8024774193763733 }, { "compression_loss": 0.0, "distillation_loss": 1.344858169555664, "epoch": 0.33, "learning_rate": 6.926191984916625e-05, "loss": 1.2256, "step": 910, "task_loss": 0.9374756217002869 }, { "compression_loss": 0.0, "distillation_loss": 0.851283609867096, "epoch": 0.33, "learning_rate": 6.924559587809135e-05, "loss": 1.1071, "step": 920, "task_loss": 0.7503871917724609 }, { "compression_loss": 0.0, "distillation_loss": 1.0525801181793213, "epoch": 0.34, "learning_rate": 6.922909532520844e-05, "loss": 1.323, "step": 930, "task_loss": 0.9204051494598389 }, { "compression_loss": 0.0, "distillation_loss": 1.303235411643982, "epoch": 0.34, "learning_rate": 6.921241827559993e-05, "loss": 1.307, "step": 940, "task_loss": 0.7826588153839111 }, { "compression_loss": 0.0, "distillation_loss": 1.189795970916748, "epoch": 0.34, "learning_rate": 6.919556481525826e-05, "loss": 1.44, "step": 950, "task_loss": 0.9468139410018921 }, { "compression_loss": 0.0, "distillation_loss": 1.2204010486602783, "epoch": 0.35, "learning_rate": 6.917853503108559e-05, "loss": 1.25, "step": 960, "task_loss": 0.7199236154556274 }, { "compression_loss": 0.0, "distillation_loss": 1.0518145561218262, "epoch": 0.35, "learning_rate": 6.91613290108932e-05, "loss": 1.296, "step": 970, "task_loss": 0.6551309823989868 }, { "compression_loss": 0.0, "distillation_loss": 1.3444669246673584, "epoch": 0.35, "learning_rate": 6.914394684340107e-05, "loss": 1.0248, "step": 980, "task_loss": 0.5821710824966431 }, { "compression_loss": 0.0, "distillation_loss": 1.2943898439407349, "epoch": 0.36, "learning_rate": 6.912638861823755e-05, "loss": 1.0796, "step": 990, "task_loss": 0.9846506714820862 }, { "compression_loss": 0.0, "distillation_loss": 1.1659986972808838, "epoch": 0.36, "learning_rate": 6.910865442593876e-05, "loss": 1.1452, "step": 1000, "task_loss": 0.8229370713233948 }, { "epoch": 0.36, "eval_exact_match": 78.06054872280038, "eval_f1": 86.791557332193, "step": 1000 }, { "compression_loss": 0.0, "distillation_loss": 1.6198532581329346, "epoch": 0.37, "learning_rate": 6.909074435794811e-05, "loss": 1.4119, "step": 1010, "task_loss": 1.3580809831619263 }, { "compression_loss": 0.0, "distillation_loss": 1.59236741065979, "epoch": 0.37, "learning_rate": 6.907265850661598e-05, "loss": 1.2508, "step": 1020, "task_loss": 1.45587158203125 }, { "compression_loss": 0.0, "distillation_loss": 1.1220039129257202, "epoch": 0.37, "learning_rate": 6.905439696519907e-05, "loss": 1.1633, "step": 1030, "task_loss": 0.9460667967796326 }, { "compression_loss": 0.0, "distillation_loss": 1.548439860343933, "epoch": 0.38, "learning_rate": 6.903595982786005e-05, "loss": 1.3139, "step": 1040, "task_loss": 1.329020380973816 }, { "compression_loss": 0.0, "distillation_loss": 0.7319619059562683, "epoch": 0.38, "learning_rate": 6.901734718966698e-05, "loss": 1.1286, "step": 1050, "task_loss": 0.6876011490821838 }, { "compression_loss": 0.0, "distillation_loss": 1.02553391456604, "epoch": 0.38, "learning_rate": 6.89985591465929e-05, "loss": 1.1894, "step": 1060, "task_loss": 1.0004324913024902 }, { "compression_loss": 0.0, "distillation_loss": 1.4067671298980713, "epoch": 0.39, "learning_rate": 6.897959579551526e-05, "loss": 1.2614, "step": 1070, "task_loss": 0.950558066368103 }, { "compression_loss": 0.0, "distillation_loss": 0.9523043632507324, "epoch": 0.39, "learning_rate": 6.89604572342155e-05, "loss": 1.1757, "step": 1080, "task_loss": 0.894980251789093 }, { "compression_loss": 0.0, "distillation_loss": 1.028804063796997, "epoch": 0.39, "learning_rate": 6.894114356137845e-05, "loss": 1.2074, "step": 1090, "task_loss": 0.6418243050575256 }, { "compression_loss": 0.0, "distillation_loss": 0.8996524810791016, "epoch": 0.4, "learning_rate": 6.89216548765919e-05, "loss": 1.0727, "step": 1100, "task_loss": 0.49160271883010864 }, { "compression_loss": 0.0, "distillation_loss": 1.2093851566314697, "epoch": 0.4, "learning_rate": 6.890199128034611e-05, "loss": 1.1026, "step": 1110, "task_loss": 0.8884088397026062 }, { "compression_loss": 0.0, "distillation_loss": 1.4207491874694824, "epoch": 0.4, "learning_rate": 6.888215287403314e-05, "loss": 1.2668, "step": 1120, "task_loss": 1.6956202983856201 }, { "compression_loss": 0.0, "distillation_loss": 0.8989244699478149, "epoch": 0.41, "learning_rate": 6.88621397599465e-05, "loss": 1.1698, "step": 1130, "task_loss": 0.8216118216514587 }, { "compression_loss": 0.0, "distillation_loss": 1.310133934020996, "epoch": 0.41, "learning_rate": 6.884195204128056e-05, "loss": 1.2178, "step": 1140, "task_loss": 0.9177054166793823 }, { "compression_loss": 0.0, "distillation_loss": 0.8592339158058167, "epoch": 0.42, "learning_rate": 6.882158982212994e-05, "loss": 1.1676, "step": 1150, "task_loss": 0.7672922611236572 }, { "compression_loss": 0.0, "distillation_loss": 1.31768798828125, "epoch": 0.42, "learning_rate": 6.880105320748914e-05, "loss": 1.2468, "step": 1160, "task_loss": 1.0583456754684448 }, { "compression_loss": 0.0, "distillation_loss": 1.2563865184783936, "epoch": 0.42, "learning_rate": 6.878034230325181e-05, "loss": 1.2145, "step": 1170, "task_loss": 0.8758540749549866 }, { "compression_loss": 0.0, "distillation_loss": 1.582754373550415, "epoch": 0.43, "learning_rate": 6.875945721621036e-05, "loss": 1.2769, "step": 1180, "task_loss": 1.3132786750793457 }, { "compression_loss": 0.0, "distillation_loss": 1.1652511358261108, "epoch": 0.43, "learning_rate": 6.873839805405531e-05, "loss": 1.1476, "step": 1190, "task_loss": 0.6254457831382751 }, { "compression_loss": 0.0, "distillation_loss": 1.1324548721313477, "epoch": 0.43, "learning_rate": 6.871716492537479e-05, "loss": 1.1278, "step": 1200, "task_loss": 0.786486029624939 }, { "compression_loss": 0.0, "distillation_loss": 1.2423601150512695, "epoch": 0.44, "learning_rate": 6.869575793965394e-05, "loss": 1.2086, "step": 1210, "task_loss": 0.7296125292778015 }, { "compression_loss": 0.0, "distillation_loss": 1.30234956741333, "epoch": 0.44, "learning_rate": 6.86741772072744e-05, "loss": 1.136, "step": 1220, "task_loss": 0.875715970993042 }, { "compression_loss": 0.0, "distillation_loss": 1.0005595684051514, "epoch": 0.44, "learning_rate": 6.865242283951365e-05, "loss": 1.2373, "step": 1230, "task_loss": 1.0233049392700195 }, { "compression_loss": 0.0, "distillation_loss": 1.4597254991531372, "epoch": 0.45, "learning_rate": 6.863049494854456e-05, "loss": 1.2707, "step": 1240, "task_loss": 0.9770482182502747 }, { "compression_loss": 0.0, "distillation_loss": 0.8547976016998291, "epoch": 0.45, "learning_rate": 6.86083936474347e-05, "loss": 1.0618, "step": 1250, "task_loss": 0.6300672888755798 }, { "epoch": 0.45, "eval_exact_match": 78.79848628193, "eval_f1": 87.37934185870961, "step": 1250 }, { "compression_loss": 0.0, "distillation_loss": 1.3759208917617798, "epoch": 0.46, "learning_rate": 6.858611905014577e-05, "loss": 1.2313, "step": 1260, "task_loss": 1.2745630741119385 }, { "compression_loss": 0.0, "distillation_loss": 0.9745676517486572, "epoch": 0.46, "learning_rate": 6.856367127153315e-05, "loss": 1.2901, "step": 1270, "task_loss": 0.6641950607299805 }, { "compression_loss": 0.0, "distillation_loss": 1.1643112897872925, "epoch": 0.46, "learning_rate": 6.854105042734507e-05, "loss": 1.1501, "step": 1280, "task_loss": 1.071144938468933 }, { "compression_loss": 0.0, "distillation_loss": 1.0921354293823242, "epoch": 0.47, "learning_rate": 6.851825663422227e-05, "loss": 1.1568, "step": 1290, "task_loss": 0.5675968527793884 }, { "compression_loss": 0.0, "distillation_loss": 1.0819282531738281, "epoch": 0.47, "learning_rate": 6.849529000969717e-05, "loss": 0.9992, "step": 1300, "task_loss": 1.3584985733032227 }, { "compression_loss": 0.0, "distillation_loss": 0.9455113410949707, "epoch": 0.47, "learning_rate": 6.847215067219341e-05, "loss": 1.0654, "step": 1310, "task_loss": 0.7708945870399475 }, { "compression_loss": 0.0, "distillation_loss": 1.0378615856170654, "epoch": 0.48, "learning_rate": 6.844883874102523e-05, "loss": 1.2306, "step": 1320, "task_loss": 0.8547934293746948 }, { "compression_loss": 0.0, "distillation_loss": 0.9078717231750488, "epoch": 0.48, "learning_rate": 6.842535433639674e-05, "loss": 1.094, "step": 1330, "task_loss": 0.7515894174575806 }, { "compression_loss": 0.0, "distillation_loss": 1.2001479864120483, "epoch": 0.48, "learning_rate": 6.840169757940148e-05, "loss": 1.1639, "step": 1340, "task_loss": 0.9455994367599487 }, { "compression_loss": 0.0, "distillation_loss": 1.205193042755127, "epoch": 0.49, "learning_rate": 6.837786859202159e-05, "loss": 1.265, "step": 1350, "task_loss": 0.8357219696044922 }, { "compression_loss": 0.0, "distillation_loss": 1.1558539867401123, "epoch": 0.49, "learning_rate": 6.835386749712738e-05, "loss": 1.1953, "step": 1360, "task_loss": 0.9024025797843933 }, { "compression_loss": 0.0, "distillation_loss": 0.8633468151092529, "epoch": 0.5, "learning_rate": 6.832969441847655e-05, "loss": 1.1829, "step": 1370, "task_loss": 1.0007044076919556 }, { "compression_loss": 0.0, "distillation_loss": 1.1772587299346924, "epoch": 0.5, "learning_rate": 6.830534948071362e-05, "loss": 1.1232, "step": 1380, "task_loss": 0.6399517059326172 }, { "compression_loss": 0.0, "distillation_loss": 1.416799783706665, "epoch": 0.5, "learning_rate": 6.828083280936928e-05, "loss": 1.1562, "step": 1390, "task_loss": 0.8481613397598267 }, { "compression_loss": 0.0, "distillation_loss": 1.0655555725097656, "epoch": 0.51, "learning_rate": 6.825614453085974e-05, "loss": 1.2153, "step": 1400, "task_loss": 0.9690505266189575 }, { "compression_loss": 0.0, "distillation_loss": 0.7563189268112183, "epoch": 0.51, "learning_rate": 6.823128477248606e-05, "loss": 1.0047, "step": 1410, "task_loss": 0.8691654205322266 }, { "compression_loss": 0.0, "distillation_loss": 1.11103355884552, "epoch": 0.51, "learning_rate": 6.82062536624335e-05, "loss": 1.2397, "step": 1420, "task_loss": 0.7043032050132751 }, { "compression_loss": 0.0, "distillation_loss": 1.2859797477722168, "epoch": 0.52, "learning_rate": 6.81810513297709e-05, "loss": 1.1573, "step": 1430, "task_loss": 1.0251799821853638 }, { "compression_loss": 0.0, "distillation_loss": 1.4492874145507812, "epoch": 0.52, "learning_rate": 6.815567790444994e-05, "loss": 1.1774, "step": 1440, "task_loss": 0.8829349875450134 }, { "compression_loss": 0.0, "distillation_loss": 1.20381760597229, "epoch": 0.52, "learning_rate": 6.813013351730457e-05, "loss": 1.0484, "step": 1450, "task_loss": 1.0177350044250488 }, { "compression_loss": 0.0, "distillation_loss": 1.2550363540649414, "epoch": 0.53, "learning_rate": 6.810441830005021e-05, "loss": 1.0561, "step": 1460, "task_loss": 0.7583893537521362 }, { "compression_loss": 0.0, "distillation_loss": 1.1922001838684082, "epoch": 0.53, "learning_rate": 6.807853238528316e-05, "loss": 1.0369, "step": 1470, "task_loss": 0.8423135876655579 }, { "compression_loss": 0.0, "distillation_loss": 1.057039737701416, "epoch": 0.53, "learning_rate": 6.805247590647992e-05, "loss": 1.06, "step": 1480, "task_loss": 0.8067488670349121 }, { "compression_loss": 0.0, "distillation_loss": 1.1017229557037354, "epoch": 0.54, "learning_rate": 6.802624899799646e-05, "loss": 1.2062, "step": 1490, "task_loss": 0.9069002866744995 }, { "compression_loss": 0.0, "distillation_loss": 0.9764423370361328, "epoch": 0.54, "learning_rate": 6.799985179506753e-05, "loss": 1.2209, "step": 1500, "task_loss": 1.0442379713058472 }, { "epoch": 0.54, "eval_exact_match": 79.47965941343425, "eval_f1": 87.69873817803351, "step": 1500 }, { "compression_loss": 0.0, "distillation_loss": 0.9164371490478516, "epoch": 0.55, "learning_rate": 6.797328443380597e-05, "loss": 1.2163, "step": 1510, "task_loss": 1.1710762977600098 }, { "compression_loss": 0.0, "distillation_loss": 1.0816839933395386, "epoch": 0.55, "learning_rate": 6.794654705120207e-05, "loss": 1.0914, "step": 1520, "task_loss": 1.251457691192627 }, { "compression_loss": 0.0, "distillation_loss": 1.1803267002105713, "epoch": 0.55, "learning_rate": 6.791963978512273e-05, "loss": 1.0023, "step": 1530, "task_loss": 0.8527973890304565 }, { "compression_loss": 0.0, "distillation_loss": 1.8129152059555054, "epoch": 0.56, "learning_rate": 6.789256277431087e-05, "loss": 1.1583, "step": 1540, "task_loss": 1.6529321670532227 }, { "compression_loss": 0.0, "distillation_loss": 1.4472854137420654, "epoch": 0.56, "learning_rate": 6.786531615838467e-05, "loss": 1.2427, "step": 1550, "task_loss": 1.1761155128479004 }, { "compression_loss": 0.0, "distillation_loss": 1.314471960067749, "epoch": 0.56, "learning_rate": 6.783790007783683e-05, "loss": 1.2151, "step": 1560, "task_loss": 0.894078254699707 }, { "compression_loss": 0.0, "distillation_loss": 0.9429694414138794, "epoch": 0.57, "learning_rate": 6.78103146740339e-05, "loss": 1.077, "step": 1570, "task_loss": 0.5526290535926819 }, { "compression_loss": 0.0, "distillation_loss": 1.1317658424377441, "epoch": 0.57, "learning_rate": 6.778256008921548e-05, "loss": 1.0742, "step": 1580, "task_loss": 0.6145908236503601 }, { "compression_loss": 0.0, "distillation_loss": 1.015599250793457, "epoch": 0.57, "learning_rate": 6.775463646649355e-05, "loss": 1.0751, "step": 1590, "task_loss": 1.2883775234222412 }, { "compression_loss": 0.0, "distillation_loss": 1.221526861190796, "epoch": 0.58, "learning_rate": 6.77265439498517e-05, "loss": 1.0519, "step": 1600, "task_loss": 1.0645751953125 }, { "compression_loss": 0.0, "distillation_loss": 1.1089495420455933, "epoch": 0.58, "learning_rate": 6.769828268414439e-05, "loss": 1.2171, "step": 1610, "task_loss": 0.4754822254180908 }, { "compression_loss": 0.0, "distillation_loss": 0.7438245415687561, "epoch": 0.59, "learning_rate": 6.76698528150962e-05, "loss": 1.0148, "step": 1620, "task_loss": 0.8168939352035522 }, { "compression_loss": 0.0, "distillation_loss": 0.9932070374488831, "epoch": 0.59, "learning_rate": 6.764125448930112e-05, "loss": 1.094, "step": 1630, "task_loss": 0.9427931308746338 }, { "compression_loss": 0.0, "distillation_loss": 1.2198781967163086, "epoch": 0.59, "learning_rate": 6.761248785422172e-05, "loss": 1.1677, "step": 1640, "task_loss": 0.8359280824661255 }, { "compression_loss": 0.0, "distillation_loss": 1.0543707609176636, "epoch": 0.6, "learning_rate": 6.758355305818843e-05, "loss": 1.121, "step": 1650, "task_loss": 0.6798961162567139 }, { "compression_loss": 0.0, "distillation_loss": 0.9245864152908325, "epoch": 0.6, "learning_rate": 6.755445025039881e-05, "loss": 0.9735, "step": 1660, "task_loss": 0.8748812079429626 }, { "compression_loss": 0.0, "distillation_loss": 0.9669818878173828, "epoch": 0.6, "learning_rate": 6.752517958091671e-05, "loss": 1.071, "step": 1670, "task_loss": 0.763600766658783 }, { "compression_loss": 0.0, "distillation_loss": 1.2453193664550781, "epoch": 0.61, "learning_rate": 6.749574120067155e-05, "loss": 1.06, "step": 1680, "task_loss": 1.083929419517517 }, { "compression_loss": 0.0, "distillation_loss": 0.5505318641662598, "epoch": 0.61, "learning_rate": 6.746613526145752e-05, "loss": 1.1745, "step": 1690, "task_loss": 0.41013821959495544 }, { "compression_loss": 0.0, "distillation_loss": 1.723199486732483, "epoch": 0.61, "learning_rate": 6.743636191593279e-05, "loss": 1.1728, "step": 1700, "task_loss": 1.5259857177734375 }, { "compression_loss": 0.0, "distillation_loss": 0.8400850296020508, "epoch": 0.62, "learning_rate": 6.740642131761876e-05, "loss": 1.1087, "step": 1710, "task_loss": 0.8797809481620789 }, { "compression_loss": 0.0, "distillation_loss": 1.503807783126831, "epoch": 0.62, "learning_rate": 6.737631362089919e-05, "loss": 1.1093, "step": 1720, "task_loss": 1.2856862545013428 }, { "compression_loss": 0.0, "distillation_loss": 1.252486228942871, "epoch": 0.63, "learning_rate": 6.734603898101952e-05, "loss": 1.1645, "step": 1730, "task_loss": 1.240208625793457 }, { "compression_loss": 0.0, "distillation_loss": 0.9471865892410278, "epoch": 0.63, "learning_rate": 6.731559755408595e-05, "loss": 1.0735, "step": 1740, "task_loss": 0.6639693975448608 }, { "compression_loss": 0.0, "distillation_loss": 1.4272645711898804, "epoch": 0.63, "learning_rate": 6.728498949706473e-05, "loss": 1.0731, "step": 1750, "task_loss": 1.374558448791504 }, { "epoch": 0.63, "eval_exact_match": 79.47019867549669, "eval_f1": 87.62942304951152, "step": 1750 }, { "compression_loss": 0.0, "distillation_loss": 1.1430143117904663, "epoch": 0.64, "learning_rate": 6.72542149677813e-05, "loss": 1.1903, "step": 1760, "task_loss": 1.1442666053771973 }, { "compression_loss": 0.0, "distillation_loss": 0.9708960652351379, "epoch": 0.64, "learning_rate": 6.722327412491946e-05, "loss": 1.0846, "step": 1770, "task_loss": 0.7830320596694946 }, { "compression_loss": 0.0, "distillation_loss": 0.9312682151794434, "epoch": 0.64, "learning_rate": 6.71921671280206e-05, "loss": 1.1422, "step": 1780, "task_loss": 0.8345022201538086 }, { "compression_loss": 0.0, "distillation_loss": 1.4343688488006592, "epoch": 0.65, "learning_rate": 6.716089413748289e-05, "loss": 1.031, "step": 1790, "task_loss": 1.1940171718597412 }, { "compression_loss": 0.0, "distillation_loss": 1.1692103147506714, "epoch": 0.65, "learning_rate": 6.712945531456035e-05, "loss": 1.2076, "step": 1800, "task_loss": 0.9186697006225586 }, { "compression_loss": 0.0, "distillation_loss": 1.1725080013275146, "epoch": 0.65, "learning_rate": 6.709785082136213e-05, "loss": 1.0877, "step": 1810, "task_loss": 1.2045694589614868 }, { "compression_loss": 0.0, "distillation_loss": 0.9919499158859253, "epoch": 0.66, "learning_rate": 6.706608082085164e-05, "loss": 1.0455, "step": 1820, "task_loss": 0.9436089396476746 }, { "compression_loss": 0.0, "distillation_loss": 1.1209893226623535, "epoch": 0.66, "learning_rate": 6.703414547684568e-05, "loss": 1.0689, "step": 1830, "task_loss": 0.7971674203872681 }, { "compression_loss": 0.0, "distillation_loss": 0.8886414766311646, "epoch": 0.66, "learning_rate": 6.700204495401361e-05, "loss": 1.093, "step": 1840, "task_loss": 1.0051020383834839 }, { "compression_loss": 0.0, "distillation_loss": 1.602144718170166, "epoch": 0.67, "learning_rate": 6.696977941787651e-05, "loss": 1.1886, "step": 1850, "task_loss": 0.9657487273216248 }, { "compression_loss": 0.0, "distillation_loss": 1.0558702945709229, "epoch": 0.67, "learning_rate": 6.693734903480639e-05, "loss": 1.0341, "step": 1860, "task_loss": 0.9395914673805237 }, { "compression_loss": 0.0, "distillation_loss": 0.9531676173210144, "epoch": 0.68, "learning_rate": 6.690475397202515e-05, "loss": 0.9256, "step": 1870, "task_loss": 0.9502599239349365 }, { "compression_loss": 0.0, "distillation_loss": 1.1095507144927979, "epoch": 0.68, "learning_rate": 6.687199439760392e-05, "loss": 1.0397, "step": 1880, "task_loss": 0.970070481300354 }, { "compression_loss": 0.0, "distillation_loss": 1.0300889015197754, "epoch": 0.68, "learning_rate": 6.683907048046209e-05, "loss": 1.0262, "step": 1890, "task_loss": 0.8158546686172485 }, { "compression_loss": 0.0, "distillation_loss": 0.9780499935150146, "epoch": 0.69, "learning_rate": 6.680598239036641e-05, "loss": 1.1005, "step": 1900, "task_loss": 0.8497573137283325 }, { "compression_loss": 0.0, "distillation_loss": 1.0767104625701904, "epoch": 0.69, "learning_rate": 6.677273029793025e-05, "loss": 1.001, "step": 1910, "task_loss": 0.9059280157089233 }, { "compression_loss": 0.0, "distillation_loss": 0.9213865995407104, "epoch": 0.69, "learning_rate": 6.673931437461255e-05, "loss": 1.098, "step": 1920, "task_loss": 0.615307629108429 }, { "compression_loss": 0.0, "distillation_loss": 1.2905583381652832, "epoch": 0.7, "learning_rate": 6.670573479271705e-05, "loss": 1.1162, "step": 1930, "task_loss": 0.8289541006088257 }, { "compression_loss": 0.0, "distillation_loss": 1.1863908767700195, "epoch": 0.7, "learning_rate": 6.667199172539137e-05, "loss": 1.0084, "step": 1940, "task_loss": 1.479468822479248 }, { "compression_loss": 0.0, "distillation_loss": 1.2633111476898193, "epoch": 0.7, "learning_rate": 6.663808534662611e-05, "loss": 1.1662, "step": 1950, "task_loss": 0.7809505462646484 }, { "compression_loss": 0.0, "distillation_loss": 0.9272475242614746, "epoch": 0.71, "learning_rate": 6.660401583125397e-05, "loss": 1.0658, "step": 1960, "task_loss": 1.3027081489562988 }, { "compression_loss": 0.0, "distillation_loss": 1.0278266668319702, "epoch": 0.71, "learning_rate": 6.65697833549488e-05, "loss": 1.0035, "step": 1970, "task_loss": 0.5536607503890991 }, { "compression_loss": 0.0, "distillation_loss": 1.1361572742462158, "epoch": 0.72, "learning_rate": 6.653538809422479e-05, "loss": 1.1669, "step": 1980, "task_loss": 1.4308404922485352 }, { "compression_loss": 0.0, "distillation_loss": 0.8886505365371704, "epoch": 0.72, "learning_rate": 6.650083022643546e-05, "loss": 1.0813, "step": 1990, "task_loss": 1.0401955842971802 }, { "compression_loss": 0.0, "distillation_loss": 0.9133187532424927, "epoch": 0.72, "learning_rate": 6.646610992977279e-05, "loss": 1.0828, "step": 2000, "task_loss": 0.5507737994194031 }, { "epoch": 0.72, "eval_exact_match": 79.2620624408704, "eval_f1": 87.39585165916291, "step": 2000 }, { "compression_loss": 0.0, "distillation_loss": 0.7194988131523132, "epoch": 0.73, "learning_rate": 6.643122738326632e-05, "loss": 1.0682, "step": 2010, "task_loss": 0.5585941076278687 }, { "compression_loss": 0.0, "distillation_loss": 1.098526120185852, "epoch": 0.73, "learning_rate": 6.639618276678217e-05, "loss": 1.0775, "step": 2020, "task_loss": 0.8350554704666138 }, { "compression_loss": 0.0, "distillation_loss": 1.1135523319244385, "epoch": 0.73, "learning_rate": 6.636097626102219e-05, "loss": 1.1848, "step": 2030, "task_loss": 1.121304988861084 }, { "compression_loss": 0.0, "distillation_loss": 0.9656478762626648, "epoch": 0.74, "learning_rate": 6.632560804752294e-05, "loss": 1.0541, "step": 2040, "task_loss": 0.7606736421585083 }, { "compression_loss": 0.0, "distillation_loss": 0.807013750076294, "epoch": 0.74, "learning_rate": 6.629007830865483e-05, "loss": 0.9216, "step": 2050, "task_loss": 0.5676593780517578 }, { "compression_loss": 0.0, "distillation_loss": 1.4709911346435547, "epoch": 0.74, "learning_rate": 6.625438722762114e-05, "loss": 1.0567, "step": 2060, "task_loss": 1.668703317642212 }, { "compression_loss": 0.0, "distillation_loss": 1.243257999420166, "epoch": 0.75, "learning_rate": 6.62185349884571e-05, "loss": 1.0476, "step": 2070, "task_loss": 1.2251255512237549 }, { "compression_loss": 0.0, "distillation_loss": 0.8128871321678162, "epoch": 0.75, "learning_rate": 6.618252177602889e-05, "loss": 0.9901, "step": 2080, "task_loss": 0.7867209911346436 }, { "compression_loss": 0.0, "distillation_loss": 1.8095636367797852, "epoch": 0.76, "learning_rate": 6.614634777603274e-05, "loss": 1.1872, "step": 2090, "task_loss": 1.47059965133667 }, { "compression_loss": 0.0, "distillation_loss": 1.0928575992584229, "epoch": 0.76, "learning_rate": 6.611001317499399e-05, "loss": 1.1672, "step": 2100, "task_loss": 1.0058388710021973 }, { "compression_loss": 0.0, "distillation_loss": 0.8655899167060852, "epoch": 0.76, "learning_rate": 6.607351816026604e-05, "loss": 0.925, "step": 2110, "task_loss": 0.7716636657714844 }, { "compression_loss": 0.0, "distillation_loss": 0.830997109413147, "epoch": 0.77, "learning_rate": 6.603686292002945e-05, "loss": 1.0954, "step": 2120, "task_loss": 1.1273469924926758 }, { "compression_loss": 0.0, "distillation_loss": 1.0073375701904297, "epoch": 0.77, "learning_rate": 6.600004764329098e-05, "loss": 1.0318, "step": 2130, "task_loss": 0.7276365756988525 }, { "compression_loss": 0.0, "distillation_loss": 1.026449203491211, "epoch": 0.77, "learning_rate": 6.596307251988257e-05, "loss": 1.1058, "step": 2140, "task_loss": 1.2019627094268799 }, { "compression_loss": 0.0, "distillation_loss": 1.1347987651824951, "epoch": 0.78, "learning_rate": 6.592593774046039e-05, "loss": 1.0494, "step": 2150, "task_loss": 0.947940468788147 }, { "compression_loss": 0.0, "distillation_loss": 0.9246293306350708, "epoch": 0.78, "learning_rate": 6.588864349650386e-05, "loss": 0.9727, "step": 2160, "task_loss": 0.8415713310241699 }, { "compression_loss": 0.0, "distillation_loss": 0.9354982972145081, "epoch": 0.78, "learning_rate": 6.585118998031462e-05, "loss": 1.015, "step": 2170, "task_loss": 0.8214912414550781 }, { "compression_loss": 0.0, "distillation_loss": 1.0691757202148438, "epoch": 0.79, "learning_rate": 6.581357738501561e-05, "loss": 1.0531, "step": 2180, "task_loss": 0.8481631278991699 }, { "compression_loss": 0.0, "distillation_loss": 1.2228312492370605, "epoch": 0.79, "learning_rate": 6.577580590455002e-05, "loss": 1.1091, "step": 2190, "task_loss": 0.9836097955703735 }, { "compression_loss": 0.0, "distillation_loss": 0.9064592123031616, "epoch": 0.8, "learning_rate": 6.57378757336803e-05, "loss": 0.9704, "step": 2200, "task_loss": 0.4665876626968384 }, { "compression_loss": 0.0, "distillation_loss": 0.7420713305473328, "epoch": 0.8, "learning_rate": 6.569978706798717e-05, "loss": 1.0118, "step": 2210, "task_loss": 0.4307766556739807 }, { "compression_loss": 0.0, "distillation_loss": 1.6637189388275146, "epoch": 0.8, "learning_rate": 6.566154010386858e-05, "loss": 1.1637, "step": 2220, "task_loss": 1.3755857944488525 }, { "compression_loss": 0.0, "distillation_loss": 1.168283462524414, "epoch": 0.81, "learning_rate": 6.562313503853876e-05, "loss": 1.1656, "step": 2230, "task_loss": 0.7027386426925659 }, { "compression_loss": 0.0, "distillation_loss": 0.8026998043060303, "epoch": 0.81, "learning_rate": 6.558457207002713e-05, "loss": 1.0892, "step": 2240, "task_loss": 0.700858473777771 }, { "compression_loss": 0.0, "distillation_loss": 1.1535958051681519, "epoch": 0.81, "learning_rate": 6.554585139717734e-05, "loss": 1.0732, "step": 2250, "task_loss": 0.8288143873214722 }, { "epoch": 0.81, "eval_exact_match": 80.06622516556291, "eval_f1": 87.992821077547, "step": 2250 }, { "compression_loss": 0.0, "distillation_loss": 1.2591572999954224, "epoch": 0.82, "learning_rate": 6.55069732196462e-05, "loss": 1.144, "step": 2260, "task_loss": 0.8866115212440491 }, { "compression_loss": 0.0, "distillation_loss": 1.0764415264129639, "epoch": 0.82, "learning_rate": 6.546793773790262e-05, "loss": 0.9642, "step": 2270, "task_loss": 0.891737699508667 }, { "compression_loss": 0.0, "distillation_loss": 1.1543341875076294, "epoch": 0.82, "learning_rate": 6.542874515322672e-05, "loss": 0.9274, "step": 2280, "task_loss": 1.1221840381622314 }, { "compression_loss": 0.0, "distillation_loss": 1.189033031463623, "epoch": 0.83, "learning_rate": 6.538939566770863e-05, "loss": 0.946, "step": 2290, "task_loss": 1.2177841663360596 }, { "compression_loss": 0.0, "distillation_loss": 1.1238713264465332, "epoch": 0.83, "learning_rate": 6.534988948424753e-05, "loss": 0.9513, "step": 2300, "task_loss": 0.7087922096252441 }, { "compression_loss": 0.0, "distillation_loss": 1.0527355670928955, "epoch": 0.83, "learning_rate": 6.531022680655057e-05, "loss": 1.1627, "step": 2310, "task_loss": 1.0492477416992188 }, { "compression_loss": 0.0, "distillation_loss": 1.1853148937225342, "epoch": 0.84, "learning_rate": 6.527040783913188e-05, "loss": 0.9871, "step": 2320, "task_loss": 1.1989734172821045 }, { "compression_loss": 0.0, "distillation_loss": 1.1268264055252075, "epoch": 0.84, "learning_rate": 6.523043278731144e-05, "loss": 1.1276, "step": 2330, "task_loss": 0.7949765920639038 }, { "compression_loss": 0.0, "distillation_loss": 1.072948694229126, "epoch": 0.85, "learning_rate": 6.519030185721406e-05, "loss": 1.0278, "step": 2340, "task_loss": 0.9652178287506104 }, { "compression_loss": 0.0, "distillation_loss": 1.2226154804229736, "epoch": 0.85, "learning_rate": 6.515001525576829e-05, "loss": 0.9951, "step": 2350, "task_loss": 0.9827218055725098 }, { "compression_loss": 0.0, "distillation_loss": 0.9792837500572205, "epoch": 0.85, "learning_rate": 6.510957319070542e-05, "loss": 0.9745, "step": 2360, "task_loss": 0.744950532913208 }, { "compression_loss": 0.0, "distillation_loss": 1.3859732151031494, "epoch": 0.86, "learning_rate": 6.50689758705583e-05, "loss": 1.019, "step": 2370, "task_loss": 1.170689582824707 }, { "compression_loss": 0.0, "distillation_loss": 0.8908461332321167, "epoch": 0.86, "learning_rate": 6.502822350466042e-05, "loss": 1.1045, "step": 2380, "task_loss": 0.838524580001831 }, { "compression_loss": 0.0, "distillation_loss": 1.280137062072754, "epoch": 0.86, "learning_rate": 6.498731630314464e-05, "loss": 0.948, "step": 2390, "task_loss": 1.2174863815307617 }, { "compression_loss": 0.0, "distillation_loss": 1.02490234375, "epoch": 0.87, "learning_rate": 6.494625447694226e-05, "loss": 1.0909, "step": 2400, "task_loss": 0.8274341821670532 }, { "compression_loss": 0.0, "distillation_loss": 0.8467527627944946, "epoch": 0.87, "learning_rate": 6.490503823778187e-05, "loss": 1.0749, "step": 2410, "task_loss": 0.5999952554702759 }, { "compression_loss": 0.0, "distillation_loss": 1.4467501640319824, "epoch": 0.87, "learning_rate": 6.486366779818823e-05, "loss": 1.0028, "step": 2420, "task_loss": 0.9743595123291016 }, { "compression_loss": 0.0, "distillation_loss": 0.9197993874549866, "epoch": 0.88, "learning_rate": 6.482214337148128e-05, "loss": 1.0118, "step": 2430, "task_loss": 0.9321801662445068 }, { "compression_loss": 0.0, "distillation_loss": 1.1022918224334717, "epoch": 0.88, "learning_rate": 6.478046517177489e-05, "loss": 1.1273, "step": 2440, "task_loss": 0.7594503164291382 }, { "compression_loss": 0.0, "distillation_loss": 0.9985268115997314, "epoch": 0.89, "learning_rate": 6.47386334139759e-05, "loss": 0.9987, "step": 2450, "task_loss": 0.5284379720687866 }, { "compression_loss": 0.0, "distillation_loss": 0.5598468780517578, "epoch": 0.89, "learning_rate": 6.469664831378291e-05, "loss": 1.0219, "step": 2460, "task_loss": 0.8020057678222656 }, { "compression_loss": 0.0, "distillation_loss": 0.7448583841323853, "epoch": 0.89, "learning_rate": 6.46545100876852e-05, "loss": 1.0504, "step": 2470, "task_loss": 0.7287012338638306 }, { "compression_loss": 0.0, "distillation_loss": 0.8835182189941406, "epoch": 0.9, "learning_rate": 6.461221895296166e-05, "loss": 1.083, "step": 2480, "task_loss": 1.5722730159759521 }, { "compression_loss": 0.0, "distillation_loss": 0.7862961292266846, "epoch": 0.9, "learning_rate": 6.45697751276796e-05, "loss": 1.0801, "step": 2490, "task_loss": 0.4984385371208191 }, { "compression_loss": 0.0, "distillation_loss": 1.5525429248809814, "epoch": 0.9, "learning_rate": 6.452717883069362e-05, "loss": 0.9646, "step": 2500, "task_loss": 1.5134918689727783 }, { "epoch": 0.9, "eval_exact_match": 80.54872280037843, "eval_f1": 88.31187998291739, "step": 2500 }, { "compression_loss": 0.0, "distillation_loss": 0.9102578163146973, "epoch": 0.91, "learning_rate": 6.448443028164458e-05, "loss": 1.0809, "step": 2510, "task_loss": 0.8942407369613647 }, { "compression_loss": 0.0, "distillation_loss": 0.9936506152153015, "epoch": 0.91, "learning_rate": 6.444152970095839e-05, "loss": 1.0757, "step": 2520, "task_loss": 0.9664222598075867 }, { "compression_loss": 0.0, "distillation_loss": 1.041625738143921, "epoch": 0.91, "learning_rate": 6.439847730984484e-05, "loss": 1.1217, "step": 2530, "task_loss": 0.7623254656791687 }, { "compression_loss": 0.0, "distillation_loss": 1.021484375, "epoch": 0.92, "learning_rate": 6.435527333029654e-05, "loss": 0.8477, "step": 2540, "task_loss": 0.7754542231559753 }, { "compression_loss": 0.0, "distillation_loss": 1.1402299404144287, "epoch": 0.92, "learning_rate": 6.431191798508773e-05, "loss": 1.0529, "step": 2550, "task_loss": 0.698540449142456 }, { "compression_loss": 0.0, "distillation_loss": 0.9655864238739014, "epoch": 0.93, "learning_rate": 6.426841149777318e-05, "loss": 1.0713, "step": 2560, "task_loss": 0.7478283643722534 }, { "compression_loss": 0.0, "distillation_loss": 0.8475412726402283, "epoch": 0.93, "learning_rate": 6.422475409268694e-05, "loss": 1.0224, "step": 2570, "task_loss": 0.922493040561676 }, { "compression_loss": 0.0, "distillation_loss": 1.2475067377090454, "epoch": 0.93, "learning_rate": 6.418094599494128e-05, "loss": 1.0774, "step": 2580, "task_loss": 1.2594256401062012 }, { "compression_loss": 0.0, "distillation_loss": 1.0052337646484375, "epoch": 0.94, "learning_rate": 6.413698743042548e-05, "loss": 0.8954, "step": 2590, "task_loss": 0.7200769186019897 }, { "compression_loss": 0.0, "distillation_loss": 0.8628212213516235, "epoch": 0.94, "learning_rate": 6.409287862580469e-05, "loss": 0.9528, "step": 2600, "task_loss": 0.936751127243042 }, { "compression_loss": 0.0, "distillation_loss": 1.1267863512039185, "epoch": 0.94, "learning_rate": 6.404861980851874e-05, "loss": 1.0709, "step": 2610, "task_loss": 0.9654757976531982 }, { "compression_loss": 0.0, "distillation_loss": 1.2044198513031006, "epoch": 0.95, "learning_rate": 6.400421120678095e-05, "loss": 1.0257, "step": 2620, "task_loss": 1.0389150381088257 }, { "compression_loss": 0.0, "distillation_loss": 1.0975178480148315, "epoch": 0.95, "learning_rate": 6.395965304957704e-05, "loss": 1.0731, "step": 2630, "task_loss": 1.0233360528945923 }, { "compression_loss": 0.0, "distillation_loss": 0.9345778226852417, "epoch": 0.95, "learning_rate": 6.391494556666383e-05, "loss": 1.0423, "step": 2640, "task_loss": 0.7960381507873535 }, { "compression_loss": 0.0, "distillation_loss": 0.6801820397377014, "epoch": 0.96, "learning_rate": 6.387008898856813e-05, "loss": 0.9883, "step": 2650, "task_loss": 0.6798236966133118 }, { "compression_loss": 0.0, "distillation_loss": 1.0460097789764404, "epoch": 0.96, "learning_rate": 6.382508354658555e-05, "loss": 1.0479, "step": 2660, "task_loss": 0.7182983160018921 }, { "compression_loss": 0.0, "distillation_loss": 1.1210675239562988, "epoch": 0.96, "learning_rate": 6.377992947277927e-05, "loss": 1.037, "step": 2670, "task_loss": 0.8833307027816772 }, { "compression_loss": 0.0, "distillation_loss": 1.166975498199463, "epoch": 0.97, "learning_rate": 6.373462699997891e-05, "loss": 1.0224, "step": 2680, "task_loss": 0.8321333527565002 }, { "compression_loss": 0.0, "distillation_loss": 0.843514621257782, "epoch": 0.97, "learning_rate": 6.368917636177923e-05, "loss": 0.9786, "step": 2690, "task_loss": 0.6727123260498047 }, { "compression_loss": 0.0, "distillation_loss": 1.0732271671295166, "epoch": 0.98, "learning_rate": 6.3643577792539e-05, "loss": 1.0344, "step": 2700, "task_loss": 1.1724574565887451 }, { "compression_loss": 0.0, "distillation_loss": 0.704067051410675, "epoch": 0.98, "learning_rate": 6.35978315273798e-05, "loss": 0.9464, "step": 2710, "task_loss": 0.4790229797363281 }, { "compression_loss": 0.0, "distillation_loss": 0.8885566592216492, "epoch": 0.98, "learning_rate": 6.355193780218473e-05, "loss": 1.0417, "step": 2720, "task_loss": 1.1946167945861816 }, { "compression_loss": 0.0, "distillation_loss": 0.6279390454292297, "epoch": 0.99, "learning_rate": 6.350589685359728e-05, "loss": 0.9042, "step": 2730, "task_loss": 0.38034892082214355 }, { "compression_loss": 0.0, "distillation_loss": 0.8930590748786926, "epoch": 0.99, "learning_rate": 6.345970891902006e-05, "loss": 0.8633, "step": 2740, "task_loss": 0.6923383474349976 }, { "compression_loss": 0.0, "distillation_loss": 0.864410936832428, "epoch": 0.99, "learning_rate": 6.341337423661359e-05, "loss": 0.9801, "step": 2750, "task_loss": 0.8051791191101074 }, { "epoch": 0.99, "eval_exact_match": 80.6244087038789, "eval_f1": 88.24949743347717, "step": 2750 }, { "compression_loss": 0.0, "distillation_loss": 0.5681467652320862, "epoch": 1.0, "learning_rate": 6.33668930452951e-05, "loss": 1.0045, "step": 2760, "task_loss": 0.5351274013519287 }, { "compression_loss": 0.0, "distillation_loss": 0.7320226430892944, "epoch": 1.0, "learning_rate": 6.33202655847372e-05, "loss": 0.8719, "step": 2770, "task_loss": 0.5473893880844116 }, { "compression_loss": 0.0, "distillation_loss": 1.155122995376587, "epoch": 1.0, "learning_rate": 6.327349209536678e-05, "loss": 0.9349, "step": 2780, "task_loss": 1.0080279111862183 }, { "compression_loss": 0.0, "distillation_loss": 1.2032737731933594, "epoch": 1.01, "learning_rate": 6.322657281836369e-05, "loss": 0.7598, "step": 2790, "task_loss": 0.9358059167861938 }, { "compression_loss": 0.0, "distillation_loss": 0.6500468850135803, "epoch": 1.01, "learning_rate": 6.317950799565947e-05, "loss": 0.7994, "step": 2800, "task_loss": 0.6531230807304382 }, { "compression_loss": 0.0, "distillation_loss": 0.904877781867981, "epoch": 1.02, "learning_rate": 6.31322978699362e-05, "loss": 0.8276, "step": 2810, "task_loss": 0.6158158779144287 }, { "compression_loss": 0.0, "distillation_loss": 0.7756827473640442, "epoch": 1.02, "learning_rate": 6.308494268462514e-05, "loss": 0.7618, "step": 2820, "task_loss": 0.48452329635620117 }, { "compression_loss": 0.0, "distillation_loss": 0.9801332950592041, "epoch": 1.02, "learning_rate": 6.303744268390556e-05, "loss": 0.7582, "step": 2830, "task_loss": 0.8897899389266968 }, { "compression_loss": 0.0, "distillation_loss": 0.8067210912704468, "epoch": 1.03, "learning_rate": 6.298979811270345e-05, "loss": 0.79, "step": 2840, "task_loss": 0.7257287502288818 }, { "compression_loss": 0.0, "distillation_loss": 0.7176787853240967, "epoch": 1.03, "learning_rate": 6.294200921669022e-05, "loss": 0.7634, "step": 2850, "task_loss": 0.6690226197242737 }, { "compression_loss": 0.0, "distillation_loss": 0.6467972993850708, "epoch": 1.03, "learning_rate": 6.28940762422815e-05, "loss": 0.8435, "step": 2860, "task_loss": 0.4089285135269165 }, { "compression_loss": 0.0, "distillation_loss": 0.6949372291564941, "epoch": 1.04, "learning_rate": 6.284599943663583e-05, "loss": 0.7328, "step": 2870, "task_loss": 0.5266913175582886 }, { "compression_loss": 0.0, "distillation_loss": 0.6310738325119019, "epoch": 1.04, "learning_rate": 6.279777904765335e-05, "loss": 0.6816, "step": 2880, "task_loss": 0.659895658493042 }, { "compression_loss": 0.0, "distillation_loss": 0.7482784986495972, "epoch": 1.04, "learning_rate": 6.274941532397464e-05, "loss": 0.7569, "step": 2890, "task_loss": 0.8065102100372314 }, { "compression_loss": 0.0, "distillation_loss": 0.9670886993408203, "epoch": 1.05, "learning_rate": 6.27009085149793e-05, "loss": 0.7821, "step": 2900, "task_loss": 0.7383887767791748 }, { "compression_loss": 0.0, "distillation_loss": 0.6161856651306152, "epoch": 1.05, "learning_rate": 6.265225887078478e-05, "loss": 0.7253, "step": 2910, "task_loss": 0.9445866346359253 }, { "compression_loss": 0.0, "distillation_loss": 0.8717264533042908, "epoch": 1.06, "learning_rate": 6.260346664224497e-05, "loss": 0.7941, "step": 2920, "task_loss": 1.126537799835205 }, { "compression_loss": 0.0, "distillation_loss": 0.6857512593269348, "epoch": 1.06, "learning_rate": 6.255453208094901e-05, "loss": 0.77, "step": 2930, "task_loss": 0.8074723482131958 }, { "compression_loss": 0.0, "distillation_loss": 0.7998137474060059, "epoch": 1.06, "learning_rate": 6.250545543921997e-05, "loss": 0.85, "step": 2940, "task_loss": 0.6924498677253723 }, { "compression_loss": 0.0, "distillation_loss": 1.1140186786651611, "epoch": 1.07, "learning_rate": 6.245623697011352e-05, "loss": 0.8453, "step": 2950, "task_loss": 1.2529780864715576 }, { "compression_loss": 0.0, "distillation_loss": 0.808302640914917, "epoch": 1.07, "learning_rate": 6.240687692741663e-05, "loss": 0.8449, "step": 2960, "task_loss": 0.704319953918457 }, { "compression_loss": 0.0, "distillation_loss": 0.6823419332504272, "epoch": 1.07, "learning_rate": 6.235737556564626e-05, "loss": 0.698, "step": 2970, "task_loss": 0.5391907095909119 }, { "compression_loss": 0.0, "distillation_loss": 0.6212811470031738, "epoch": 1.08, "learning_rate": 6.230773314004811e-05, "loss": 0.773, "step": 2980, "task_loss": 0.802199125289917 }, { "compression_loss": 0.0, "distillation_loss": 0.6568988561630249, "epoch": 1.08, "learning_rate": 6.225794990659519e-05, "loss": 0.7828, "step": 2990, "task_loss": 0.8078853487968445 }, { "compression_loss": 0.0, "distillation_loss": 0.8658432364463806, "epoch": 1.08, "learning_rate": 6.220802612198661e-05, "loss": 0.7315, "step": 3000, "task_loss": 0.9337005615234375 }, { "epoch": 1.08, "eval_exact_match": 80.64333017975402, "eval_f1": 88.53171644067697, "step": 3000 }, { "compression_loss": 0.0, "distillation_loss": 0.9142760038375854, "epoch": 1.09, "learning_rate": 6.215796204364618e-05, "loss": 0.7422, "step": 3010, "task_loss": 0.8946501016616821 }, { "compression_loss": 0.0, "distillation_loss": 1.0131399631500244, "epoch": 1.09, "learning_rate": 6.210775792972113e-05, "loss": 0.829, "step": 3020, "task_loss": 0.694104790687561 }, { "compression_loss": 0.0, "distillation_loss": 0.858983039855957, "epoch": 1.1, "learning_rate": 6.205741403908076e-05, "loss": 0.7699, "step": 3030, "task_loss": 0.8205258846282959 }, { "compression_loss": 0.0, "distillation_loss": 0.8396916389465332, "epoch": 1.1, "learning_rate": 6.20069306313151e-05, "loss": 0.8486, "step": 3040, "task_loss": 0.6872456073760986 }, { "compression_loss": 0.0, "distillation_loss": 0.7519895434379578, "epoch": 1.1, "learning_rate": 6.195630796673355e-05, "loss": 0.8115, "step": 3050, "task_loss": 1.1500433683395386 }, { "compression_loss": 0.0, "distillation_loss": 0.8241223096847534, "epoch": 1.11, "learning_rate": 6.19055463063636e-05, "loss": 0.7984, "step": 3060, "task_loss": 0.8155335187911987 }, { "compression_loss": 0.0, "distillation_loss": 0.6022705435752869, "epoch": 1.11, "learning_rate": 6.185464591194946e-05, "loss": 0.7076, "step": 3070, "task_loss": 0.594499945640564 }, { "compression_loss": 0.0, "distillation_loss": 0.895301103591919, "epoch": 1.11, "learning_rate": 6.180360704595069e-05, "loss": 0.7757, "step": 3080, "task_loss": 1.0475142002105713 }, { "compression_loss": 0.0, "distillation_loss": 0.9936012029647827, "epoch": 1.12, "learning_rate": 6.17524299715408e-05, "loss": 0.7569, "step": 3090, "task_loss": 0.5248420238494873 }, { "compression_loss": 0.0, "distillation_loss": 0.6736662983894348, "epoch": 1.12, "learning_rate": 6.170111495260603e-05, "loss": 0.779, "step": 3100, "task_loss": 0.8802880644798279 }, { "compression_loss": 0.0, "distillation_loss": 0.9580499529838562, "epoch": 1.12, "learning_rate": 6.164966225374386e-05, "loss": 0.7582, "step": 3110, "task_loss": 1.5431132316589355 }, { "compression_loss": 0.0, "distillation_loss": 0.7854373455047607, "epoch": 1.13, "learning_rate": 6.15980721402617e-05, "loss": 0.819, "step": 3120, "task_loss": 0.8485944271087646 }, { "compression_loss": 0.0, "distillation_loss": 1.1915910243988037, "epoch": 1.13, "learning_rate": 6.154634487817555e-05, "loss": 0.7818, "step": 3130, "task_loss": 1.0657076835632324 }, { "compression_loss": 0.0, "distillation_loss": 0.5495825409889221, "epoch": 1.13, "learning_rate": 6.149448073420854e-05, "loss": 0.7273, "step": 3140, "task_loss": 0.436737596988678 }, { "compression_loss": 0.0, "distillation_loss": 1.0557966232299805, "epoch": 1.14, "learning_rate": 6.144247997578965e-05, "loss": 0.8162, "step": 3150, "task_loss": 1.5712409019470215 }, { "compression_loss": 0.0, "distillation_loss": 0.5940508246421814, "epoch": 1.14, "learning_rate": 6.139034287105228e-05, "loss": 0.7228, "step": 3160, "task_loss": 0.7547852993011475 }, { "compression_loss": 0.0, "distillation_loss": 0.9270653128623962, "epoch": 1.15, "learning_rate": 6.133806968883286e-05, "loss": 0.7985, "step": 3170, "task_loss": 0.7847113609313965 }, { "compression_loss": 0.0, "distillation_loss": 0.6993736624717712, "epoch": 1.15, "learning_rate": 6.12856606986695e-05, "loss": 0.7397, "step": 3180, "task_loss": 0.8840616941452026 }, { "compression_loss": 0.0, "distillation_loss": 0.7587177157402039, "epoch": 1.15, "learning_rate": 6.123311617080057e-05, "loss": 0.7178, "step": 3190, "task_loss": 0.6237931251525879 }, { "compression_loss": 0.0, "distillation_loss": 1.0422003269195557, "epoch": 1.16, "learning_rate": 6.118043637616336e-05, "loss": 0.822, "step": 3200, "task_loss": 1.0040562152862549 }, { "compression_loss": 0.0, "distillation_loss": 0.7708929777145386, "epoch": 1.16, "learning_rate": 6.112762158639257e-05, "loss": 0.8651, "step": 3210, "task_loss": 0.8678411245346069 }, { "compression_loss": 0.0, "distillation_loss": 0.7193692922592163, "epoch": 1.16, "learning_rate": 6.107467207381903e-05, "loss": 0.7916, "step": 3220, "task_loss": 0.6456923484802246 }, { "compression_loss": 0.0, "distillation_loss": 0.6213294267654419, "epoch": 1.17, "learning_rate": 6.1021588111468214e-05, "loss": 0.7506, "step": 3230, "task_loss": 0.430205762386322 }, { "compression_loss": 0.0, "distillation_loss": 0.717927098274231, "epoch": 1.17, "learning_rate": 6.0968369973058906e-05, "loss": 0.6777, "step": 3240, "task_loss": 0.7677985429763794 }, { "compression_loss": 0.0, "distillation_loss": 0.5344014167785645, "epoch": 1.17, "learning_rate": 6.0915017933001704e-05, "loss": 0.6799, "step": 3250, "task_loss": 0.6910272836685181 }, { "epoch": 1.17, "eval_exact_match": 80.70955534531693, "eval_f1": 88.61805230583994, "step": 3250 }, { "compression_loss": 0.0, "distillation_loss": 0.7944083213806152, "epoch": 1.18, "learning_rate": 6.086153226639769e-05, "loss": 0.7754, "step": 3260, "task_loss": 0.8809820413589478 }, { "compression_loss": 0.0, "distillation_loss": 0.7685368657112122, "epoch": 1.18, "learning_rate": 6.080791324903693e-05, "loss": 0.8346, "step": 3270, "task_loss": 1.361288070678711 }, { "compression_loss": 0.0, "distillation_loss": 0.8159446716308594, "epoch": 1.19, "learning_rate": 6.075416115739711e-05, "loss": 0.783, "step": 3280, "task_loss": 0.8244059085845947 }, { "compression_loss": 0.0, "distillation_loss": 0.9105017185211182, "epoch": 1.19, "learning_rate": 6.070027626864208e-05, "loss": 0.7819, "step": 3290, "task_loss": 1.182206630706787 }, { "compression_loss": 0.0, "distillation_loss": 0.6835358142852783, "epoch": 1.19, "learning_rate": 6.064625886062046e-05, "loss": 0.7313, "step": 3300, "task_loss": 0.7755618095397949 }, { "compression_loss": 0.0, "distillation_loss": 0.7198119163513184, "epoch": 1.2, "learning_rate": 6.059210921186417e-05, "loss": 0.7644, "step": 3310, "task_loss": 0.6677089929580688 }, { "compression_loss": 0.0, "distillation_loss": 1.1201332807540894, "epoch": 1.2, "learning_rate": 6.053782760158701e-05, "loss": 0.9287, "step": 3320, "task_loss": 1.3147826194763184 }, { "compression_loss": 0.0, "distillation_loss": 0.7347714900970459, "epoch": 1.2, "learning_rate": 6.04834143096832e-05, "loss": 0.7157, "step": 3330, "task_loss": 1.1088364124298096 }, { "compression_loss": 0.0, "distillation_loss": 0.5882917642593384, "epoch": 1.21, "learning_rate": 6.0428869616725985e-05, "loss": 0.7162, "step": 3340, "task_loss": 0.697338342666626 }, { "compression_loss": 0.0, "distillation_loss": 0.6236642003059387, "epoch": 1.21, "learning_rate": 6.0374193803966136e-05, "loss": 0.7029, "step": 3350, "task_loss": 1.0311845541000366 }, { "compression_loss": 0.0, "distillation_loss": 0.6423226594924927, "epoch": 1.21, "learning_rate": 6.0319387153330536e-05, "loss": 0.7465, "step": 3360, "task_loss": 0.8737854957580566 }, { "compression_loss": 0.0, "distillation_loss": 0.9397047758102417, "epoch": 1.22, "learning_rate": 6.0264449947420696e-05, "loss": 0.8276, "step": 3370, "task_loss": 0.5499547719955444 }, { "compression_loss": 0.0, "distillation_loss": 0.9513425230979919, "epoch": 1.22, "learning_rate": 6.0209382469511336e-05, "loss": 0.7756, "step": 3380, "task_loss": 1.0776315927505493 }, { "compression_loss": 0.0, "distillation_loss": 0.955211877822876, "epoch": 1.23, "learning_rate": 6.015418500354887e-05, "loss": 0.8155, "step": 3390, "task_loss": 0.8473688364028931 }, { "compression_loss": 0.0, "distillation_loss": 0.7617285251617432, "epoch": 1.23, "learning_rate": 6.009885783415001e-05, "loss": 0.8277, "step": 3400, "task_loss": 0.9811973571777344 }, { "compression_loss": 0.0, "distillation_loss": 0.8025141954421997, "epoch": 1.23, "learning_rate": 6.0043401246600244e-05, "loss": 0.8092, "step": 3410, "task_loss": 0.78231281042099 }, { "compression_loss": 0.0, "distillation_loss": 0.732769787311554, "epoch": 1.24, "learning_rate": 5.998781552685238e-05, "loss": 0.7231, "step": 3420, "task_loss": 0.6331707239151001 }, { "compression_loss": 0.0, "distillation_loss": 0.7150098085403442, "epoch": 1.24, "learning_rate": 5.9932100961525085e-05, "loss": 0.7696, "step": 3430, "task_loss": 0.65977543592453 }, { "compression_loss": 0.0, "distillation_loss": 0.913851261138916, "epoch": 1.24, "learning_rate": 5.9876257837901394e-05, "loss": 0.8205, "step": 3440, "task_loss": 0.8993183970451355 }, { "compression_loss": 0.0, "distillation_loss": 0.5872275829315186, "epoch": 1.25, "learning_rate": 5.982028644392723e-05, "loss": 0.7224, "step": 3450, "task_loss": 0.657208263874054 }, { "compression_loss": 0.0, "distillation_loss": 0.7204796671867371, "epoch": 1.25, "learning_rate": 5.976418706820992e-05, "loss": 0.7553, "step": 3460, "task_loss": 0.8270089626312256 }, { "compression_loss": 0.0, "distillation_loss": 0.8491474390029907, "epoch": 1.25, "learning_rate": 5.9707960000016706e-05, "loss": 0.8146, "step": 3470, "task_loss": 0.7125909328460693 }, { "compression_loss": 0.0, "distillation_loss": 0.630764365196228, "epoch": 1.26, "learning_rate": 5.9651605529273264e-05, "loss": 0.7561, "step": 3480, "task_loss": 0.798701286315918 }, { "compression_loss": 0.0, "distillation_loss": 0.6272002458572388, "epoch": 1.26, "learning_rate": 5.9595123946562196e-05, "loss": 0.7211, "step": 3490, "task_loss": 0.7057316899299622 }, { "compression_loss": 0.0, "distillation_loss": 0.7990823984146118, "epoch": 1.26, "learning_rate": 5.9538515543121524e-05, "loss": 0.8313, "step": 3500, "task_loss": 0.7466528415679932 }, { "epoch": 1.26, "eval_exact_match": 80.89877010406812, "eval_f1": 88.60748473078176, "step": 3500 }, { "compression_loss": 0.0, "distillation_loss": 0.5751552581787109, "epoch": 1.27, "learning_rate": 5.948178061084323e-05, "loss": 0.7492, "step": 3510, "task_loss": 0.5113732814788818 }, { "compression_loss": 0.0, "distillation_loss": 0.8527777194976807, "epoch": 1.27, "learning_rate": 5.9424919442271693e-05, "loss": 0.6718, "step": 3520, "task_loss": 0.9590537548065186 }, { "compression_loss": 0.0, "distillation_loss": 0.6820343732833862, "epoch": 1.28, "learning_rate": 5.9367932330602216e-05, "loss": 0.7317, "step": 3530, "task_loss": 0.775328516960144 }, { "compression_loss": 0.0, "distillation_loss": 0.7279667854309082, "epoch": 1.28, "learning_rate": 5.931081956967951e-05, "loss": 0.8587, "step": 3540, "task_loss": 0.5757768750190735 }, { "compression_loss": 0.0, "distillation_loss": 0.7633707523345947, "epoch": 1.28, "learning_rate": 5.925358145399619e-05, "loss": 0.7386, "step": 3550, "task_loss": 0.6215412616729736 }, { "compression_loss": 0.0, "distillation_loss": 0.9111154079437256, "epoch": 1.29, "learning_rate": 5.9196218278691204e-05, "loss": 0.8404, "step": 3560, "task_loss": 0.9098265171051025 }, { "compression_loss": 0.0, "distillation_loss": 0.968406081199646, "epoch": 1.29, "learning_rate": 5.913873033954839e-05, "loss": 0.8175, "step": 3570, "task_loss": 0.8037402629852295 }, { "compression_loss": 0.0, "distillation_loss": 0.5799151062965393, "epoch": 1.29, "learning_rate": 5.908111793299489e-05, "loss": 0.7005, "step": 3580, "task_loss": 0.7509677410125732 }, { "compression_loss": 0.0, "distillation_loss": 0.7919946908950806, "epoch": 1.3, "learning_rate": 5.902338135609965e-05, "loss": 0.784, "step": 3590, "task_loss": 0.75666344165802 }, { "compression_loss": 0.0, "distillation_loss": 0.5682597160339355, "epoch": 1.3, "learning_rate": 5.896552090657185e-05, "loss": 0.7353, "step": 3600, "task_loss": 0.6385715007781982 }, { "compression_loss": 0.0, "distillation_loss": 0.8308883905410767, "epoch": 1.3, "learning_rate": 5.890753688275946e-05, "loss": 0.8532, "step": 3610, "task_loss": 0.8819625377655029 }, { "compression_loss": 0.0, "distillation_loss": 0.8280819058418274, "epoch": 1.31, "learning_rate": 5.884942958364759e-05, "loss": 0.8406, "step": 3620, "task_loss": 0.8358502984046936 }, { "compression_loss": 0.0, "distillation_loss": 0.8390671014785767, "epoch": 1.31, "learning_rate": 5.8791199308857e-05, "loss": 0.7029, "step": 3630, "task_loss": 1.1161457300186157 }, { "compression_loss": 0.0, "distillation_loss": 0.6460493803024292, "epoch": 1.32, "learning_rate": 5.873284635864259e-05, "loss": 0.9247, "step": 3640, "task_loss": 0.6301670074462891 }, { "compression_loss": 0.0, "distillation_loss": 0.6909412741661072, "epoch": 1.32, "learning_rate": 5.867437103389178e-05, "loss": 0.774, "step": 3650, "task_loss": 0.8153108954429626 }, { "compression_loss": 0.0, "distillation_loss": 0.6475903987884521, "epoch": 1.32, "learning_rate": 5.861577363612302e-05, "loss": 0.7276, "step": 3660, "task_loss": 0.5706870555877686 }, { "compression_loss": 0.0, "distillation_loss": 0.4457024335861206, "epoch": 1.33, "learning_rate": 5.855705446748418e-05, "loss": 0.7223, "step": 3670, "task_loss": 0.7826858162879944 }, { "compression_loss": 0.0, "distillation_loss": 0.6906821131706238, "epoch": 1.33, "learning_rate": 5.849821383075105e-05, "loss": 0.831, "step": 3680, "task_loss": 0.6492674946784973 }, { "compression_loss": 0.0, "distillation_loss": 0.7505589723587036, "epoch": 1.33, "learning_rate": 5.8439252029325734e-05, "loss": 0.8624, "step": 3690, "task_loss": 0.8882350921630859 }, { "compression_loss": 0.0, "distillation_loss": 0.9934232831001282, "epoch": 1.34, "learning_rate": 5.838016936723511e-05, "loss": 0.8199, "step": 3700, "task_loss": 1.0231308937072754 }, { "compression_loss": 0.0, "distillation_loss": 0.5626651048660278, "epoch": 1.34, "learning_rate": 5.832096614912926e-05, "loss": 0.7692, "step": 3710, "task_loss": 0.5882710814476013 }, { "compression_loss": 0.0, "distillation_loss": 0.5242562294006348, "epoch": 1.34, "learning_rate": 5.826164268027987e-05, "loss": 0.7655, "step": 3720, "task_loss": 0.5058928728103638 }, { "compression_loss": 0.0, "distillation_loss": 0.6647918820381165, "epoch": 1.35, "learning_rate": 5.8202199266578704e-05, "loss": 0.7998, "step": 3730, "task_loss": 1.0364404916763306 }, { "compression_loss": 0.0, "distillation_loss": 0.5112893581390381, "epoch": 1.35, "learning_rate": 5.8142636214535976e-05, "loss": 0.7007, "step": 3740, "task_loss": 0.8115373253822327 }, { "compression_loss": 0.0, "distillation_loss": 0.5241594314575195, "epoch": 1.36, "learning_rate": 5.808295383127883e-05, "loss": 0.769, "step": 3750, "task_loss": 0.4635359048843384 }, { "epoch": 1.36, "eval_exact_match": 80.60548722800378, "eval_f1": 88.48461872007243, "step": 3750 }, { "compression_loss": 0.0, "distillation_loss": 0.7614384293556213, "epoch": 1.36, "learning_rate": 5.80231524245497e-05, "loss": 0.7911, "step": 3760, "task_loss": 0.5848983526229858 }, { "compression_loss": 0.0, "distillation_loss": 0.7333616614341736, "epoch": 1.36, "learning_rate": 5.796323230270474e-05, "loss": 0.8573, "step": 3770, "task_loss": 0.6257777214050293 }, { "compression_loss": 0.0, "distillation_loss": 0.7030531764030457, "epoch": 1.37, "learning_rate": 5.790319377471224e-05, "loss": 0.7254, "step": 3780, "task_loss": 0.4509909152984619 }, { "compression_loss": 0.0, "distillation_loss": 0.8695291876792908, "epoch": 1.37, "learning_rate": 5.7843037150151074e-05, "loss": 0.7335, "step": 3790, "task_loss": 0.8156959414482117 }, { "compression_loss": 0.0, "distillation_loss": 0.6627474427223206, "epoch": 1.37, "learning_rate": 5.778276273920899e-05, "loss": 0.7666, "step": 3800, "task_loss": 0.5233876705169678 }, { "compression_loss": 0.0, "distillation_loss": 1.0300884246826172, "epoch": 1.38, "learning_rate": 5.772237085268114e-05, "loss": 0.7196, "step": 3810, "task_loss": 0.7798508405685425 }, { "compression_loss": 0.0, "distillation_loss": 0.5791784524917603, "epoch": 1.38, "learning_rate": 5.766186180196839e-05, "loss": 0.7791, "step": 3820, "task_loss": 0.6178016066551208 }, { "compression_loss": 0.0, "distillation_loss": 0.891481876373291, "epoch": 1.38, "learning_rate": 5.7601235899075745e-05, "loss": 0.7834, "step": 3830, "task_loss": 0.8527568578720093 }, { "compression_loss": 0.0, "distillation_loss": 0.6954624652862549, "epoch": 1.39, "learning_rate": 5.7540493456610766e-05, "loss": 0.7348, "step": 3840, "task_loss": 0.8668036460876465 }, { "compression_loss": 0.0, "distillation_loss": 0.5850803256034851, "epoch": 1.39, "learning_rate": 5.747963478778191e-05, "loss": 0.7299, "step": 3850, "task_loss": 0.4395766258239746 }, { "compression_loss": 0.0, "distillation_loss": 0.8359133005142212, "epoch": 1.4, "learning_rate": 5.741866020639692e-05, "loss": 0.7206, "step": 3860, "task_loss": 1.7376631498336792 }, { "compression_loss": 0.0, "distillation_loss": 0.5748704671859741, "epoch": 1.4, "learning_rate": 5.735757002686126e-05, "loss": 0.7436, "step": 3870, "task_loss": 0.460451602935791 }, { "compression_loss": 0.0, "distillation_loss": 0.7052741050720215, "epoch": 1.4, "learning_rate": 5.7296364564176416e-05, "loss": 0.7916, "step": 3880, "task_loss": 0.82174152135849 }, { "compression_loss": 0.0, "distillation_loss": 1.0083122253417969, "epoch": 1.41, "learning_rate": 5.7235044133938356e-05, "loss": 0.8267, "step": 3890, "task_loss": 0.8673921823501587 }, { "compression_loss": 0.0, "distillation_loss": 0.5959757566452026, "epoch": 1.41, "learning_rate": 5.717360905233584e-05, "loss": 0.7126, "step": 3900, "task_loss": 0.7061013579368591 }, { "compression_loss": 0.0, "distillation_loss": 0.6814622282981873, "epoch": 1.41, "learning_rate": 5.711205963614879e-05, "loss": 0.8038, "step": 3910, "task_loss": 0.4971301853656769 }, { "compression_loss": 0.0, "distillation_loss": 1.2168264389038086, "epoch": 1.42, "learning_rate": 5.70503962027467e-05, "loss": 0.773, "step": 3920, "task_loss": 1.144862413406372 }, { "compression_loss": 0.0, "distillation_loss": 0.63560950756073, "epoch": 1.42, "learning_rate": 5.698861907008698e-05, "loss": 0.797, "step": 3930, "task_loss": 0.6747443079948425 }, { "compression_loss": 0.0, "distillation_loss": 0.7305340766906738, "epoch": 1.42, "learning_rate": 5.692672855671328e-05, "loss": 0.7419, "step": 3940, "task_loss": 0.39194267988204956 }, { "compression_loss": 0.0, "distillation_loss": 0.7829553484916687, "epoch": 1.43, "learning_rate": 5.6864724981753914e-05, "loss": 0.7298, "step": 3950, "task_loss": 0.7858966588973999 }, { "compression_loss": 0.0, "distillation_loss": 0.5866974592208862, "epoch": 1.43, "learning_rate": 5.680260866492015e-05, "loss": 0.7619, "step": 3960, "task_loss": 0.8394837379455566 }, { "compression_loss": 0.0, "distillation_loss": 0.8175315260887146, "epoch": 1.43, "learning_rate": 5.674037992650463e-05, "loss": 0.7816, "step": 3970, "task_loss": 0.7513538599014282 }, { "compression_loss": 0.0, "distillation_loss": 0.9726064205169678, "epoch": 1.44, "learning_rate": 5.667803908737963e-05, "loss": 0.689, "step": 3980, "task_loss": 0.914472758769989 }, { "compression_loss": 0.0, "distillation_loss": 0.5984624624252319, "epoch": 1.44, "learning_rate": 5.6615586468995484e-05, "loss": 0.7236, "step": 3990, "task_loss": 0.4769936203956604 }, { "compression_loss": 0.0, "distillation_loss": 0.6598918437957764, "epoch": 1.45, "learning_rate": 5.65530223933789e-05, "loss": 0.716, "step": 4000, "task_loss": 1.0444022417068481 }, { "epoch": 1.45, "eval_exact_match": 81.05014191106906, "eval_f1": 88.78854316118291, "step": 4000 }, { "compression_loss": 0.0, "distillation_loss": 0.4541385769844055, "epoch": 1.45, "learning_rate": 5.6490347183131293e-05, "loss": 0.7595, "step": 4010, "task_loss": 0.45822444558143616 }, { "compression_loss": 0.0, "distillation_loss": 0.8845678567886353, "epoch": 1.45, "learning_rate": 5.6427561161427114e-05, "loss": 0.7587, "step": 4020, "task_loss": 1.0624125003814697 }, { "compression_loss": 0.0, "distillation_loss": 0.6516546010971069, "epoch": 1.46, "learning_rate": 5.6364664652012204e-05, "loss": 0.692, "step": 4030, "task_loss": 0.6902788877487183 }, { "compression_loss": 0.0, "distillation_loss": 0.8230467438697815, "epoch": 1.46, "learning_rate": 5.630165797920211e-05, "loss": 0.7548, "step": 4040, "task_loss": 1.0239936113357544 }, { "compression_loss": 0.0, "distillation_loss": 0.565695583820343, "epoch": 1.46, "learning_rate": 5.623854146788044e-05, "loss": 0.7528, "step": 4050, "task_loss": 0.9347711801528931 }, { "compression_loss": 0.0, "distillation_loss": 0.48703664541244507, "epoch": 1.47, "learning_rate": 5.6175315443497146e-05, "loss": 0.795, "step": 4060, "task_loss": 0.5277222394943237 }, { "compression_loss": 0.0, "distillation_loss": 0.571143388748169, "epoch": 1.47, "learning_rate": 5.611198023206687e-05, "loss": 0.8009, "step": 4070, "task_loss": 0.8106241822242737 }, { "compression_loss": 0.0, "distillation_loss": 0.5028181672096252, "epoch": 1.47, "learning_rate": 5.6048536160167244e-05, "loss": 0.7494, "step": 4080, "task_loss": 0.39358311891555786 }, { "compression_loss": 0.0, "distillation_loss": 0.8799566030502319, "epoch": 1.48, "learning_rate": 5.598498355493726e-05, "loss": 0.7456, "step": 4090, "task_loss": 0.6059544682502747 }, { "compression_loss": 0.0, "distillation_loss": 0.8514795303344727, "epoch": 1.48, "learning_rate": 5.592132274407551e-05, "loss": 0.7538, "step": 4100, "task_loss": 0.9229733943939209 }, { "compression_loss": 0.0, "distillation_loss": 1.305832862854004, "epoch": 1.49, "learning_rate": 5.5857554055838546e-05, "loss": 0.7833, "step": 4110, "task_loss": 1.1757216453552246 }, { "compression_loss": 0.0, "distillation_loss": 0.7599394917488098, "epoch": 1.49, "learning_rate": 5.5793677819039163e-05, "loss": 0.7546, "step": 4120, "task_loss": 0.8550896644592285 }, { "compression_loss": 0.0, "distillation_loss": 0.9693371057510376, "epoch": 1.49, "learning_rate": 5.572969436304471e-05, "loss": 0.8094, "step": 4130, "task_loss": 1.0503398180007935 }, { "compression_loss": 0.0, "distillation_loss": 0.6661901473999023, "epoch": 1.5, "learning_rate": 5.566560401777542e-05, "loss": 0.7682, "step": 4140, "task_loss": 0.47326183319091797 }, { "compression_loss": 0.0, "distillation_loss": 0.5503145456314087, "epoch": 1.5, "learning_rate": 5.560140711370266e-05, "loss": 0.6929, "step": 4150, "task_loss": 0.5086473226547241 }, { "compression_loss": 0.0, "distillation_loss": 0.6122060418128967, "epoch": 1.5, "learning_rate": 5.553710398184724e-05, "loss": 0.7253, "step": 4160, "task_loss": 0.7502968311309814 }, { "compression_loss": 0.0, "distillation_loss": 0.8065118789672852, "epoch": 1.51, "learning_rate": 5.547269495377774e-05, "loss": 0.7417, "step": 4170, "task_loss": 0.8612374663352966 }, { "compression_loss": 0.0, "distillation_loss": 0.8094028234481812, "epoch": 1.51, "learning_rate": 5.5408180361608775e-05, "loss": 0.7633, "step": 4180, "task_loss": 0.8984237909317017 }, { "compression_loss": 0.0, "distillation_loss": 0.7946018576622009, "epoch": 1.51, "learning_rate": 5.5343560537999265e-05, "loss": 0.7919, "step": 4190, "task_loss": 0.8055480718612671 }, { "compression_loss": 0.0, "distillation_loss": 0.9102916121482849, "epoch": 1.52, "learning_rate": 5.527883581615077e-05, "loss": 0.7809, "step": 4200, "task_loss": 0.8656786680221558 }, { "compression_loss": 0.0, "distillation_loss": 0.7173982858657837, "epoch": 1.52, "learning_rate": 5.521400652980568e-05, "loss": 0.7714, "step": 4210, "task_loss": 0.7693835496902466 }, { "compression_loss": 0.0, "distillation_loss": 0.8590863943099976, "epoch": 1.53, "learning_rate": 5.514907301324564e-05, "loss": 0.7208, "step": 4220, "task_loss": 0.7576629519462585 }, { "compression_loss": 0.0, "distillation_loss": 0.7024766206741333, "epoch": 1.53, "learning_rate": 5.5084035601289674e-05, "loss": 0.7241, "step": 4230, "task_loss": 0.5862898230552673 }, { "compression_loss": 0.0, "distillation_loss": 0.6137065291404724, "epoch": 1.53, "learning_rate": 5.501889462929254e-05, "loss": 0.659, "step": 4240, "task_loss": 0.6031349897384644 }, { "compression_loss": 0.0, "distillation_loss": 0.734519362449646, "epoch": 1.54, "learning_rate": 5.4953650433143015e-05, "loss": 0.777, "step": 4250, "task_loss": 0.5666317343711853 }, { "epoch": 1.54, "eval_exact_match": 80.90823084200568, "eval_f1": 88.62694215371808, "step": 4250 }, { "compression_loss": 0.0, "distillation_loss": 0.8583282232284546, "epoch": 1.54, "learning_rate": 5.4888303349262105e-05, "loss": 0.7682, "step": 4260, "task_loss": 0.5990405082702637 }, { "compression_loss": 0.0, "distillation_loss": 0.7635049223899841, "epoch": 1.54, "learning_rate": 5.4822853714601336e-05, "loss": 0.8497, "step": 4270, "task_loss": 0.9380366206169128 }, { "compression_loss": 0.0, "distillation_loss": 0.5275091528892517, "epoch": 1.55, "learning_rate": 5.475730186664106e-05, "loss": 0.7873, "step": 4280, "task_loss": 0.5869154334068298 }, { "compression_loss": 0.0, "distillation_loss": 0.918228030204773, "epoch": 1.55, "learning_rate": 5.4691648143388625e-05, "loss": 0.7501, "step": 4290, "task_loss": 0.8605305552482605 }, { "compression_loss": 0.0, "distillation_loss": 0.9519420862197876, "epoch": 1.55, "learning_rate": 5.462589288337671e-05, "loss": 0.8131, "step": 4300, "task_loss": 0.8920116424560547 }, { "compression_loss": 0.0, "distillation_loss": 0.5668010115623474, "epoch": 1.56, "learning_rate": 5.456003642566156e-05, "loss": 0.7325, "step": 4310, "task_loss": 0.7306046485900879 }, { "compression_loss": 0.0, "distillation_loss": 0.3651421070098877, "epoch": 1.56, "learning_rate": 5.44940791098212e-05, "loss": 0.7889, "step": 4320, "task_loss": 0.42743948101997375 }, { "compression_loss": 0.0, "distillation_loss": 0.6651080250740051, "epoch": 1.56, "learning_rate": 5.442802127595373e-05, "loss": 0.7759, "step": 4330, "task_loss": 1.0754146575927734 }, { "compression_loss": 0.0, "distillation_loss": 0.752009391784668, "epoch": 1.57, "learning_rate": 5.4361863264675574e-05, "loss": 0.7438, "step": 4340, "task_loss": 0.8343137502670288 }, { "compression_loss": 0.0, "distillation_loss": 0.5974752902984619, "epoch": 1.57, "learning_rate": 5.4295605417119656e-05, "loss": 0.8421, "step": 4350, "task_loss": 0.6965165138244629 }, { "compression_loss": 0.0, "distillation_loss": 0.5219227075576782, "epoch": 1.58, "learning_rate": 5.422924807493375e-05, "loss": 0.7804, "step": 4360, "task_loss": 1.1046372652053833 }, { "compression_loss": 0.0, "distillation_loss": 0.7944738864898682, "epoch": 1.58, "learning_rate": 5.41627915802786e-05, "loss": 0.7957, "step": 4370, "task_loss": 1.3685147762298584 }, { "compression_loss": 0.0, "distillation_loss": 0.7721883058547974, "epoch": 1.58, "learning_rate": 5.409623627582625e-05, "loss": 0.7248, "step": 4380, "task_loss": 0.6489783525466919 }, { "compression_loss": 0.0, "distillation_loss": 0.7032918930053711, "epoch": 1.59, "learning_rate": 5.402958250475823e-05, "loss": 0.7312, "step": 4390, "task_loss": 0.7790108919143677 }, { "compression_loss": 0.0, "distillation_loss": 0.6564103364944458, "epoch": 1.59, "learning_rate": 5.396283061076379e-05, "loss": 0.7982, "step": 4400, "task_loss": 0.7662302255630493 }, { "compression_loss": 0.0, "distillation_loss": 0.9732493162155151, "epoch": 1.59, "learning_rate": 5.389598093803816e-05, "loss": 0.8468, "step": 4410, "task_loss": 0.800963282585144 }, { "compression_loss": 0.0, "distillation_loss": 0.5395100116729736, "epoch": 1.6, "learning_rate": 5.3829033831280716e-05, "loss": 0.7181, "step": 4420, "task_loss": 0.6361441612243652 }, { "compression_loss": 0.0, "distillation_loss": 0.7411954998970032, "epoch": 1.6, "learning_rate": 5.376198963569325e-05, "loss": 0.7124, "step": 4430, "task_loss": 0.6355388164520264 }, { "compression_loss": 0.0, "distillation_loss": 0.6182745695114136, "epoch": 1.6, "learning_rate": 5.36948486969782e-05, "loss": 0.6804, "step": 4440, "task_loss": 0.7747174501419067 }, { "compression_loss": 0.0, "distillation_loss": 0.772506058216095, "epoch": 1.61, "learning_rate": 5.36276113613368e-05, "loss": 0.7687, "step": 4450, "task_loss": 0.6414505839347839 }, { "compression_loss": 0.0, "distillation_loss": 0.9038888216018677, "epoch": 1.61, "learning_rate": 5.356027797546736e-05, "loss": 0.7532, "step": 4460, "task_loss": 0.7040343880653381 }, { "compression_loss": 0.0, "distillation_loss": 0.6857190728187561, "epoch": 1.62, "learning_rate": 5.349284888656348e-05, "loss": 0.7633, "step": 4470, "task_loss": 0.8633918166160583 }, { "compression_loss": 0.0, "distillation_loss": 0.7742190957069397, "epoch": 1.62, "learning_rate": 5.342532444231222e-05, "loss": 0.7441, "step": 4480, "task_loss": 0.7828219532966614 }, { "compression_loss": 0.0, "distillation_loss": 1.0203568935394287, "epoch": 1.62, "learning_rate": 5.33577049908923e-05, "loss": 0.7315, "step": 4490, "task_loss": 0.8948928713798523 }, { "compression_loss": 0.0, "distillation_loss": 0.5982668399810791, "epoch": 1.63, "learning_rate": 5.328999088097236e-05, "loss": 0.7708, "step": 4500, "task_loss": 0.6327080726623535 }, { "epoch": 1.63, "eval_exact_match": 80.85146641438033, "eval_f1": 88.73605738995374, "step": 4500 }, { "compression_loss": 0.0, "distillation_loss": 0.8688200116157532, "epoch": 1.63, "learning_rate": 5.322218246170911e-05, "loss": 0.8681, "step": 4510, "task_loss": 1.0566356182098389 }, { "compression_loss": 0.0, "distillation_loss": 0.9127258062362671, "epoch": 1.63, "learning_rate": 5.315428008274558e-05, "loss": 0.8109, "step": 4520, "task_loss": 0.9632719159126282 }, { "compression_loss": 0.0, "distillation_loss": 0.6907234787940979, "epoch": 1.64, "learning_rate": 5.308628409420925e-05, "loss": 0.7828, "step": 4530, "task_loss": 0.7649120688438416 }, { "compression_loss": 0.0, "distillation_loss": 0.6892116069793701, "epoch": 1.64, "learning_rate": 5.301819484671032e-05, "loss": 0.7633, "step": 4540, "task_loss": 0.5884895324707031 }, { "compression_loss": 0.0, "distillation_loss": 0.8199399709701538, "epoch": 1.64, "learning_rate": 5.295001269133984e-05, "loss": 0.729, "step": 4550, "task_loss": 0.7061965465545654 }, { "compression_loss": 0.0, "distillation_loss": 1.1325392723083496, "epoch": 1.65, "learning_rate": 5.2881737979667926e-05, "loss": 0.8629, "step": 4560, "task_loss": 1.1424285173416138 }, { "compression_loss": 0.0, "distillation_loss": 0.5671732425689697, "epoch": 1.65, "learning_rate": 5.2813371063741945e-05, "loss": 0.6932, "step": 4570, "task_loss": 0.61430424451828 }, { "compression_loss": 0.0, "distillation_loss": 0.9136245846748352, "epoch": 1.66, "learning_rate": 5.2744912296084714e-05, "loss": 0.8854, "step": 4580, "task_loss": 1.0167670249938965 }, { "compression_loss": 0.0, "distillation_loss": 0.6893622875213623, "epoch": 1.66, "learning_rate": 5.2676362029692664e-05, "loss": 0.817, "step": 4590, "task_loss": 0.501244068145752 }, { "compression_loss": 0.0, "distillation_loss": 0.6975424289703369, "epoch": 1.66, "learning_rate": 5.260772061803401e-05, "loss": 0.6955, "step": 4600, "task_loss": 0.8912298679351807 }, { "compression_loss": 0.0, "distillation_loss": 0.6169401407241821, "epoch": 1.67, "learning_rate": 5.2538988415046944e-05, "loss": 0.7314, "step": 4610, "task_loss": 0.5644417405128479 }, { "compression_loss": 0.0, "distillation_loss": 0.5297744274139404, "epoch": 1.67, "learning_rate": 5.247016577513784e-05, "loss": 0.7156, "step": 4620, "task_loss": 0.539046585559845 }, { "compression_loss": 0.0, "distillation_loss": 0.6868759393692017, "epoch": 1.67, "learning_rate": 5.2401253053179365e-05, "loss": 0.7758, "step": 4630, "task_loss": 0.574796736240387 }, { "compression_loss": 0.0, "distillation_loss": 0.7551968097686768, "epoch": 1.68, "learning_rate": 5.2332250604508675e-05, "loss": 0.7911, "step": 4640, "task_loss": 0.7167572379112244 }, { "compression_loss": 0.0, "distillation_loss": 0.6903712749481201, "epoch": 1.68, "learning_rate": 5.226315878492561e-05, "loss": 0.7262, "step": 4650, "task_loss": 0.8787746429443359 }, { "compression_loss": 0.0, "distillation_loss": 0.8185775876045227, "epoch": 1.68, "learning_rate": 5.219397795069082e-05, "loss": 0.8305, "step": 4660, "task_loss": 0.8529611825942993 }, { "compression_loss": 0.0, "distillation_loss": 0.6731795072555542, "epoch": 1.69, "learning_rate": 5.212470845852394e-05, "loss": 0.7395, "step": 4670, "task_loss": 0.8858544230461121 }, { "compression_loss": 0.0, "distillation_loss": 0.9355323910713196, "epoch": 1.69, "learning_rate": 5.205535066560179e-05, "loss": 0.767, "step": 4680, "task_loss": 0.7263451218605042 }, { "compression_loss": 0.0, "distillation_loss": 0.7926836609840393, "epoch": 1.69, "learning_rate": 5.198590492955645e-05, "loss": 0.6869, "step": 4690, "task_loss": 0.5331249833106995 }, { "compression_loss": 0.0, "distillation_loss": 0.9048827886581421, "epoch": 1.7, "learning_rate": 5.1916371608473494e-05, "loss": 0.7267, "step": 4700, "task_loss": 1.2360985279083252 }, { "compression_loss": 0.0, "distillation_loss": 0.5285090208053589, "epoch": 1.7, "learning_rate": 5.1846751060890114e-05, "loss": 0.8697, "step": 4710, "task_loss": 0.521661639213562 }, { "compression_loss": 0.0, "distillation_loss": 1.0250056982040405, "epoch": 1.71, "learning_rate": 5.1777043645793254e-05, "loss": 0.6634, "step": 4720, "task_loss": 0.7965474724769592 }, { "compression_loss": 0.0, "distillation_loss": 0.6251044273376465, "epoch": 1.71, "learning_rate": 5.17072497226178e-05, "loss": 0.7716, "step": 4730, "task_loss": 0.3452892303466797 }, { "compression_loss": 0.0, "distillation_loss": 0.8053218126296997, "epoch": 1.71, "learning_rate": 5.1637369651244666e-05, "loss": 0.6825, "step": 4740, "task_loss": 0.6003007292747498 }, { "compression_loss": 0.0, "distillation_loss": 0.5973554253578186, "epoch": 1.72, "learning_rate": 5.156740379199902e-05, "loss": 0.7522, "step": 4750, "task_loss": 0.8139755725860596 }, { "epoch": 1.72, "eval_exact_match": 81.36234626300852, "eval_f1": 88.9705388336572, "step": 4750 }, { "compression_loss": 0.0, "distillation_loss": 0.7501943111419678, "epoch": 1.72, "learning_rate": 5.149735250564836e-05, "loss": 0.7183, "step": 4760, "task_loss": 0.6855126619338989 }, { "compression_loss": 0.0, "distillation_loss": 0.5616030693054199, "epoch": 1.72, "learning_rate": 5.142721615340066e-05, "loss": 0.6749, "step": 4770, "task_loss": 0.7784498929977417 }, { "compression_loss": 0.0, "distillation_loss": 1.0238778591156006, "epoch": 1.73, "learning_rate": 5.135699509690255e-05, "loss": 0.7818, "step": 4780, "task_loss": 0.8857766389846802 }, { "compression_loss": 0.0, "distillation_loss": 0.5700663328170776, "epoch": 1.73, "learning_rate": 5.128668969823739e-05, "loss": 0.6645, "step": 4790, "task_loss": 0.6657969951629639 }, { "compression_loss": 0.0, "distillation_loss": 0.5193660259246826, "epoch": 1.73, "learning_rate": 5.121630031992348e-05, "loss": 0.7029, "step": 4800, "task_loss": 0.7439495325088501 }, { "compression_loss": 0.0, "distillation_loss": 0.5604043006896973, "epoch": 1.74, "learning_rate": 5.114582732491213e-05, "loss": 0.7065, "step": 4810, "task_loss": 0.7198078036308289 }, { "compression_loss": 0.0, "distillation_loss": 0.7965430021286011, "epoch": 1.74, "learning_rate": 5.107527107658576e-05, "loss": 0.7062, "step": 4820, "task_loss": 0.5083843469619751 }, { "compression_loss": 0.0, "distillation_loss": 0.6073727607727051, "epoch": 1.75, "learning_rate": 5.1004631938756154e-05, "loss": 0.7453, "step": 4830, "task_loss": 0.4514174461364746 }, { "compression_loss": 0.0, "distillation_loss": 0.8077729940414429, "epoch": 1.75, "learning_rate": 5.093391027566244e-05, "loss": 0.6939, "step": 4840, "task_loss": 0.6148980855941772 }, { "compression_loss": 0.0, "distillation_loss": 0.566129207611084, "epoch": 1.75, "learning_rate": 5.086310645196928e-05, "loss": 0.6517, "step": 4850, "task_loss": 0.5865195393562317 }, { "compression_loss": 0.0, "distillation_loss": 0.7375544309616089, "epoch": 1.76, "learning_rate": 5.079222083276504e-05, "loss": 0.7073, "step": 4860, "task_loss": 0.4971655607223511 }, { "compression_loss": 0.0, "distillation_loss": 0.4711465835571289, "epoch": 1.76, "learning_rate": 5.072125378355978e-05, "loss": 0.7163, "step": 4870, "task_loss": 0.36320602893829346 }, { "compression_loss": 0.0, "distillation_loss": 0.90312659740448, "epoch": 1.76, "learning_rate": 5.0650205670283475e-05, "loss": 0.6884, "step": 4880, "task_loss": 0.7906779050827026 }, { "compression_loss": 0.0, "distillation_loss": 0.7571483850479126, "epoch": 1.77, "learning_rate": 5.057907685928408e-05, "loss": 0.6455, "step": 4890, "task_loss": 0.610438346862793 }, { "compression_loss": 0.0, "distillation_loss": 0.6640625, "epoch": 1.77, "learning_rate": 5.050786771732567e-05, "loss": 0.6774, "step": 4900, "task_loss": 0.9800470471382141 }, { "compression_loss": 0.0, "distillation_loss": 0.9504784345626831, "epoch": 1.77, "learning_rate": 5.043657861158653e-05, "loss": 0.7658, "step": 4910, "task_loss": 1.0992169380187988 }, { "compression_loss": 0.0, "distillation_loss": 0.6472917795181274, "epoch": 1.78, "learning_rate": 5.036520990965726e-05, "loss": 0.7905, "step": 4920, "task_loss": 0.5526524782180786 }, { "compression_loss": 0.0, "distillation_loss": 0.9225701093673706, "epoch": 1.78, "learning_rate": 5.029376197953888e-05, "loss": 0.7936, "step": 4930, "task_loss": 1.0977343320846558 }, { "compression_loss": 0.0, "distillation_loss": 0.8606763482093811, "epoch": 1.79, "learning_rate": 5.022223518964095e-05, "loss": 0.7437, "step": 4940, "task_loss": 0.95201575756073 }, { "compression_loss": 0.0, "distillation_loss": 0.7826287150382996, "epoch": 1.79, "learning_rate": 5.015062990877964e-05, "loss": 0.7283, "step": 4950, "task_loss": 0.7915252447128296 }, { "compression_loss": 0.0, "distillation_loss": 0.8127055168151855, "epoch": 1.79, "learning_rate": 5.007894650617588e-05, "loss": 0.7323, "step": 4960, "task_loss": 1.1264145374298096 }, { "compression_loss": 0.0, "distillation_loss": 0.9009028673171997, "epoch": 1.8, "learning_rate": 5.0007185351453374e-05, "loss": 0.7694, "step": 4970, "task_loss": 0.7590454816818237 }, { "compression_loss": 0.0, "distillation_loss": 1.088401198387146, "epoch": 1.8, "learning_rate": 4.9935346814636785e-05, "loss": 0.7431, "step": 4980, "task_loss": 1.1049809455871582 }, { "compression_loss": 0.0, "distillation_loss": 0.7166246175765991, "epoch": 1.8, "learning_rate": 4.9863431266149745e-05, "loss": 0.6911, "step": 4990, "task_loss": 0.7473670840263367 }, { "compression_loss": 0.0, "distillation_loss": 0.5155638456344604, "epoch": 1.81, "learning_rate": 4.979143907681301e-05, "loss": 0.7239, "step": 5000, "task_loss": 0.6666215062141418 }, { "epoch": 1.81, "eval_exact_match": 81.53263954588458, "eval_f1": 89.04411412196261, "step": 5000 }, { "compression_loss": 0.0, "distillation_loss": 0.46164464950561523, "epoch": 1.81, "learning_rate": 4.9719370617842544e-05, "loss": 0.6567, "step": 5010, "task_loss": 0.2584804892539978 }, { "compression_loss": 0.0, "distillation_loss": 1.095167636871338, "epoch": 1.81, "learning_rate": 4.964722626084752e-05, "loss": 0.7566, "step": 5020, "task_loss": 1.149203896522522 }, { "compression_loss": 0.0, "distillation_loss": 0.6007382869720459, "epoch": 1.82, "learning_rate": 4.9575006377828535e-05, "loss": 0.7119, "step": 5030, "task_loss": 0.8033612966537476 }, { "compression_loss": 0.0, "distillation_loss": 0.7142719030380249, "epoch": 1.82, "learning_rate": 4.9502711341175553e-05, "loss": 0.7405, "step": 5040, "task_loss": 1.000267744064331 }, { "compression_loss": 0.0, "distillation_loss": 0.7234902381896973, "epoch": 1.83, "learning_rate": 4.9430341523666124e-05, "loss": 0.7188, "step": 5050, "task_loss": 0.7795543670654297 }, { "compression_loss": 0.0, "distillation_loss": 0.7521551847457886, "epoch": 1.83, "learning_rate": 4.935789729846335e-05, "loss": 0.6675, "step": 5060, "task_loss": 0.6602778434753418 }, { "compression_loss": 0.0, "distillation_loss": 0.48747414350509644, "epoch": 1.83, "learning_rate": 4.928537903911402e-05, "loss": 0.8129, "step": 5070, "task_loss": 0.5280594825744629 }, { "compression_loss": 0.0, "distillation_loss": 0.6115528345108032, "epoch": 1.84, "learning_rate": 4.921278711954666e-05, "loss": 0.7775, "step": 5080, "task_loss": 0.9519979953765869 }, { "compression_loss": 0.0, "distillation_loss": 0.4798819422721863, "epoch": 1.84, "learning_rate": 4.9140121914069626e-05, "loss": 0.6354, "step": 5090, "task_loss": 0.4596359133720398 }, { "compression_loss": 0.0, "distillation_loss": 0.5512056350708008, "epoch": 1.84, "learning_rate": 4.9067383797369146e-05, "loss": 0.706, "step": 5100, "task_loss": 0.275431364774704 }, { "compression_loss": 0.0, "distillation_loss": 0.6959718465805054, "epoch": 1.85, "learning_rate": 4.899457314450742e-05, "loss": 0.6351, "step": 5110, "task_loss": 0.7794166803359985 }, { "compression_loss": 0.0, "distillation_loss": 0.6612961888313293, "epoch": 1.85, "learning_rate": 4.8921690330920637e-05, "loss": 0.7879, "step": 5120, "task_loss": 0.6663255095481873 }, { "compression_loss": 0.0, "distillation_loss": 0.7638074159622192, "epoch": 1.85, "learning_rate": 4.884873573241711e-05, "loss": 0.7483, "step": 5130, "task_loss": 0.5485991835594177 }, { "compression_loss": 0.0, "distillation_loss": 0.5604839324951172, "epoch": 1.86, "learning_rate": 4.8775709725175277e-05, "loss": 0.6781, "step": 5140, "task_loss": 0.41115280985832214 }, { "compression_loss": 0.0, "distillation_loss": 0.4681781530380249, "epoch": 1.86, "learning_rate": 4.870261268574178e-05, "loss": 0.7632, "step": 5150, "task_loss": 0.3362809121608734 }, { "compression_loss": 0.0, "distillation_loss": 0.6780769228935242, "epoch": 1.86, "learning_rate": 4.8629444991029544e-05, "loss": 0.6914, "step": 5160, "task_loss": 0.7485142350196838 }, { "compression_loss": 0.0, "distillation_loss": 0.5212565660476685, "epoch": 1.87, "learning_rate": 4.8556207018315793e-05, "loss": 0.7206, "step": 5170, "task_loss": 0.4486411511898041 }, { "compression_loss": 0.0, "distillation_loss": 0.5345607995986938, "epoch": 1.87, "learning_rate": 4.848289914524017e-05, "loss": 0.7443, "step": 5180, "task_loss": 0.8513927459716797 }, { "compression_loss": 0.0, "distillation_loss": 0.781924843788147, "epoch": 1.88, "learning_rate": 4.840952174980269e-05, "loss": 0.8009, "step": 5190, "task_loss": 0.5516729950904846 }, { "compression_loss": 0.0, "distillation_loss": 0.7696242332458496, "epoch": 1.88, "learning_rate": 4.8336075210361884e-05, "loss": 0.7267, "step": 5200, "task_loss": 1.0042166709899902 }, { "compression_loss": 0.0, "distillation_loss": 1.1635730266571045, "epoch": 1.88, "learning_rate": 4.826255990563282e-05, "loss": 0.7788, "step": 5210, "task_loss": 1.0315825939178467 }, { "compression_loss": 0.0, "distillation_loss": 0.9389041662216187, "epoch": 1.89, "learning_rate": 4.8188976214685125e-05, "loss": 0.7449, "step": 5220, "task_loss": 0.7496635913848877 }, { "compression_loss": 0.0, "distillation_loss": 0.4800257086753845, "epoch": 1.89, "learning_rate": 4.811532451694104e-05, "loss": 0.7274, "step": 5230, "task_loss": 0.6168285608291626 }, { "compression_loss": 0.0, "distillation_loss": 0.5006886720657349, "epoch": 1.89, "learning_rate": 4.8041605192173505e-05, "loss": 0.6992, "step": 5240, "task_loss": 0.5821633338928223 }, { "compression_loss": 0.0, "distillation_loss": 0.7733217477798462, "epoch": 1.9, "learning_rate": 4.796781862050413e-05, "loss": 0.7236, "step": 5250, "task_loss": 0.7929372787475586 }, { "epoch": 1.9, "eval_exact_match": 81.43803216650899, "eval_f1": 88.85914255552852, "step": 5250 }, { "compression_loss": 0.0, "distillation_loss": 0.6627964973449707, "epoch": 1.9, "learning_rate": 4.789396518240132e-05, "loss": 0.7888, "step": 5260, "task_loss": 0.8579301834106445 }, { "compression_loss": 0.0, "distillation_loss": 0.6447975635528564, "epoch": 1.9, "learning_rate": 4.782004525867819e-05, "loss": 0.6355, "step": 5270, "task_loss": 0.42783862352371216 }, { "compression_loss": 0.0, "distillation_loss": 0.5919076204299927, "epoch": 1.91, "learning_rate": 4.774605923049076e-05, "loss": 0.7468, "step": 5280, "task_loss": 0.7170625329017639 }, { "compression_loss": 0.0, "distillation_loss": 0.6702316999435425, "epoch": 1.91, "learning_rate": 4.767200747933586e-05, "loss": 0.6474, "step": 5290, "task_loss": 0.8280285596847534 }, { "compression_loss": 0.0, "distillation_loss": 1.1130871772766113, "epoch": 1.92, "learning_rate": 4.759789038704922e-05, "loss": 0.7438, "step": 5300, "task_loss": 1.4849894046783447 }, { "compression_loss": 0.0, "distillation_loss": 0.5206938982009888, "epoch": 1.92, "learning_rate": 4.7523708335803504e-05, "loss": 0.7162, "step": 5310, "task_loss": 0.5600756406784058 }, { "compression_loss": 0.0, "distillation_loss": 0.5643234252929688, "epoch": 1.92, "learning_rate": 4.7449461708106276e-05, "loss": 0.6722, "step": 5320, "task_loss": 0.6859425902366638 }, { "compression_loss": 0.0, "distillation_loss": 0.7487398386001587, "epoch": 1.93, "learning_rate": 4.7375150886798146e-05, "loss": 0.7401, "step": 5330, "task_loss": 0.9123652577400208 }, { "compression_loss": 0.0, "distillation_loss": 0.5706630945205688, "epoch": 1.93, "learning_rate": 4.73007762550507e-05, "loss": 0.7382, "step": 5340, "task_loss": 0.4185778498649597 }, { "compression_loss": 0.0, "distillation_loss": 0.4675668776035309, "epoch": 1.93, "learning_rate": 4.722633819636453e-05, "loss": 0.6022, "step": 5350, "task_loss": 0.4251108169555664 }, { "compression_loss": 0.0, "distillation_loss": 0.5795633792877197, "epoch": 1.94, "learning_rate": 4.715183709456731e-05, "loss": 0.65, "step": 5360, "task_loss": 0.5892939567565918 }, { "compression_loss": 0.0, "distillation_loss": 0.6301298141479492, "epoch": 1.94, "learning_rate": 4.707727333381177e-05, "loss": 0.7081, "step": 5370, "task_loss": 0.43882763385772705 }, { "compression_loss": 0.0, "distillation_loss": 0.6167176961898804, "epoch": 1.94, "learning_rate": 4.7002647298573725e-05, "loss": 0.7195, "step": 5380, "task_loss": 0.5531620383262634 }, { "compression_loss": 0.0, "distillation_loss": 0.6640338897705078, "epoch": 1.95, "learning_rate": 4.692795937365013e-05, "loss": 0.6945, "step": 5390, "task_loss": 0.5559107661247253 }, { "compression_loss": 0.0, "distillation_loss": 0.9159687161445618, "epoch": 1.95, "learning_rate": 4.685320994415701e-05, "loss": 0.6897, "step": 5400, "task_loss": 1.0885941982269287 }, { "compression_loss": 0.0, "distillation_loss": 1.5111383199691772, "epoch": 1.96, "learning_rate": 4.677839939552759e-05, "loss": 0.8984, "step": 5410, "task_loss": 1.1360901594161987 }, { "compression_loss": 0.0, "distillation_loss": 0.49151280522346497, "epoch": 1.96, "learning_rate": 4.670352811351019e-05, "loss": 0.7072, "step": 5420, "task_loss": 0.2888219654560089 }, { "compression_loss": 0.0, "distillation_loss": 0.6618350744247437, "epoch": 1.96, "learning_rate": 4.662859648416633e-05, "loss": 0.727, "step": 5430, "task_loss": 0.5811130404472351 }, { "compression_loss": 0.0, "distillation_loss": 0.7194912433624268, "epoch": 1.97, "learning_rate": 4.655360489386869e-05, "loss": 0.6801, "step": 5440, "task_loss": 0.6658434867858887 }, { "compression_loss": 0.0, "distillation_loss": 0.9585258960723877, "epoch": 1.97, "learning_rate": 4.647855372929912e-05, "loss": 0.8352, "step": 5450, "task_loss": 0.8630807399749756 }, { "compression_loss": 0.0, "distillation_loss": 0.6633427739143372, "epoch": 1.97, "learning_rate": 4.640344337744667e-05, "loss": 0.7305, "step": 5460, "task_loss": 0.8915340900421143 }, { "compression_loss": 0.0, "distillation_loss": 0.8834680318832397, "epoch": 1.98, "learning_rate": 4.6328274225605556e-05, "loss": 0.7421, "step": 5470, "task_loss": 0.8303129076957703 }, { "compression_loss": 0.0, "distillation_loss": 0.506862223148346, "epoch": 1.98, "learning_rate": 4.625304666137321e-05, "loss": 0.6902, "step": 5480, "task_loss": 0.46703067421913147 }, { "compression_loss": 0.0, "distillation_loss": 0.5266671180725098, "epoch": 1.98, "learning_rate": 4.617776107264826e-05, "loss": 0.6921, "step": 5490, "task_loss": 0.5322936177253723 }, { "compression_loss": 0.0, "distillation_loss": 0.4714794158935547, "epoch": 1.99, "learning_rate": 4.6102417847628494e-05, "loss": 0.7554, "step": 5500, "task_loss": 0.4454458951950073 }, { "epoch": 1.99, "eval_exact_match": 81.52317880794702, "eval_f1": 88.9781389750277, "step": 5500 }, { "compression_loss": 0.0, "distillation_loss": 0.6718301177024841, "epoch": 1.99, "learning_rate": 4.602701737480895e-05, "loss": 0.7268, "step": 5510, "task_loss": 0.6084522008895874 }, { "compression_loss": 0.0, "distillation_loss": 0.5852054953575134, "epoch": 1.99, "learning_rate": 4.595156004297978e-05, "loss": 0.7398, "step": 5520, "task_loss": 0.9601466655731201 }, { "compression_loss": 0.0, "distillation_loss": 0.7977070808410645, "epoch": 2.0, "learning_rate": 4.5876046241224384e-05, "loss": 0.7316, "step": 5530, "task_loss": 1.0793871879577637 }, { "compression_loss": 0.0, "distillation_loss": 0.5240362286567688, "epoch": 2.0, "learning_rate": 4.580047635891733e-05, "loss": 0.5617, "step": 5540, "task_loss": 0.4422588050365448 }, { "compression_loss": 0.0, "distillation_loss": 0.47274863719940186, "epoch": 2.01, "learning_rate": 4.572485078572231e-05, "loss": 0.5784, "step": 5550, "task_loss": 0.5353361964225769 }, { "compression_loss": 0.0, "distillation_loss": 0.4247085452079773, "epoch": 2.01, "learning_rate": 4.564916991159023e-05, "loss": 0.6764, "step": 5560, "task_loss": 0.5559691190719604 }, { "compression_loss": 0.0, "distillation_loss": 0.5100902318954468, "epoch": 2.01, "learning_rate": 4.5573434126757125e-05, "loss": 0.6411, "step": 5570, "task_loss": 0.6340029835700989 }, { "compression_loss": 0.0, "distillation_loss": 0.6564936637878418, "epoch": 2.02, "learning_rate": 4.5497643821742164e-05, "loss": 0.5141, "step": 5580, "task_loss": 0.8005180358886719 }, { "compression_loss": 0.0, "distillation_loss": 0.8931950330734253, "epoch": 2.02, "learning_rate": 4.5421799387345656e-05, "loss": 0.6289, "step": 5590, "task_loss": 0.5530489683151245 }, { "compression_loss": 0.0, "distillation_loss": 0.4969043731689453, "epoch": 2.02, "learning_rate": 4.534590121464699e-05, "loss": 0.6045, "step": 5600, "task_loss": 0.6437075138092041 }, { "compression_loss": 0.0, "distillation_loss": 0.48622632026672363, "epoch": 2.03, "learning_rate": 4.526994969500268e-05, "loss": 0.5805, "step": 5610, "task_loss": 0.7267904281616211 }, { "compression_loss": 0.0, "distillation_loss": 0.5548010468482971, "epoch": 2.03, "learning_rate": 4.5193945220044304e-05, "loss": 0.5796, "step": 5620, "task_loss": 0.8219677209854126 }, { "compression_loss": 0.0, "distillation_loss": 0.7763128280639648, "epoch": 2.03, "learning_rate": 4.5117888181676476e-05, "loss": 0.5508, "step": 5630, "task_loss": 0.6904866099357605 }, { "compression_loss": 0.0, "distillation_loss": 0.5509345531463623, "epoch": 2.04, "learning_rate": 4.5041778972074884e-05, "loss": 0.5718, "step": 5640, "task_loss": 0.36285942792892456 }, { "compression_loss": 0.0, "distillation_loss": 0.5179515480995178, "epoch": 2.04, "learning_rate": 4.4965617983684185e-05, "loss": 0.5675, "step": 5650, "task_loss": 0.6883585453033447 }, { "compression_loss": 0.0, "distillation_loss": 0.5797591209411621, "epoch": 2.05, "learning_rate": 4.4889405609216065e-05, "loss": 0.5678, "step": 5660, "task_loss": 0.6791222095489502 }, { "compression_loss": 0.0, "distillation_loss": 0.865078330039978, "epoch": 2.05, "learning_rate": 4.481314224164713e-05, "loss": 0.6235, "step": 5670, "task_loss": 0.8147318363189697 }, { "compression_loss": 0.0, "distillation_loss": 0.5414189100265503, "epoch": 2.05, "learning_rate": 4.473682827421697e-05, "loss": 0.525, "step": 5680, "task_loss": 0.7834112644195557 }, { "compression_loss": 0.0, "distillation_loss": 0.529310941696167, "epoch": 2.06, "learning_rate": 4.4660464100426054e-05, "loss": 0.6026, "step": 5690, "task_loss": 0.5471886396408081 }, { "compression_loss": 0.0, "distillation_loss": 0.43340983986854553, "epoch": 2.06, "learning_rate": 4.458405011403374e-05, "loss": 0.5505, "step": 5700, "task_loss": 0.3781987428665161 }, { "compression_loss": 0.0, "distillation_loss": 0.6081724166870117, "epoch": 2.06, "learning_rate": 4.450758670905624e-05, "loss": 0.5351, "step": 5710, "task_loss": 0.5465576648712158 }, { "compression_loss": 0.0, "distillation_loss": 0.4424320161342621, "epoch": 2.07, "learning_rate": 4.4431074279764585e-05, "loss": 0.5218, "step": 5720, "task_loss": 0.5846661329269409 }, { "compression_loss": 0.0, "distillation_loss": 0.5387907028198242, "epoch": 2.07, "learning_rate": 4.4354513220682575e-05, "loss": 0.5726, "step": 5730, "task_loss": 0.5176105499267578 }, { "compression_loss": 0.0, "distillation_loss": 0.696904182434082, "epoch": 2.07, "learning_rate": 4.427790392658479e-05, "loss": 0.6044, "step": 5740, "task_loss": 0.7294101715087891 }, { "compression_loss": 0.0, "distillation_loss": 0.6790659427642822, "epoch": 2.08, "learning_rate": 4.4201246792494514e-05, "loss": 0.5654, "step": 5750, "task_loss": 1.0561041831970215 }, { "epoch": 2.08, "eval_exact_match": 81.59886471144749, "eval_f1": 89.04378469586636, "step": 5750 }, { "compression_loss": 0.0, "distillation_loss": 0.4576905071735382, "epoch": 2.08, "learning_rate": 4.41245422136817e-05, "loss": 0.6122, "step": 5760, "task_loss": 0.7306819558143616 }, { "compression_loss": 0.0, "distillation_loss": 0.3834877014160156, "epoch": 2.09, "learning_rate": 4.404779058566094e-05, "loss": 0.5291, "step": 5770, "task_loss": 0.35700392723083496 }, { "compression_loss": 0.0, "distillation_loss": 0.6859825849533081, "epoch": 2.09, "learning_rate": 4.397099230418947e-05, "loss": 0.561, "step": 5780, "task_loss": 1.0357601642608643 }, { "compression_loss": 0.0, "distillation_loss": 0.5586205720901489, "epoch": 2.09, "learning_rate": 4.3894147765265014e-05, "loss": 0.5312, "step": 5790, "task_loss": 0.5139092206954956 }, { "compression_loss": 0.0, "distillation_loss": 0.7590255737304688, "epoch": 2.1, "learning_rate": 4.381725736512387e-05, "loss": 0.6398, "step": 5800, "task_loss": 0.6853212118148804 }, { "compression_loss": 0.0, "distillation_loss": 0.5403786897659302, "epoch": 2.1, "learning_rate": 4.374032150023881e-05, "loss": 0.5527, "step": 5810, "task_loss": 0.8133782744407654 }, { "compression_loss": 0.0, "distillation_loss": 0.7060180306434631, "epoch": 2.1, "learning_rate": 4.366334056731701e-05, "loss": 0.5431, "step": 5820, "task_loss": 0.6709779500961304 }, { "compression_loss": 0.0, "distillation_loss": 0.5633669495582581, "epoch": 2.11, "learning_rate": 4.358631496329804e-05, "loss": 0.549, "step": 5830, "task_loss": 0.44238489866256714 }, { "compression_loss": 0.0, "distillation_loss": 0.45574522018432617, "epoch": 2.11, "learning_rate": 4.350924508535184e-05, "loss": 0.5134, "step": 5840, "task_loss": 0.5507422089576721 }, { "compression_loss": 0.0, "distillation_loss": 0.5343673229217529, "epoch": 2.11, "learning_rate": 4.3432131330876594e-05, "loss": 0.5119, "step": 5850, "task_loss": 0.6400316953659058 }, { "compression_loss": 0.0, "distillation_loss": 0.4289189577102661, "epoch": 2.12, "learning_rate": 4.335497409749677e-05, "loss": 0.5306, "step": 5860, "task_loss": 0.35904762148857117 }, { "compression_loss": 0.0, "distillation_loss": 0.5090552568435669, "epoch": 2.12, "learning_rate": 4.327777378306099e-05, "loss": 0.5495, "step": 5870, "task_loss": 0.43965592980384827 }, { "compression_loss": 0.0, "distillation_loss": 0.3385082185268402, "epoch": 2.13, "learning_rate": 4.3200530785640036e-05, "loss": 0.5618, "step": 5880, "task_loss": 0.5122158527374268 }, { "compression_loss": 0.0, "distillation_loss": 0.5285344123840332, "epoch": 2.13, "learning_rate": 4.312324550352479e-05, "loss": 0.5729, "step": 5890, "task_loss": 0.6291429400444031 }, { "compression_loss": 0.0, "distillation_loss": 0.37837162613868713, "epoch": 2.13, "learning_rate": 4.3045918335224154e-05, "loss": 0.5985, "step": 5900, "task_loss": 0.3076714277267456 }, { "compression_loss": 0.0, "distillation_loss": 0.4923848509788513, "epoch": 2.14, "learning_rate": 4.296854967946301e-05, "loss": 0.5914, "step": 5910, "task_loss": 0.39471399784088135 }, { "compression_loss": 0.0, "distillation_loss": 0.9474149942398071, "epoch": 2.14, "learning_rate": 4.289113993518015e-05, "loss": 0.5734, "step": 5920, "task_loss": 1.258808970451355 }, { "compression_loss": 0.0, "distillation_loss": 0.4284988045692444, "epoch": 2.14, "learning_rate": 4.2813689501526246e-05, "loss": 0.5348, "step": 5930, "task_loss": 0.6401066184043884 }, { "compression_loss": 0.0, "distillation_loss": 0.6566985845565796, "epoch": 2.15, "learning_rate": 4.2736198777861805e-05, "loss": 0.6076, "step": 5940, "task_loss": 0.7970720529556274 }, { "compression_loss": 0.0, "distillation_loss": 0.5755891799926758, "epoch": 2.15, "learning_rate": 4.265866816375503e-05, "loss": 0.5311, "step": 5950, "task_loss": 0.42543694376945496 }, { "compression_loss": 0.0, "distillation_loss": 0.41077885031700134, "epoch": 2.15, "learning_rate": 4.258109805897985e-05, "loss": 0.5489, "step": 5960, "task_loss": 0.32989516854286194 }, { "compression_loss": 0.0, "distillation_loss": 1.1504120826721191, "epoch": 2.16, "learning_rate": 4.2503488863513794e-05, "loss": 0.6178, "step": 5970, "task_loss": 1.1779499053955078 }, { "compression_loss": 0.0, "distillation_loss": 0.4549116790294647, "epoch": 2.16, "learning_rate": 4.242584097753599e-05, "loss": 0.5549, "step": 5980, "task_loss": 0.8180230855941772 }, { "compression_loss": 0.0, "distillation_loss": 0.5225155353546143, "epoch": 2.16, "learning_rate": 4.2348154801425044e-05, "loss": 0.5836, "step": 5990, "task_loss": 1.0198224782943726 }, { "compression_loss": 0.0, "distillation_loss": 0.8069769144058228, "epoch": 2.17, "learning_rate": 4.227043073575699e-05, "loss": 0.5687, "step": 6000, "task_loss": 0.9743910431861877 }, { "epoch": 2.17, "eval_exact_match": 81.69347209082308, "eval_f1": 89.0618531655906, "step": 6000 }, { "compression_loss": 0.0, "distillation_loss": 0.795231282711029, "epoch": 2.17, "learning_rate": 4.219266918130327e-05, "loss": 0.5749, "step": 6010, "task_loss": 0.9268602132797241 }, { "compression_loss": 0.0, "distillation_loss": 0.6040865182876587, "epoch": 2.18, "learning_rate": 4.211487053902858e-05, "loss": 0.5849, "step": 6020, "task_loss": 0.6653480529785156 }, { "compression_loss": 0.0, "distillation_loss": 0.5331346988677979, "epoch": 2.18, "learning_rate": 4.203703521008891e-05, "loss": 0.6451, "step": 6030, "task_loss": 0.5994354486465454 }, { "compression_loss": 0.0, "distillation_loss": 0.4456799328327179, "epoch": 2.18, "learning_rate": 4.195916359582938e-05, "loss": 0.5041, "step": 6040, "task_loss": 0.989681601524353 }, { "compression_loss": 0.0, "distillation_loss": 0.4132393002510071, "epoch": 2.19, "learning_rate": 4.188125609778219e-05, "loss": 0.5796, "step": 6050, "task_loss": 1.066744089126587 }, { "compression_loss": 0.0, "distillation_loss": 0.91560298204422, "epoch": 2.19, "learning_rate": 4.180331311766464e-05, "loss": 0.6226, "step": 6060, "task_loss": 0.8165582418441772 }, { "compression_loss": 0.0, "distillation_loss": 0.430245965719223, "epoch": 2.19, "learning_rate": 4.1725335057376915e-05, "loss": 0.5809, "step": 6070, "task_loss": 0.5763449668884277 }, { "compression_loss": 0.0, "distillation_loss": 0.5203709006309509, "epoch": 2.2, "learning_rate": 4.164732231900013e-05, "loss": 0.5706, "step": 6080, "task_loss": 0.4892015755176544 }, { "compression_loss": 0.0, "distillation_loss": 0.434043824672699, "epoch": 2.2, "learning_rate": 4.15692753047942e-05, "loss": 0.6104, "step": 6090, "task_loss": 0.4391113817691803 }, { "compression_loss": 0.0, "distillation_loss": 0.492077112197876, "epoch": 2.2, "learning_rate": 4.149119441719576e-05, "loss": 0.6113, "step": 6100, "task_loss": 0.33191928267478943 }, { "compression_loss": 0.0, "distillation_loss": 0.3813079595565796, "epoch": 2.21, "learning_rate": 4.141308005881614e-05, "loss": 0.5083, "step": 6110, "task_loss": 0.3728364109992981 }, { "compression_loss": 0.0, "distillation_loss": 0.44847387075424194, "epoch": 2.21, "learning_rate": 4.133493263243922e-05, "loss": 0.5311, "step": 6120, "task_loss": 0.34665340185165405 }, { "compression_loss": 0.0, "distillation_loss": 0.5354032516479492, "epoch": 2.22, "learning_rate": 4.1256752541019415e-05, "loss": 0.5935, "step": 6130, "task_loss": 0.7769221067428589 }, { "compression_loss": 0.0, "distillation_loss": 0.5569686889648438, "epoch": 2.22, "learning_rate": 4.1178540187679585e-05, "loss": 0.6126, "step": 6140, "task_loss": 0.6996185779571533 }, { "compression_loss": 0.0, "distillation_loss": 0.5548681020736694, "epoch": 2.22, "learning_rate": 4.1100295975708904e-05, "loss": 0.5007, "step": 6150, "task_loss": 0.4657728672027588 }, { "compression_loss": 0.0, "distillation_loss": 0.650769829750061, "epoch": 2.23, "learning_rate": 4.102202030856085e-05, "loss": 0.6611, "step": 6160, "task_loss": 0.7665435075759888 }, { "compression_loss": 0.0, "distillation_loss": 0.4635891020298004, "epoch": 2.23, "learning_rate": 4.0943713589851066e-05, "loss": 0.5795, "step": 6170, "task_loss": 0.41729113459587097 }, { "compression_loss": 0.0, "distillation_loss": 0.4042726457118988, "epoch": 2.23, "learning_rate": 4.086537622335534e-05, "loss": 0.5163, "step": 6180, "task_loss": 0.31872618198394775 }, { "compression_loss": 0.0, "distillation_loss": 0.45881569385528564, "epoch": 2.24, "learning_rate": 4.0787008613007484e-05, "loss": 0.5228, "step": 6190, "task_loss": 0.5137763023376465 }, { "compression_loss": 0.0, "distillation_loss": 0.45852431654930115, "epoch": 2.24, "learning_rate": 4.070861116289723e-05, "loss": 0.5515, "step": 6200, "task_loss": 0.5384382009506226 }, { "compression_loss": 0.0, "distillation_loss": 0.8377902507781982, "epoch": 2.24, "learning_rate": 4.063018427726821e-05, "loss": 0.5644, "step": 6210, "task_loss": 0.8768316507339478 }, { "compression_loss": 0.0, "distillation_loss": 0.5842976570129395, "epoch": 2.25, "learning_rate": 4.05517283605158e-05, "loss": 0.5803, "step": 6220, "task_loss": 0.6779041886329651 }, { "compression_loss": 0.0, "distillation_loss": 0.6453624963760376, "epoch": 2.25, "learning_rate": 4.047324381718511e-05, "loss": 0.6245, "step": 6230, "task_loss": 0.44167518615722656 }, { "compression_loss": 0.0, "distillation_loss": 0.6187850832939148, "epoch": 2.26, "learning_rate": 4.039473105196883e-05, "loss": 0.5975, "step": 6240, "task_loss": 0.5326038599014282 }, { "compression_loss": 0.0, "distillation_loss": 0.37316951155662537, "epoch": 2.26, "learning_rate": 4.031619046970517e-05, "loss": 0.5677, "step": 6250, "task_loss": 0.4080279767513275 }, { "epoch": 2.26, "eval_exact_match": 81.9205298013245, "eval_f1": 89.22495923106058, "step": 6250 }, { "compression_loss": 0.0, "distillation_loss": 0.6016359329223633, "epoch": 2.26, "learning_rate": 4.02376224753758e-05, "loss": 0.599, "step": 6260, "task_loss": 0.4683157503604889 }, { "compression_loss": 0.0, "distillation_loss": 0.8311920762062073, "epoch": 2.27, "learning_rate": 4.01590274741037e-05, "loss": 0.6095, "step": 6270, "task_loss": 1.272101640701294 }, { "compression_loss": 0.0, "distillation_loss": 0.6363869309425354, "epoch": 2.27, "learning_rate": 4.008040587115112e-05, "loss": 0.5894, "step": 6280, "task_loss": 1.0684162378311157 }, { "compression_loss": 0.0, "distillation_loss": 0.3956725001335144, "epoch": 2.27, "learning_rate": 4.000175807191752e-05, "loss": 0.5567, "step": 6290, "task_loss": 0.3566119074821472 }, { "compression_loss": 0.0, "distillation_loss": 0.7650632858276367, "epoch": 2.28, "learning_rate": 3.992308448193736e-05, "loss": 0.674, "step": 6300, "task_loss": 0.5824695825576782 }, { "compression_loss": 0.0, "distillation_loss": 0.7076523303985596, "epoch": 2.28, "learning_rate": 3.984438550687815e-05, "loss": 0.5626, "step": 6310, "task_loss": 1.0556983947753906 }, { "compression_loss": 0.0, "distillation_loss": 0.47868841886520386, "epoch": 2.28, "learning_rate": 3.976566155253826e-05, "loss": 0.5895, "step": 6320, "task_loss": 0.6626901626586914 }, { "compression_loss": 0.0, "distillation_loss": 0.5259031057357788, "epoch": 2.29, "learning_rate": 3.9686913024844855e-05, "loss": 0.5349, "step": 6330, "task_loss": 0.3104013502597809 }, { "compression_loss": 0.0, "distillation_loss": 0.4891437590122223, "epoch": 2.29, "learning_rate": 3.960814032985186e-05, "loss": 0.5576, "step": 6340, "task_loss": 0.6433727741241455 }, { "compression_loss": 0.0, "distillation_loss": 0.5479400753974915, "epoch": 2.29, "learning_rate": 3.952934387373775e-05, "loss": 0.5763, "step": 6350, "task_loss": 0.6837672591209412 }, { "compression_loss": 0.0, "distillation_loss": 0.6273237466812134, "epoch": 2.3, "learning_rate": 3.945052406280356e-05, "loss": 0.5408, "step": 6360, "task_loss": 1.0349431037902832 }, { "compression_loss": 0.0, "distillation_loss": 0.48982030153274536, "epoch": 2.3, "learning_rate": 3.937168130347074e-05, "loss": 0.5978, "step": 6370, "task_loss": 0.8352075815200806 }, { "compression_loss": 0.0, "distillation_loss": 0.6010058522224426, "epoch": 2.31, "learning_rate": 3.9292816002279055e-05, "loss": 0.5013, "step": 6380, "task_loss": 0.645653486251831 }, { "compression_loss": 0.0, "distillation_loss": 0.6257983446121216, "epoch": 2.31, "learning_rate": 3.921392856588455e-05, "loss": 0.4692, "step": 6390, "task_loss": 0.7384665012359619 }, { "compression_loss": 0.0, "distillation_loss": 0.5193866491317749, "epoch": 2.31, "learning_rate": 3.9135019401057337e-05, "loss": 0.5861, "step": 6400, "task_loss": 0.575605571269989 }, { "compression_loss": 0.0, "distillation_loss": 0.6001657247543335, "epoch": 2.32, "learning_rate": 3.9056088914679635e-05, "loss": 0.585, "step": 6410, "task_loss": 0.48626482486724854 }, { "compression_loss": 0.0, "distillation_loss": 0.630062460899353, "epoch": 2.32, "learning_rate": 3.897713751374355e-05, "loss": 0.5203, "step": 6420, "task_loss": 0.657923698425293 }, { "compression_loss": 0.0, "distillation_loss": 0.44871628284454346, "epoch": 2.32, "learning_rate": 3.889816560534907e-05, "loss": 0.5597, "step": 6430, "task_loss": 0.691073477268219 }, { "compression_loss": 0.0, "distillation_loss": 0.4313535690307617, "epoch": 2.33, "learning_rate": 3.881917359670191e-05, "loss": 0.5059, "step": 6440, "task_loss": 0.70628821849823 }, { "compression_loss": 0.0, "distillation_loss": 0.483176052570343, "epoch": 2.33, "learning_rate": 3.874016189511141e-05, "loss": 0.5833, "step": 6450, "task_loss": 0.5552691221237183 }, { "compression_loss": 0.0, "distillation_loss": 0.582874059677124, "epoch": 2.33, "learning_rate": 3.8661130907988505e-05, "loss": 0.5192, "step": 6460, "task_loss": 0.6283879280090332 }, { "compression_loss": 0.0, "distillation_loss": 0.45941537618637085, "epoch": 2.34, "learning_rate": 3.85820810428435e-05, "loss": 0.59, "step": 6470, "task_loss": 0.8111543655395508 }, { "compression_loss": 0.0, "distillation_loss": 0.6040874719619751, "epoch": 2.34, "learning_rate": 3.850301270728412e-05, "loss": 0.5399, "step": 6480, "task_loss": 0.8921731114387512 }, { "compression_loss": 0.0, "distillation_loss": 0.6502740979194641, "epoch": 2.35, "learning_rate": 3.8423926309013267e-05, "loss": 0.5538, "step": 6490, "task_loss": 0.6972618699073792 }, { "compression_loss": 0.0, "distillation_loss": 0.5272783041000366, "epoch": 2.35, "learning_rate": 3.834482225582701e-05, "loss": 0.5857, "step": 6500, "task_loss": 0.8147091865539551 }, { "epoch": 2.35, "eval_exact_match": 81.54210028382214, "eval_f1": 89.0964633965607, "step": 6500 }, { "compression_loss": 0.0, "distillation_loss": 0.7971393465995789, "epoch": 2.35, "learning_rate": 3.826570095561245e-05, "loss": 0.5413, "step": 6510, "task_loss": 0.8758788108825684 }, { "compression_loss": 0.0, "distillation_loss": 0.6447325944900513, "epoch": 2.36, "learning_rate": 3.8186562816345615e-05, "loss": 0.5911, "step": 6520, "task_loss": 0.6143375635147095 }, { "compression_loss": 0.0, "distillation_loss": 0.5946791172027588, "epoch": 2.36, "learning_rate": 3.8107408246089365e-05, "loss": 0.5965, "step": 6530, "task_loss": 0.712870717048645 }, { "compression_loss": 0.0, "distillation_loss": 0.5228013396263123, "epoch": 2.36, "learning_rate": 3.802823765299128e-05, "loss": 0.5602, "step": 6540, "task_loss": 0.8571695685386658 }, { "compression_loss": 0.0, "distillation_loss": 0.6156158447265625, "epoch": 2.37, "learning_rate": 3.794905144528156e-05, "loss": 0.5644, "step": 6550, "task_loss": 0.9497164487838745 }, { "compression_loss": 0.0, "distillation_loss": 0.6857894659042358, "epoch": 2.37, "learning_rate": 3.7869850031270916e-05, "loss": 0.5606, "step": 6560, "task_loss": 0.6282366514205933 }, { "compression_loss": 0.0, "distillation_loss": 0.5698332190513611, "epoch": 2.37, "learning_rate": 3.7790633819348476e-05, "loss": 0.5349, "step": 6570, "task_loss": 0.48486849665641785 }, { "compression_loss": 0.0, "distillation_loss": 0.551287055015564, "epoch": 2.38, "learning_rate": 3.771140321797967e-05, "loss": 0.5857, "step": 6580, "task_loss": 0.6155105829238892 }, { "compression_loss": 0.0, "distillation_loss": 0.4047469198703766, "epoch": 2.38, "learning_rate": 3.7632158635704116e-05, "loss": 0.5965, "step": 6590, "task_loss": 0.68361496925354 }, { "compression_loss": 0.0, "distillation_loss": 0.5632671117782593, "epoch": 2.39, "learning_rate": 3.755290048113352e-05, "loss": 0.5808, "step": 6600, "task_loss": 1.0737969875335693 }, { "compression_loss": 0.0, "distillation_loss": 0.5189325213432312, "epoch": 2.39, "learning_rate": 3.747362916294959e-05, "loss": 0.5092, "step": 6610, "task_loss": 1.006134033203125 }, { "compression_loss": 0.0, "distillation_loss": 0.5590713024139404, "epoch": 2.39, "learning_rate": 3.7394345089901914e-05, "loss": 0.5843, "step": 6620, "task_loss": 1.0642428398132324 }, { "compression_loss": 0.0, "distillation_loss": 0.6743557453155518, "epoch": 2.4, "learning_rate": 3.731504867080579e-05, "loss": 0.6301, "step": 6630, "task_loss": 1.1177798509597778 }, { "compression_loss": 0.0, "distillation_loss": 0.6843348741531372, "epoch": 2.4, "learning_rate": 3.723574031454026e-05, "loss": 0.6255, "step": 6640, "task_loss": 1.0057365894317627 }, { "compression_loss": 0.0, "distillation_loss": 0.5847528576850891, "epoch": 2.4, "learning_rate": 3.715642043004586e-05, "loss": 0.5272, "step": 6650, "task_loss": 0.7699944972991943 }, { "compression_loss": 0.0, "distillation_loss": 0.48671820759773254, "epoch": 2.41, "learning_rate": 3.7077089426322586e-05, "loss": 0.5762, "step": 6660, "task_loss": 0.7598029375076294 }, { "compression_loss": 0.0, "distillation_loss": 0.5621806383132935, "epoch": 2.41, "learning_rate": 3.699774771242778e-05, "loss": 0.5534, "step": 6670, "task_loss": 0.47044456005096436 }, { "compression_loss": 0.0, "distillation_loss": 0.499066561460495, "epoch": 2.41, "learning_rate": 3.691839569747399e-05, "loss": 0.5246, "step": 6680, "task_loss": 0.48376011848449707 }, { "compression_loss": 0.0, "distillation_loss": 0.6511626839637756, "epoch": 2.42, "learning_rate": 3.68390337906269e-05, "loss": 0.6135, "step": 6690, "task_loss": 0.9496736526489258 }, { "compression_loss": 0.0, "distillation_loss": 0.7415065169334412, "epoch": 2.42, "learning_rate": 3.675966240110316e-05, "loss": 0.5912, "step": 6700, "task_loss": 0.7771897315979004 }, { "compression_loss": 0.0, "distillation_loss": 0.6975048780441284, "epoch": 2.43, "learning_rate": 3.668028193816837e-05, "loss": 0.6043, "step": 6710, "task_loss": 0.7732299566268921 }, { "compression_loss": 0.0, "distillation_loss": 0.5505435466766357, "epoch": 2.43, "learning_rate": 3.6600892811134887e-05, "loss": 0.5488, "step": 6720, "task_loss": 0.47356608510017395 }, { "compression_loss": 0.0, "distillation_loss": 0.6242953538894653, "epoch": 2.43, "learning_rate": 3.652149542935974e-05, "loss": 0.5837, "step": 6730, "task_loss": 0.6268038749694824 }, { "compression_loss": 0.0, "distillation_loss": 0.5890005826950073, "epoch": 2.44, "learning_rate": 3.644209020224254e-05, "loss": 0.5406, "step": 6740, "task_loss": 0.503425121307373 }, { "compression_loss": 0.0, "distillation_loss": 0.510681688785553, "epoch": 2.44, "learning_rate": 3.6362677539223316e-05, "loss": 0.4745, "step": 6750, "task_loss": 0.6263338327407837 }, { "epoch": 2.44, "eval_exact_match": 81.9205298013245, "eval_f1": 89.17913767330805, "step": 6750 }, { "compression_loss": 0.0, "distillation_loss": 0.48122429847717285, "epoch": 2.44, "learning_rate": 3.628325784978048e-05, "loss": 0.6206, "step": 6760, "task_loss": 0.7160674333572388 }, { "compression_loss": 0.0, "distillation_loss": 0.5861722230911255, "epoch": 2.45, "learning_rate": 3.620383154342866e-05, "loss": 0.5832, "step": 6770, "task_loss": 0.7612639665603638 }, { "compression_loss": 0.0, "distillation_loss": 0.35261979699134827, "epoch": 2.45, "learning_rate": 3.612439902971659e-05, "loss": 0.6246, "step": 6780, "task_loss": 0.43168213963508606 }, { "compression_loss": 0.0, "distillation_loss": 0.4552045464515686, "epoch": 2.45, "learning_rate": 3.604496071822503e-05, "loss": 0.5272, "step": 6790, "task_loss": 0.5549889206886292 }, { "compression_loss": 0.0, "distillation_loss": 0.5096551775932312, "epoch": 2.46, "learning_rate": 3.596551701856461e-05, "loss": 0.4898, "step": 6800, "task_loss": 0.6168457865715027 }, { "compression_loss": 0.0, "distillation_loss": 0.44083502888679504, "epoch": 2.46, "learning_rate": 3.5886068340373774e-05, "loss": 0.5555, "step": 6810, "task_loss": 0.525178074836731 }, { "compression_loss": 0.0, "distillation_loss": 0.7820392847061157, "epoch": 2.46, "learning_rate": 3.580661509331662e-05, "loss": 0.5692, "step": 6820, "task_loss": 0.7302403450012207 }, { "compression_loss": 0.0, "distillation_loss": 0.5217739343643188, "epoch": 2.47, "learning_rate": 3.572715768708081e-05, "loss": 0.5292, "step": 6830, "task_loss": 0.6324975490570068 }, { "compression_loss": 0.0, "distillation_loss": 0.6764619946479797, "epoch": 2.47, "learning_rate": 3.564769653137545e-05, "loss": 0.5874, "step": 6840, "task_loss": 0.6163495779037476 }, { "compression_loss": 0.0, "distillation_loss": 0.45639386773109436, "epoch": 2.48, "learning_rate": 3.556823203592897e-05, "loss": 0.5314, "step": 6850, "task_loss": 0.3424451947212219 }, { "compression_loss": 0.0, "distillation_loss": 0.6600396633148193, "epoch": 2.48, "learning_rate": 3.548876461048703e-05, "loss": 0.5602, "step": 6860, "task_loss": 0.7038537263870239 }, { "compression_loss": 0.0, "distillation_loss": 0.6123368740081787, "epoch": 2.48, "learning_rate": 3.5409294664810414e-05, "loss": 0.6264, "step": 6870, "task_loss": 0.5839701890945435 }, { "compression_loss": 0.0, "distillation_loss": 0.50824373960495, "epoch": 2.49, "learning_rate": 3.5329822608672863e-05, "loss": 0.5268, "step": 6880, "task_loss": 0.5268850922584534 }, { "compression_loss": 0.0, "distillation_loss": 0.5141448974609375, "epoch": 2.49, "learning_rate": 3.5250348851859044e-05, "loss": 0.5426, "step": 6890, "task_loss": 0.3040235638618469 }, { "compression_loss": 0.0, "distillation_loss": 0.53406822681427, "epoch": 2.49, "learning_rate": 3.517087380416235e-05, "loss": 0.5786, "step": 6900, "task_loss": 0.5800484418869019 }, { "compression_loss": 0.0, "distillation_loss": 0.7829024791717529, "epoch": 2.5, "learning_rate": 3.509139787538286e-05, "loss": 0.5593, "step": 6910, "task_loss": 0.8509390950202942 }, { "compression_loss": 0.0, "distillation_loss": 0.7222518920898438, "epoch": 2.5, "learning_rate": 3.501192147532521e-05, "loss": 0.5797, "step": 6920, "task_loss": 1.3599720001220703 }, { "compression_loss": 0.0, "distillation_loss": 0.5438934564590454, "epoch": 2.5, "learning_rate": 3.49324450137964e-05, "loss": 0.6199, "step": 6930, "task_loss": 0.6372125148773193 }, { "compression_loss": 0.0, "distillation_loss": 0.4705803394317627, "epoch": 2.51, "learning_rate": 3.485296890060384e-05, "loss": 0.5347, "step": 6940, "task_loss": 0.7346433401107788 }, { "compression_loss": 0.0, "distillation_loss": 0.5038604140281677, "epoch": 2.51, "learning_rate": 3.4773493545553046e-05, "loss": 0.5929, "step": 6950, "task_loss": 0.8522573709487915 }, { "compression_loss": 0.0, "distillation_loss": 0.4030439853668213, "epoch": 2.52, "learning_rate": 3.469401935844572e-05, "loss": 0.5134, "step": 6960, "task_loss": 0.23669208586215973 }, { "compression_loss": 0.0, "distillation_loss": 0.5648401975631714, "epoch": 2.52, "learning_rate": 3.461454674907746e-05, "loss": 0.5579, "step": 6970, "task_loss": 0.5249247550964355 }, { "compression_loss": 0.0, "distillation_loss": 0.7584888339042664, "epoch": 2.52, "learning_rate": 3.453507612723578e-05, "loss": 0.5626, "step": 6980, "task_loss": 0.7458637356758118 }, { "compression_loss": 0.0, "distillation_loss": 0.6533717513084412, "epoch": 2.53, "learning_rate": 3.445560790269793e-05, "loss": 0.5996, "step": 6990, "task_loss": 0.7032555341720581 }, { "compression_loss": 0.0, "distillation_loss": 0.4984493553638458, "epoch": 2.53, "learning_rate": 3.437614248522879e-05, "loss": 0.6306, "step": 7000, "task_loss": 0.46757519245147705 }, { "epoch": 2.53, "eval_exact_match": 81.42857142857143, "eval_f1": 88.92804168222406, "step": 7000 }, { "compression_loss": 0.0, "distillation_loss": 0.4259212017059326, "epoch": 2.53, "learning_rate": 3.429668028457878e-05, "loss": 0.5477, "step": 7010, "task_loss": 0.58353590965271 }, { "compression_loss": 0.0, "distillation_loss": 0.7622242569923401, "epoch": 2.54, "learning_rate": 3.421722171048173e-05, "loss": 0.54, "step": 7020, "task_loss": 0.8156880140304565 }, { "compression_loss": 0.0, "distillation_loss": 0.6038123369216919, "epoch": 2.54, "learning_rate": 3.413776717265275e-05, "loss": 0.6034, "step": 7030, "task_loss": 0.7022565603256226 }, { "compression_loss": 0.0, "distillation_loss": 0.33076730370521545, "epoch": 2.54, "learning_rate": 3.4058317080786186e-05, "loss": 0.5226, "step": 7040, "task_loss": 0.26581063866615295 }, { "compression_loss": 0.0, "distillation_loss": 0.7431491613388062, "epoch": 2.55, "learning_rate": 3.397887184455339e-05, "loss": 0.5195, "step": 7050, "task_loss": 0.5218563079833984 }, { "compression_loss": 0.0, "distillation_loss": 0.6481354236602783, "epoch": 2.55, "learning_rate": 3.389943187360075e-05, "loss": 0.629, "step": 7060, "task_loss": 0.6381015777587891 }, { "compression_loss": 0.0, "distillation_loss": 0.5212134122848511, "epoch": 2.56, "learning_rate": 3.381999757754745e-05, "loss": 0.5217, "step": 7070, "task_loss": 0.6918044686317444 }, { "compression_loss": 0.0, "distillation_loss": 0.42733877897262573, "epoch": 2.56, "learning_rate": 3.374056936598344e-05, "loss": 0.5712, "step": 7080, "task_loss": 0.6634185314178467 }, { "compression_loss": 0.0, "distillation_loss": 0.36479300260543823, "epoch": 2.56, "learning_rate": 3.3661147648467286e-05, "loss": 0.4548, "step": 7090, "task_loss": 0.9132073521614075 }, { "compression_loss": 0.0, "distillation_loss": 0.43868011236190796, "epoch": 2.57, "learning_rate": 3.3581732834524065e-05, "loss": 0.5306, "step": 7100, "task_loss": 0.44713014364242554 }, { "compression_loss": 0.0, "distillation_loss": 0.738867461681366, "epoch": 2.57, "learning_rate": 3.350232533364326e-05, "loss": 0.541, "step": 7110, "task_loss": 0.9076246023178101 }, { "compression_loss": 0.0, "distillation_loss": 0.47398871183395386, "epoch": 2.57, "learning_rate": 3.342292555527666e-05, "loss": 0.5282, "step": 7120, "task_loss": 0.48428577184677124 }, { "compression_loss": 0.0, "distillation_loss": 0.6713889837265015, "epoch": 2.58, "learning_rate": 3.3343533908836205e-05, "loss": 0.6313, "step": 7130, "task_loss": 1.2264108657836914 }, { "compression_loss": 0.0, "distillation_loss": 0.7997803688049316, "epoch": 2.58, "learning_rate": 3.326415080369194e-05, "loss": 0.5618, "step": 7140, "task_loss": 1.1236743927001953 }, { "compression_loss": 0.0, "distillation_loss": 0.4207914471626282, "epoch": 2.58, "learning_rate": 3.3184776649169834e-05, "loss": 0.5202, "step": 7150, "task_loss": 0.477580726146698 }, { "compression_loss": 0.0, "distillation_loss": 0.5528711080551147, "epoch": 2.59, "learning_rate": 3.3105411854549734e-05, "loss": 0.5922, "step": 7160, "task_loss": 0.9368165731430054 }, { "compression_loss": 0.0, "distillation_loss": 0.4040418267250061, "epoch": 2.59, "learning_rate": 3.302605682906319e-05, "loss": 0.553, "step": 7170, "task_loss": 0.4502519369125366 }, { "compression_loss": 0.0, "distillation_loss": 0.5803967714309692, "epoch": 2.59, "learning_rate": 3.294671198189141e-05, "loss": 0.5853, "step": 7180, "task_loss": 0.8804277181625366 }, { "compression_loss": 0.0, "distillation_loss": 0.7026090621948242, "epoch": 2.6, "learning_rate": 3.2867377722163106e-05, "loss": 0.6469, "step": 7190, "task_loss": 0.45715072751045227 }, { "compression_loss": 0.0, "distillation_loss": 0.45515570044517517, "epoch": 2.6, "learning_rate": 3.2788054458952395e-05, "loss": 0.4855, "step": 7200, "task_loss": 1.0894179344177246 }, { "compression_loss": 0.0, "distillation_loss": 0.6373165249824524, "epoch": 2.61, "learning_rate": 3.270874260127669e-05, "loss": 0.5423, "step": 7210, "task_loss": 0.43047064542770386 }, { "compression_loss": 0.0, "distillation_loss": 0.5695480108261108, "epoch": 2.61, "learning_rate": 3.2637372019106546e-05, "loss": 0.602, "step": 7220, "task_loss": 0.5894209742546082 }, { "compression_loss": 0.0, "distillation_loss": 0.35404130816459656, "epoch": 2.61, "learning_rate": 3.255808295857803e-05, "loss": 0.5373, "step": 7230, "task_loss": 0.6537626385688782 }, { "compression_loss": 0.0, "distillation_loss": 0.5384463667869568, "epoch": 2.62, "learning_rate": 3.247880648939492e-05, "loss": 0.5688, "step": 7240, "task_loss": 0.7174534797668457 }, { "compression_loss": 0.0, "distillation_loss": 0.5153848528862, "epoch": 2.62, "learning_rate": 3.239954302033335e-05, "loss": 0.5985, "step": 7250, "task_loss": 0.8951561450958252 }, { "epoch": 2.62, "eval_exact_match": 82.03405865657521, "eval_f1": 89.46367633239403, "step": 7250 }, { "compression_loss": 0.0, "distillation_loss": 0.36988773941993713, "epoch": 2.62, "learning_rate": 3.232029296010241e-05, "loss": 0.508, "step": 7260, "task_loss": 0.371121346950531 }, { "compression_loss": 0.0, "distillation_loss": 0.5778599977493286, "epoch": 2.63, "learning_rate": 3.224105671734206e-05, "loss": 0.5983, "step": 7270, "task_loss": 0.6856058239936829 }, { "compression_loss": 0.0, "distillation_loss": 0.5628077983856201, "epoch": 2.63, "learning_rate": 3.2161834700621026e-05, "loss": 0.525, "step": 7280, "task_loss": 0.6911593675613403 }, { "compression_loss": 0.0, "distillation_loss": 0.7071762681007385, "epoch": 2.63, "learning_rate": 3.2082627318434634e-05, "loss": 0.5646, "step": 7290, "task_loss": 1.0483005046844482 }, { "compression_loss": 0.0, "distillation_loss": 0.677939236164093, "epoch": 2.64, "learning_rate": 3.2003434979202836e-05, "loss": 0.5433, "step": 7300, "task_loss": 0.41416674852371216 }, { "compression_loss": 0.0, "distillation_loss": 0.5552082061767578, "epoch": 2.64, "learning_rate": 3.19242580912679e-05, "loss": 0.5089, "step": 7310, "task_loss": 0.7204807996749878 }, { "compression_loss": 0.0, "distillation_loss": 0.5466291904449463, "epoch": 2.65, "learning_rate": 3.1845097062892545e-05, "loss": 0.6588, "step": 7320, "task_loss": 1.0191127061843872 }, { "compression_loss": 0.0, "distillation_loss": 0.5075401067733765, "epoch": 2.65, "learning_rate": 3.1765952302257604e-05, "loss": 0.5275, "step": 7330, "task_loss": 0.4421289265155792 }, { "compression_loss": 0.0, "distillation_loss": 0.720165491104126, "epoch": 2.65, "learning_rate": 3.1686824217460105e-05, "loss": 0.5934, "step": 7340, "task_loss": 1.0821608304977417 }, { "compression_loss": 0.0, "distillation_loss": 0.7632290124893188, "epoch": 2.66, "learning_rate": 3.160771321651105e-05, "loss": 0.5672, "step": 7350, "task_loss": 0.9648917317390442 }, { "compression_loss": 0.0, "distillation_loss": 0.4810241758823395, "epoch": 2.66, "learning_rate": 3.152861970733336e-05, "loss": 0.589, "step": 7360, "task_loss": 0.7089782953262329 }, { "compression_loss": 0.0, "distillation_loss": 0.4056462049484253, "epoch": 2.66, "learning_rate": 3.144954409775978e-05, "loss": 0.5529, "step": 7370, "task_loss": 0.30260807275772095 }, { "compression_loss": 0.0, "distillation_loss": 0.5448493361473083, "epoch": 2.67, "learning_rate": 3.1370486795530724e-05, "loss": 0.5954, "step": 7380, "task_loss": 0.7051540613174438 }, { "compression_loss": 0.0, "distillation_loss": 0.4768620729446411, "epoch": 2.67, "learning_rate": 3.129144820829223e-05, "loss": 0.5176, "step": 7390, "task_loss": 0.4055364727973938 }, { "compression_loss": 0.0, "distillation_loss": 0.5303025841712952, "epoch": 2.67, "learning_rate": 3.1212428743593856e-05, "loss": 0.4851, "step": 7400, "task_loss": 0.6226521730422974 }, { "compression_loss": 0.0, "distillation_loss": 0.4235970675945282, "epoch": 2.68, "learning_rate": 3.113342880888649e-05, "loss": 0.486, "step": 7410, "task_loss": 0.5380692481994629 }, { "compression_loss": 0.0, "distillation_loss": 0.5234407186508179, "epoch": 2.68, "learning_rate": 3.10544488115204e-05, "loss": 0.5456, "step": 7420, "task_loss": 0.6824461221694946 }, { "compression_loss": 0.0, "distillation_loss": 0.6319477558135986, "epoch": 2.69, "learning_rate": 3.097548915874299e-05, "loss": 0.593, "step": 7430, "task_loss": 0.5957967042922974 }, { "compression_loss": 0.0, "distillation_loss": 0.5229743123054504, "epoch": 2.69, "learning_rate": 3.08965502576968e-05, "loss": 0.4986, "step": 7440, "task_loss": 0.44954097270965576 }, { "compression_loss": 0.0, "distillation_loss": 0.5310331583023071, "epoch": 2.69, "learning_rate": 3.081763251541732e-05, "loss": 0.5477, "step": 7450, "task_loss": 0.8785396218299866 }, { "compression_loss": 0.0, "distillation_loss": 0.6118483543395996, "epoch": 2.7, "learning_rate": 3.0738736338830997e-05, "loss": 0.5452, "step": 7460, "task_loss": 0.8830308318138123 }, { "compression_loss": 0.0, "distillation_loss": 0.7529491186141968, "epoch": 2.7, "learning_rate": 3.0659862134753025e-05, "loss": 0.5697, "step": 7470, "task_loss": 1.0144604444503784 }, { "compression_loss": 0.0, "distillation_loss": 0.46560198068618774, "epoch": 2.7, "learning_rate": 3.0581010309885335e-05, "loss": 0.5672, "step": 7480, "task_loss": 0.5504160523414612 }, { "compression_loss": 0.0, "distillation_loss": 0.44873565435409546, "epoch": 2.71, "learning_rate": 3.0502181270814433e-05, "loss": 0.5469, "step": 7490, "task_loss": 0.730833888053894 }, { "compression_loss": 0.0, "distillation_loss": 0.8784873485565186, "epoch": 2.71, "learning_rate": 3.042337542400939e-05, "loss": 0.5677, "step": 7500, "task_loss": 0.9325830936431885 }, { "epoch": 2.71, "eval_exact_match": 82.02459791863765, "eval_f1": 89.43948705493302, "step": 7500 }, { "compression_loss": 0.0, "distillation_loss": 0.4937174320220947, "epoch": 2.71, "learning_rate": 3.0344593175819606e-05, "loss": 0.5672, "step": 7510, "task_loss": 0.693834662437439 }, { "compression_loss": 0.0, "distillation_loss": 0.5106741189956665, "epoch": 2.72, "learning_rate": 3.026583493247288e-05, "loss": 0.5034, "step": 7520, "task_loss": 0.9328588843345642 }, { "compression_loss": 0.0, "distillation_loss": 0.594123363494873, "epoch": 2.72, "learning_rate": 3.018710110007318e-05, "loss": 0.514, "step": 7530, "task_loss": 1.1014786958694458 }, { "compression_loss": 0.0, "distillation_loss": 0.5339140892028809, "epoch": 2.72, "learning_rate": 3.010839208459863e-05, "loss": 0.5621, "step": 7540, "task_loss": 0.6708189249038696 }, { "compression_loss": 0.0, "distillation_loss": 0.6270914673805237, "epoch": 2.73, "learning_rate": 3.0029708291899378e-05, "loss": 0.5361, "step": 7550, "task_loss": 0.8918893337249756 }, { "compression_loss": 0.0, "distillation_loss": 0.36830925941467285, "epoch": 2.73, "learning_rate": 2.9951050127695518e-05, "loss": 0.5261, "step": 7560, "task_loss": 0.5323593616485596 }, { "compression_loss": 0.0, "distillation_loss": 0.4848215579986572, "epoch": 2.74, "learning_rate": 2.9872417997574987e-05, "loss": 0.5538, "step": 7570, "task_loss": 0.719284176826477 }, { "compression_loss": 0.0, "distillation_loss": 0.4425901472568512, "epoch": 2.74, "learning_rate": 2.979381230699151e-05, "loss": 0.5173, "step": 7580, "task_loss": 0.6015424132347107 }, { "compression_loss": 0.0, "distillation_loss": 0.5119283199310303, "epoch": 2.74, "learning_rate": 2.9715233461262427e-05, "loss": 0.5088, "step": 7590, "task_loss": 0.641405463218689 }, { "compression_loss": 0.0, "distillation_loss": 0.48643380403518677, "epoch": 2.75, "learning_rate": 2.9636681865566735e-05, "loss": 0.566, "step": 7600, "task_loss": 0.47257906198501587 }, { "compression_loss": 0.0, "distillation_loss": 0.45301955938339233, "epoch": 2.75, "learning_rate": 2.9558157924942824e-05, "loss": 0.5988, "step": 7610, "task_loss": 0.802404522895813 }, { "compression_loss": 0.0, "distillation_loss": 0.41510939598083496, "epoch": 2.75, "learning_rate": 2.947966204428658e-05, "loss": 0.5027, "step": 7620, "task_loss": 0.6679592132568359 }, { "compression_loss": 0.0, "distillation_loss": 0.5907465815544128, "epoch": 2.76, "learning_rate": 2.940119462834914e-05, "loss": 0.5256, "step": 7630, "task_loss": 1.0826715230941772 }, { "compression_loss": 0.0, "distillation_loss": 0.6161787509918213, "epoch": 2.76, "learning_rate": 2.9322756081734894e-05, "loss": 0.595, "step": 7640, "task_loss": 0.7296005487442017 }, { "compression_loss": 0.0, "distillation_loss": 0.4290156960487366, "epoch": 2.76, "learning_rate": 2.9244346808899347e-05, "loss": 0.4999, "step": 7650, "task_loss": 0.6461764574050903 }, { "compression_loss": 0.0, "distillation_loss": 0.5703807473182678, "epoch": 2.77, "learning_rate": 2.91659672141471e-05, "loss": 0.5634, "step": 7660, "task_loss": 0.7666523456573486 }, { "compression_loss": 0.0, "distillation_loss": 0.42602774500846863, "epoch": 2.77, "learning_rate": 2.9087617701629685e-05, "loss": 0.5413, "step": 7670, "task_loss": 0.5653133392333984 }, { "compression_loss": 0.0, "distillation_loss": 0.5586769580841064, "epoch": 2.78, "learning_rate": 2.9009298675343544e-05, "loss": 0.5102, "step": 7680, "task_loss": 1.4154021739959717 }, { "compression_loss": 0.0, "distillation_loss": 0.5477520227432251, "epoch": 2.78, "learning_rate": 2.8931010539127894e-05, "loss": 0.5165, "step": 7690, "task_loss": 0.6499199867248535 }, { "compression_loss": 0.0, "distillation_loss": 0.46430933475494385, "epoch": 2.78, "learning_rate": 2.8852753696662726e-05, "loss": 0.5086, "step": 7700, "task_loss": 0.4722907543182373 }, { "compression_loss": 0.0, "distillation_loss": 0.2965603768825531, "epoch": 2.79, "learning_rate": 2.877452855146659e-05, "loss": 0.5257, "step": 7710, "task_loss": 0.3856130540370941 }, { "compression_loss": 0.0, "distillation_loss": 0.7141191363334656, "epoch": 2.79, "learning_rate": 2.8696335506894678e-05, "loss": 0.6047, "step": 7720, "task_loss": 1.0723342895507812 }, { "compression_loss": 0.0, "distillation_loss": 0.5581351518630981, "epoch": 2.79, "learning_rate": 2.8618174966136597e-05, "loss": 0.5185, "step": 7730, "task_loss": 0.6514792442321777 }, { "compression_loss": 0.0, "distillation_loss": 0.5451016426086426, "epoch": 2.8, "learning_rate": 2.8540047332214402e-05, "loss": 0.6023, "step": 7740, "task_loss": 0.6417021751403809 }, { "compression_loss": 0.0, "distillation_loss": 0.4637037217617035, "epoch": 2.8, "learning_rate": 2.8461953007980413e-05, "loss": 0.5725, "step": 7750, "task_loss": 0.5279682278633118 }, { "epoch": 2.8, "eval_exact_match": 81.86376537369915, "eval_f1": 89.34181998595834, "step": 7750 }, { "compression_loss": 0.0, "distillation_loss": 0.622160792350769, "epoch": 2.8, "learning_rate": 2.8383892396115255e-05, "loss": 0.5184, "step": 7760, "task_loss": 0.8936756253242493 }, { "compression_loss": 0.0, "distillation_loss": 0.39960283041000366, "epoch": 2.81, "learning_rate": 2.830586589912568e-05, "loss": 0.5541, "step": 7770, "task_loss": 0.635195791721344 }, { "compression_loss": 0.0, "distillation_loss": 0.6131960153579712, "epoch": 2.81, "learning_rate": 2.8227873919342545e-05, "loss": 0.5496, "step": 7780, "task_loss": 0.845227837562561 }, { "compression_loss": 0.0, "distillation_loss": 0.48792392015457153, "epoch": 2.82, "learning_rate": 2.814991685891871e-05, "loss": 0.4879, "step": 7790, "task_loss": 0.7151265144348145 }, { "compression_loss": 0.0, "distillation_loss": 0.659151554107666, "epoch": 2.82, "learning_rate": 2.807199511982703e-05, "loss": 0.6531, "step": 7800, "task_loss": 1.1711469888687134 }, { "compression_loss": 0.0, "distillation_loss": 0.4288066029548645, "epoch": 2.82, "learning_rate": 2.7994109103858147e-05, "loss": 0.5295, "step": 7810, "task_loss": 0.40471869707107544 }, { "compression_loss": 0.0, "distillation_loss": 0.46426552534103394, "epoch": 2.83, "learning_rate": 2.7916259212618592e-05, "loss": 0.5089, "step": 7820, "task_loss": 0.44435638189315796 }, { "compression_loss": 0.0, "distillation_loss": 0.6056751608848572, "epoch": 2.83, "learning_rate": 2.7838445847528554e-05, "loss": 0.5387, "step": 7830, "task_loss": 0.74387526512146 }, { "compression_loss": 0.0, "distillation_loss": 0.3924906253814697, "epoch": 2.83, "learning_rate": 2.776066940981993e-05, "loss": 0.5363, "step": 7840, "task_loss": 0.6373250484466553 }, { "compression_loss": 0.0, "distillation_loss": 0.5626404285430908, "epoch": 2.84, "learning_rate": 2.7682930300534165e-05, "loss": 0.5712, "step": 7850, "task_loss": 0.7083041667938232 }, { "compression_loss": 0.0, "distillation_loss": 0.6474617719650269, "epoch": 2.84, "learning_rate": 2.7605228920520273e-05, "loss": 0.5376, "step": 7860, "task_loss": 0.6635825634002686 }, { "compression_loss": 0.0, "distillation_loss": 0.7015373706817627, "epoch": 2.84, "learning_rate": 2.7527565670432668e-05, "loss": 0.6284, "step": 7870, "task_loss": 1.1057612895965576 }, { "compression_loss": 0.0, "distillation_loss": 0.592483401298523, "epoch": 2.85, "learning_rate": 2.7449940950729213e-05, "loss": 0.5704, "step": 7880, "task_loss": 0.8539618253707886 }, { "compression_loss": 0.0, "distillation_loss": 0.5118753910064697, "epoch": 2.85, "learning_rate": 2.737235516166903e-05, "loss": 0.5517, "step": 7890, "task_loss": 0.6258624792098999 }, { "compression_loss": 0.0, "distillation_loss": 0.47284987568855286, "epoch": 2.86, "learning_rate": 2.729480870331058e-05, "loss": 0.4877, "step": 7900, "task_loss": 0.6712979674339294 }, { "compression_loss": 0.0, "distillation_loss": 0.5549010038375854, "epoch": 2.86, "learning_rate": 2.721730197550944e-05, "loss": 0.5215, "step": 7910, "task_loss": 1.0644373893737793 }, { "compression_loss": 0.0, "distillation_loss": 0.6102703809738159, "epoch": 2.86, "learning_rate": 2.7139835377916394e-05, "loss": 0.5548, "step": 7920, "task_loss": 0.5565172433853149 }, { "compression_loss": 0.0, "distillation_loss": 0.49971044063568115, "epoch": 2.87, "learning_rate": 2.7062409309975242e-05, "loss": 0.5297, "step": 7930, "task_loss": 1.1361805200576782 }, { "compression_loss": 0.0, "distillation_loss": 0.6527148485183716, "epoch": 2.87, "learning_rate": 2.6985024170920843e-05, "loss": 0.6014, "step": 7940, "task_loss": 0.7518099546432495 }, { "compression_loss": 0.0, "distillation_loss": 0.5662878155708313, "epoch": 2.87, "learning_rate": 2.690768035977699e-05, "loss": 0.5916, "step": 7950, "task_loss": 0.7932103872299194 }, { "compression_loss": 0.0, "distillation_loss": 0.6615817546844482, "epoch": 2.88, "learning_rate": 2.6830378275354378e-05, "loss": 0.54, "step": 7960, "task_loss": 0.6306872367858887 }, { "compression_loss": 0.0, "distillation_loss": 0.4921113848686218, "epoch": 2.88, "learning_rate": 2.6753118316248545e-05, "loss": 0.4873, "step": 7970, "task_loss": 0.5558697581291199 }, { "compression_loss": 0.0, "distillation_loss": 0.7511510848999023, "epoch": 2.88, "learning_rate": 2.6675900880837846e-05, "loss": 0.5769, "step": 7980, "task_loss": 0.8355304598808289 }, { "compression_loss": 0.0, "distillation_loss": 0.5489118099212646, "epoch": 2.89, "learning_rate": 2.65987263672813e-05, "loss": 0.5927, "step": 7990, "task_loss": 1.078818440437317 }, { "compression_loss": 0.0, "distillation_loss": 0.46048545837402344, "epoch": 2.89, "learning_rate": 2.652159517351669e-05, "loss": 0.6023, "step": 8000, "task_loss": 0.5179476737976074 }, { "epoch": 2.89, "eval_exact_match": 82.23273415326395, "eval_f1": 89.49494539305317, "step": 8000 }, { "compression_loss": 0.0, "distillation_loss": 0.4600314497947693, "epoch": 2.89, "learning_rate": 2.644450769725837e-05, "loss": 0.6, "step": 8010, "task_loss": 1.121453046798706 }, { "compression_loss": 0.0, "distillation_loss": 0.5110729336738586, "epoch": 2.9, "learning_rate": 2.6367464335995296e-05, "loss": 0.5577, "step": 8020, "task_loss": 0.517736554145813 }, { "compression_loss": 0.0, "distillation_loss": 0.6211916208267212, "epoch": 2.9, "learning_rate": 2.6290465486988934e-05, "loss": 0.5561, "step": 8030, "task_loss": 0.9467544555664062 }, { "compression_loss": 0.0, "distillation_loss": 0.451476514339447, "epoch": 2.91, "learning_rate": 2.6213511547271256e-05, "loss": 0.5386, "step": 8040, "task_loss": 0.6299540400505066 }, { "compression_loss": 0.0, "distillation_loss": 0.397861510515213, "epoch": 2.91, "learning_rate": 2.613660291364264e-05, "loss": 0.5725, "step": 8050, "task_loss": 0.6148253679275513 }, { "compression_loss": 0.0, "distillation_loss": 0.42952167987823486, "epoch": 2.91, "learning_rate": 2.6059739982669875e-05, "loss": 0.5305, "step": 8060, "task_loss": 0.49444225430488586 }, { "compression_loss": 0.0, "distillation_loss": 0.4712672829627991, "epoch": 2.92, "learning_rate": 2.5982923150684062e-05, "loss": 0.5201, "step": 8070, "task_loss": 0.3702479302883148 }, { "compression_loss": 0.0, "distillation_loss": 0.5742895603179932, "epoch": 2.92, "learning_rate": 2.5906152813778642e-05, "loss": 0.5038, "step": 8080, "task_loss": 0.6904934644699097 }, { "compression_loss": 0.0, "distillation_loss": 0.44902893900871277, "epoch": 2.92, "learning_rate": 2.582942936780726e-05, "loss": 0.5227, "step": 8090, "task_loss": 0.5900508165359497 }, { "compression_loss": 0.0, "distillation_loss": 0.5167535543441772, "epoch": 2.93, "learning_rate": 2.575275320838183e-05, "loss": 0.5737, "step": 8100, "task_loss": 0.7928590178489685 }, { "compression_loss": 0.0, "distillation_loss": 0.40067818760871887, "epoch": 2.93, "learning_rate": 2.5676124730870394e-05, "loss": 0.456, "step": 8110, "task_loss": 0.3831639885902405 }, { "compression_loss": 0.0, "distillation_loss": 0.3589646816253662, "epoch": 2.93, "learning_rate": 2.5599544330395164e-05, "loss": 0.537, "step": 8120, "task_loss": 0.42124050855636597 }, { "compression_loss": 0.0, "distillation_loss": 0.6571911573410034, "epoch": 2.94, "learning_rate": 2.5523012401830417e-05, "loss": 0.5946, "step": 8130, "task_loss": 0.8380374312400818 }, { "compression_loss": 0.0, "distillation_loss": 0.5014092326164246, "epoch": 2.94, "learning_rate": 2.5446529339800535e-05, "loss": 0.5636, "step": 8140, "task_loss": 0.4395813047885895 }, { "compression_loss": 0.0, "distillation_loss": 0.7437175512313843, "epoch": 2.95, "learning_rate": 2.5370095538677883e-05, "loss": 0.5487, "step": 8150, "task_loss": 1.2434213161468506 }, { "compression_loss": 0.0, "distillation_loss": 0.4760008454322815, "epoch": 2.95, "learning_rate": 2.529371139258086e-05, "loss": 0.5244, "step": 8160, "task_loss": 0.5885391235351562 }, { "compression_loss": 0.0, "distillation_loss": 0.6417983770370483, "epoch": 2.95, "learning_rate": 2.5217377295371787e-05, "loss": 0.5579, "step": 8170, "task_loss": 0.9728447198867798 }, { "compression_loss": 0.0, "distillation_loss": 0.6246418952941895, "epoch": 2.96, "learning_rate": 2.5141093640654972e-05, "loss": 0.6308, "step": 8180, "task_loss": 0.5883153676986694 }, { "compression_loss": 0.0, "distillation_loss": 0.605870246887207, "epoch": 2.96, "learning_rate": 2.506486082177455e-05, "loss": 0.5425, "step": 8190, "task_loss": 0.861560583114624 }, { "compression_loss": 0.0, "distillation_loss": 0.5695328712463379, "epoch": 2.96, "learning_rate": 2.4988679231812602e-05, "loss": 0.5019, "step": 8200, "task_loss": 0.6430089473724365 }, { "compression_loss": 0.0, "distillation_loss": 0.5561754703521729, "epoch": 2.97, "learning_rate": 2.4912549263587004e-05, "loss": 0.522, "step": 8210, "task_loss": 0.7524732351303101 }, { "compression_loss": 0.0, "distillation_loss": 0.4856584072113037, "epoch": 2.97, "learning_rate": 2.4836471309649488e-05, "loss": 0.5376, "step": 8220, "task_loss": 0.44399377703666687 }, { "compression_loss": 0.0, "distillation_loss": 0.417047381401062, "epoch": 2.97, "learning_rate": 2.4760445762283548e-05, "loss": 0.5393, "step": 8230, "task_loss": 0.7637783288955688 }, { "compression_loss": 0.0, "distillation_loss": 0.418597936630249, "epoch": 2.98, "learning_rate": 2.468447301350249e-05, "loss": 0.4973, "step": 8240, "task_loss": 0.5073602199554443 }, { "compression_loss": 0.0, "distillation_loss": 0.4748353362083435, "epoch": 2.98, "learning_rate": 2.4608553455047338e-05, "loss": 0.4767, "step": 8250, "task_loss": 0.853038489818573 }, { "epoch": 2.98, "eval_exact_match": 82.20435193945127, "eval_f1": 89.58123307407982, "step": 8250 }, { "compression_loss": 0.0, "distillation_loss": 0.44185155630111694, "epoch": 2.99, "learning_rate": 2.4532687478384877e-05, "loss": 0.5337, "step": 8260, "task_loss": 0.7460501790046692 }, { "compression_loss": 0.0, "distillation_loss": 0.4590781331062317, "epoch": 2.99, "learning_rate": 2.4456875474705573e-05, "loss": 0.5245, "step": 8270, "task_loss": 0.6354435086250305 }, { "compression_loss": 0.0, "distillation_loss": 0.4764559864997864, "epoch": 2.99, "learning_rate": 2.4381117834921653e-05, "loss": 0.5644, "step": 8280, "task_loss": 0.44803768396377563 }, { "compression_loss": 0.0, "distillation_loss": 0.4564550817012787, "epoch": 3.0, "learning_rate": 2.4305414949664922e-05, "loss": 0.6295, "step": 8290, "task_loss": 0.7989727258682251 }, { "compression_loss": 0.0, "distillation_loss": 0.7786110639572144, "epoch": 3.0, "learning_rate": 2.422976720928497e-05, "loss": 0.5651, "step": 8300, "task_loss": 1.1269173622131348 }, { "compression_loss": 0.0, "distillation_loss": 0.40761882066726685, "epoch": 3.0, "learning_rate": 2.415417500384695e-05, "loss": 0.4447, "step": 8310, "task_loss": 0.45310571789741516 }, { "compression_loss": 0.0, "distillation_loss": 0.48288601636886597, "epoch": 3.01, "learning_rate": 2.4078638723129704e-05, "loss": 0.4888, "step": 8320, "task_loss": 0.739985466003418 }, { "compression_loss": 0.0, "distillation_loss": 0.2868315875530243, "epoch": 3.01, "learning_rate": 2.40031587566237e-05, "loss": 0.4326, "step": 8330, "task_loss": 0.24925296008586884 }, { "compression_loss": 0.0, "distillation_loss": 0.4214899241924286, "epoch": 3.01, "learning_rate": 2.3927735493529002e-05, "loss": 0.4645, "step": 8340, "task_loss": 0.9287468194961548 }, { "compression_loss": 0.0, "distillation_loss": 0.5637334585189819, "epoch": 3.02, "learning_rate": 2.385236932275336e-05, "loss": 0.4484, "step": 8350, "task_loss": 0.9525846838951111 }, { "compression_loss": 0.0, "distillation_loss": 0.40637287497520447, "epoch": 3.02, "learning_rate": 2.377706063291005e-05, "loss": 0.4851, "step": 8360, "task_loss": 0.7950841784477234 }, { "compression_loss": 0.0, "distillation_loss": 0.3816872835159302, "epoch": 3.02, "learning_rate": 2.3701809812316033e-05, "loss": 0.4679, "step": 8370, "task_loss": 0.6461036205291748 }, { "compression_loss": 0.0, "distillation_loss": 0.4131890535354614, "epoch": 3.03, "learning_rate": 2.3626617248989822e-05, "loss": 0.4446, "step": 8380, "task_loss": 0.47412508726119995 }, { "compression_loss": 0.0, "distillation_loss": 0.4663524627685547, "epoch": 3.03, "learning_rate": 2.3551483330649577e-05, "loss": 0.478, "step": 8390, "task_loss": 0.5707106590270996 }, { "compression_loss": 0.0, "distillation_loss": 0.6307777166366577, "epoch": 3.04, "learning_rate": 2.3476408444711027e-05, "loss": 0.474, "step": 8400, "task_loss": 0.7121979594230652 }, { "compression_loss": 0.0, "distillation_loss": 0.4564501941204071, "epoch": 3.04, "learning_rate": 2.3401392978285548e-05, "loss": 0.4281, "step": 8410, "task_loss": 0.8050240278244019 }, { "compression_loss": 0.0, "distillation_loss": 0.35332757234573364, "epoch": 3.04, "learning_rate": 2.3326437318178093e-05, "loss": 0.461, "step": 8420, "task_loss": 0.6041104197502136 }, { "compression_loss": 0.0, "distillation_loss": 0.5488748550415039, "epoch": 3.05, "learning_rate": 2.3251541850885265e-05, "loss": 0.4821, "step": 8430, "task_loss": 0.7619943618774414 }, { "compression_loss": 0.0, "distillation_loss": 0.3080117106437683, "epoch": 3.05, "learning_rate": 2.3176706962593264e-05, "loss": 0.4015, "step": 8440, "task_loss": 0.44871240854263306 }, { "compression_loss": 0.0, "distillation_loss": 0.5030428767204285, "epoch": 3.05, "learning_rate": 2.310193303917596e-05, "loss": 0.4905, "step": 8450, "task_loss": 0.7244405150413513 }, { "compression_loss": 0.0, "distillation_loss": 0.36335813999176025, "epoch": 3.06, "learning_rate": 2.302722046619281e-05, "loss": 0.4854, "step": 8460, "task_loss": 0.7136834263801575 }, { "compression_loss": 0.0, "distillation_loss": 0.3732087016105652, "epoch": 3.06, "learning_rate": 2.2952569628887004e-05, "loss": 0.445, "step": 8470, "task_loss": 0.5682865381240845 }, { "compression_loss": 0.0, "distillation_loss": 0.5076338052749634, "epoch": 3.06, "learning_rate": 2.2877980912183336e-05, "loss": 0.4496, "step": 8480, "task_loss": 0.5227811336517334 }, { "compression_loss": 0.0, "distillation_loss": 0.28474703431129456, "epoch": 3.07, "learning_rate": 2.2803454700686325e-05, "loss": 0.4638, "step": 8490, "task_loss": 0.3647550940513611 }, { "compression_loss": 0.0, "distillation_loss": 0.5656081438064575, "epoch": 3.07, "learning_rate": 2.2728991378678165e-05, "loss": 0.4743, "step": 8500, "task_loss": 0.40611183643341064 }, { "epoch": 3.07, "eval_exact_match": 81.90160832544939, "eval_f1": 89.43148494540868, "step": 8500 }, { "compression_loss": 0.0, "distillation_loss": 0.4654257893562317, "epoch": 3.08, "learning_rate": 2.2654591330116794e-05, "loss": 0.4993, "step": 8510, "task_loss": 0.9065669775009155 }, { "compression_loss": 0.0, "distillation_loss": 0.4588896632194519, "epoch": 3.08, "learning_rate": 2.258025493863388e-05, "loss": 0.4447, "step": 8520, "task_loss": 0.7351205348968506 }, { "compression_loss": 0.0, "distillation_loss": 0.451030969619751, "epoch": 3.08, "learning_rate": 2.250598258753285e-05, "loss": 0.4581, "step": 8530, "task_loss": 0.4265928268432617 }, { "compression_loss": 0.0, "distillation_loss": 0.5207918882369995, "epoch": 3.09, "learning_rate": 2.243177465978691e-05, "loss": 0.4342, "step": 8540, "task_loss": 0.8488381505012512 }, { "compression_loss": 0.0, "distillation_loss": 0.2967930734157562, "epoch": 3.09, "learning_rate": 2.2357631538037123e-05, "loss": 0.394, "step": 8550, "task_loss": 0.2662898600101471 }, { "compression_loss": 0.0, "distillation_loss": 0.2869606018066406, "epoch": 3.09, "learning_rate": 2.2283553604590325e-05, "loss": 0.4646, "step": 8560, "task_loss": 0.317579984664917 }, { "compression_loss": 0.0, "distillation_loss": 0.5217782855033875, "epoch": 3.1, "learning_rate": 2.220954124141727e-05, "loss": 0.43, "step": 8570, "task_loss": 0.6486431360244751 }, { "compression_loss": 0.0, "distillation_loss": 0.5660613775253296, "epoch": 3.1, "learning_rate": 2.2135594830150573e-05, "loss": 0.489, "step": 8580, "task_loss": 0.7349064350128174 }, { "compression_loss": 0.0, "distillation_loss": 0.4088625907897949, "epoch": 3.1, "learning_rate": 2.2061714752082815e-05, "loss": 0.4138, "step": 8590, "task_loss": 0.62070232629776 }, { "compression_loss": 0.0, "distillation_loss": 0.4042903184890747, "epoch": 3.11, "learning_rate": 2.1987901388164503e-05, "loss": 0.433, "step": 8600, "task_loss": 0.39558160305023193 }, { "compression_loss": 0.0, "distillation_loss": 0.4541303217411041, "epoch": 3.11, "learning_rate": 2.1914155119002177e-05, "loss": 0.5357, "step": 8610, "task_loss": 0.6944977045059204 }, { "compression_loss": 0.0, "distillation_loss": 0.5707521438598633, "epoch": 3.12, "learning_rate": 2.1840476324856386e-05, "loss": 0.441, "step": 8620, "task_loss": 0.8816947937011719 }, { "compression_loss": 0.0, "distillation_loss": 0.5475913286209106, "epoch": 3.12, "learning_rate": 2.1766865385639794e-05, "loss": 0.4894, "step": 8630, "task_loss": 0.7322568893432617 }, { "compression_loss": 0.0, "distillation_loss": 0.4617810845375061, "epoch": 3.12, "learning_rate": 2.169332268091512e-05, "loss": 0.4456, "step": 8640, "task_loss": 0.43918541073799133 }, { "compression_loss": 0.0, "distillation_loss": 0.3960902690887451, "epoch": 3.13, "learning_rate": 2.161984858989331e-05, "loss": 0.4294, "step": 8650, "task_loss": 0.6775773763656616 }, { "compression_loss": 0.0, "distillation_loss": 0.549976110458374, "epoch": 3.13, "learning_rate": 2.154644349143147e-05, "loss": 0.4091, "step": 8660, "task_loss": 0.8885008096694946 }, { "compression_loss": 0.0, "distillation_loss": 0.4155174791812897, "epoch": 3.13, "learning_rate": 2.1473107764030978e-05, "loss": 0.4348, "step": 8670, "task_loss": 0.8198248744010925 }, { "compression_loss": 0.0, "distillation_loss": 0.47810494899749756, "epoch": 3.14, "learning_rate": 2.1399841785835495e-05, "loss": 0.4994, "step": 8680, "task_loss": 0.44902503490448 }, { "compression_loss": 0.0, "distillation_loss": 0.4945884346961975, "epoch": 3.14, "learning_rate": 2.1326645934629052e-05, "loss": 0.4552, "step": 8690, "task_loss": 1.0011985301971436 }, { "compression_loss": 0.0, "distillation_loss": 0.4940509796142578, "epoch": 3.14, "learning_rate": 2.1253520587834062e-05, "loss": 0.4673, "step": 8700, "task_loss": 0.6739383935928345 }, { "compression_loss": 0.0, "distillation_loss": 0.45873111486434937, "epoch": 3.15, "learning_rate": 2.1180466122509413e-05, "loss": 0.4844, "step": 8710, "task_loss": 0.9118214845657349 }, { "compression_loss": 0.0, "distillation_loss": 0.5551230907440186, "epoch": 3.15, "learning_rate": 2.1107482915348477e-05, "loss": 0.4288, "step": 8720, "task_loss": 0.6939976215362549 }, { "compression_loss": 0.0, "distillation_loss": 0.4679955244064331, "epoch": 3.16, "learning_rate": 2.1034571342677242e-05, "loss": 0.5021, "step": 8730, "task_loss": 0.3576204776763916 }, { "compression_loss": 0.0, "distillation_loss": 0.34198206663131714, "epoch": 3.16, "learning_rate": 2.0961731780452256e-05, "loss": 0.4387, "step": 8740, "task_loss": 0.5113054513931274 }, { "compression_loss": 0.0, "distillation_loss": 0.41313353180885315, "epoch": 3.16, "learning_rate": 2.0888964604258828e-05, "loss": 0.4645, "step": 8750, "task_loss": 0.6293415427207947 }, { "epoch": 3.16, "eval_exact_match": 82.30842005676443, "eval_f1": 89.4770482916315, "step": 8750 }, { "compression_loss": 0.0, "distillation_loss": 0.45705389976501465, "epoch": 3.17, "learning_rate": 2.0816270189308964e-05, "loss": 0.4733, "step": 8760, "task_loss": 0.9850953817367554 }, { "compression_loss": 0.0, "distillation_loss": 0.3981572091579437, "epoch": 3.17, "learning_rate": 2.0743648910439537e-05, "loss": 0.4788, "step": 8770, "task_loss": 1.0468618869781494 }, { "compression_loss": 0.0, "distillation_loss": 0.44604384899139404, "epoch": 3.17, "learning_rate": 2.0671101142110257e-05, "loss": 0.4668, "step": 8780, "task_loss": 0.6613199710845947 }, { "compression_loss": 0.0, "distillation_loss": 0.4133979380130768, "epoch": 3.18, "learning_rate": 2.0598627258401834e-05, "loss": 0.482, "step": 8790, "task_loss": 0.5200196504592896 }, { "compression_loss": 0.0, "distillation_loss": 0.4611911475658417, "epoch": 3.18, "learning_rate": 2.0526227633013956e-05, "loss": 0.4248, "step": 8800, "task_loss": 0.6944129467010498 }, { "compression_loss": 0.0, "distillation_loss": 0.3930094838142395, "epoch": 3.18, "learning_rate": 2.045390263926347e-05, "loss": 0.4411, "step": 8810, "task_loss": 0.529641330242157 }, { "compression_loss": 0.0, "distillation_loss": 0.4318530559539795, "epoch": 3.19, "learning_rate": 2.0381652650082324e-05, "loss": 0.5077, "step": 8820, "task_loss": 0.4306020140647888 }, { "compression_loss": 0.0, "distillation_loss": 0.40004050731658936, "epoch": 3.19, "learning_rate": 2.0309478038015807e-05, "loss": 0.4431, "step": 8830, "task_loss": 0.5305246710777283 }, { "compression_loss": 0.0, "distillation_loss": 0.4837936758995056, "epoch": 3.19, "learning_rate": 2.023737917522045e-05, "loss": 0.4706, "step": 8840, "task_loss": 0.4521811902523041 }, { "compression_loss": 0.0, "distillation_loss": 0.4394914507865906, "epoch": 3.2, "learning_rate": 2.016535643346224e-05, "loss": 0.4401, "step": 8850, "task_loss": 0.5930649042129517 }, { "compression_loss": 0.0, "distillation_loss": 0.3942365348339081, "epoch": 3.2, "learning_rate": 2.0093410184114667e-05, "loss": 0.459, "step": 8860, "task_loss": 0.3833394944667816 }, { "compression_loss": 0.0, "distillation_loss": 0.4827319383621216, "epoch": 3.21, "learning_rate": 2.0021540798156785e-05, "loss": 0.5369, "step": 8870, "task_loss": 0.8428436517715454 }, { "compression_loss": 0.0, "distillation_loss": 0.39570751786231995, "epoch": 3.21, "learning_rate": 1.9949748646171282e-05, "loss": 0.4456, "step": 8880, "task_loss": 1.0797185897827148 }, { "compression_loss": 0.0, "distillation_loss": 0.4631249010562897, "epoch": 3.21, "learning_rate": 1.9878034098342688e-05, "loss": 0.4824, "step": 8890, "task_loss": 0.9745497107505798 }, { "compression_loss": 0.0, "distillation_loss": 0.3670880198478699, "epoch": 3.22, "learning_rate": 1.980639752445529e-05, "loss": 0.4259, "step": 8900, "task_loss": 0.38064563274383545 }, { "compression_loss": 0.0, "distillation_loss": 0.41449859738349915, "epoch": 3.22, "learning_rate": 1.973483929389136e-05, "loss": 0.4552, "step": 8910, "task_loss": 0.5112131237983704 }, { "compression_loss": 0.0, "distillation_loss": 0.6296032071113586, "epoch": 3.22, "learning_rate": 1.9663359775629198e-05, "loss": 0.5241, "step": 8920, "task_loss": 0.9333739280700684 }, { "compression_loss": 0.0, "distillation_loss": 0.3224326968193054, "epoch": 3.23, "learning_rate": 1.959195933824125e-05, "loss": 0.4352, "step": 8930, "task_loss": 0.7056103944778442 }, { "compression_loss": 0.0, "distillation_loss": 0.700102686882019, "epoch": 3.23, "learning_rate": 1.9520638349892158e-05, "loss": 0.4737, "step": 8940, "task_loss": 0.7871214151382446 }, { "compression_loss": 0.0, "distillation_loss": 0.4205997884273529, "epoch": 3.23, "learning_rate": 1.9449397178336928e-05, "loss": 0.4576, "step": 8950, "task_loss": 0.888438880443573 }, { "compression_loss": 0.0, "distillation_loss": 0.46629273891448975, "epoch": 3.24, "learning_rate": 1.9378236190919002e-05, "loss": 0.4272, "step": 8960, "task_loss": 0.6628609299659729 }, { "compression_loss": 0.0, "distillation_loss": 0.47699084877967834, "epoch": 3.24, "learning_rate": 1.9307155754568368e-05, "loss": 0.5153, "step": 8970, "task_loss": 0.3555866479873657 }, { "compression_loss": 0.0, "distillation_loss": 0.4512190520763397, "epoch": 3.25, "learning_rate": 1.9236156235799624e-05, "loss": 0.4604, "step": 8980, "task_loss": 0.7414048910140991 }, { "compression_loss": 0.0, "distillation_loss": 0.34125715494155884, "epoch": 3.25, "learning_rate": 1.9165238000710218e-05, "loss": 0.4164, "step": 8990, "task_loss": 0.6094802618026733 }, { "compression_loss": 0.0, "distillation_loss": 0.5650801658630371, "epoch": 3.25, "learning_rate": 1.9094401414978365e-05, "loss": 0.4677, "step": 9000, "task_loss": 0.7018455266952515 }, { "epoch": 3.25, "eval_exact_match": 82.35572374645223, "eval_f1": 89.5948549052119, "step": 9000 }, { "compression_loss": 0.0, "distillation_loss": 0.40010178089141846, "epoch": 3.26, "learning_rate": 1.9023646843861352e-05, "loss": 0.4391, "step": 9010, "task_loss": 0.5890183448791504 }, { "compression_loss": 0.0, "distillation_loss": 0.4083946943283081, "epoch": 3.26, "learning_rate": 1.8952974652193525e-05, "loss": 0.4411, "step": 9020, "task_loss": 0.4983852207660675 }, { "compression_loss": 0.0, "distillation_loss": 0.46902433037757874, "epoch": 3.26, "learning_rate": 1.888238520438449e-05, "loss": 0.4439, "step": 9030, "task_loss": 0.5505859851837158 }, { "compression_loss": 0.0, "distillation_loss": 0.43851032853126526, "epoch": 3.27, "learning_rate": 1.8811878864417145e-05, "loss": 0.4677, "step": 9040, "task_loss": 0.7526010870933533 }, { "compression_loss": 0.0, "distillation_loss": 0.36553966999053955, "epoch": 3.27, "learning_rate": 1.8741455995845906e-05, "loss": 0.4361, "step": 9050, "task_loss": 0.4793585240840912 }, { "compression_loss": 0.0, "distillation_loss": 0.3230721354484558, "epoch": 3.27, "learning_rate": 1.8671116961794767e-05, "loss": 0.4381, "step": 9060, "task_loss": 0.685264527797699 }, { "compression_loss": 0.0, "distillation_loss": 0.4076061546802521, "epoch": 3.28, "learning_rate": 1.860086212495545e-05, "loss": 0.4927, "step": 9070, "task_loss": 0.5918831825256348 }, { "compression_loss": 0.0, "distillation_loss": 0.4831254780292511, "epoch": 3.28, "learning_rate": 1.8530691847585468e-05, "loss": 0.4676, "step": 9080, "task_loss": 0.8359205722808838 }, { "compression_loss": 0.0, "distillation_loss": 0.48880958557128906, "epoch": 3.29, "learning_rate": 1.8460606491506437e-05, "loss": 0.44, "step": 9090, "task_loss": 0.9667303562164307 }, { "compression_loss": 0.0, "distillation_loss": 0.572074294090271, "epoch": 3.29, "learning_rate": 1.8390606418101974e-05, "loss": 0.4763, "step": 9100, "task_loss": 0.7159460783004761 }, { "compression_loss": 0.0, "distillation_loss": 0.28445014357566833, "epoch": 3.29, "learning_rate": 1.8320691988316008e-05, "loss": 0.4735, "step": 9110, "task_loss": 0.3898680806159973 }, { "compression_loss": 0.0, "distillation_loss": 0.403300404548645, "epoch": 3.3, "learning_rate": 1.8250863562650848e-05, "loss": 0.4487, "step": 9120, "task_loss": 0.6166889667510986 }, { "compression_loss": 0.0, "distillation_loss": 0.48287683725357056, "epoch": 3.3, "learning_rate": 1.8181121501165358e-05, "loss": 0.4543, "step": 9130, "task_loss": 0.7038086652755737 }, { "compression_loss": 0.0, "distillation_loss": 0.4671033024787903, "epoch": 3.3, "learning_rate": 1.811146616347302e-05, "loss": 0.4809, "step": 9140, "task_loss": 0.6968979239463806 }, { "compression_loss": 0.0, "distillation_loss": 0.40570127964019775, "epoch": 3.31, "learning_rate": 1.8041897908740205e-05, "loss": 0.4245, "step": 9150, "task_loss": 0.5374201536178589 }, { "compression_loss": 0.0, "distillation_loss": 0.565341055393219, "epoch": 3.31, "learning_rate": 1.7972417095684214e-05, "loss": 0.4669, "step": 9160, "task_loss": 0.586516261100769 }, { "compression_loss": 0.0, "distillation_loss": 0.5152424573898315, "epoch": 3.31, "learning_rate": 1.790302408257151e-05, "loss": 0.4803, "step": 9170, "task_loss": 0.8241206407546997 }, { "compression_loss": 0.0, "distillation_loss": 0.487447053194046, "epoch": 3.32, "learning_rate": 1.7833719227215755e-05, "loss": 0.4344, "step": 9180, "task_loss": 0.8506260514259338 }, { "compression_loss": 0.0, "distillation_loss": 0.5072003602981567, "epoch": 3.32, "learning_rate": 1.7764502886976142e-05, "loss": 0.4939, "step": 9190, "task_loss": 0.8282922506332397 }, { "compression_loss": 0.0, "distillation_loss": 0.6491203904151917, "epoch": 3.32, "learning_rate": 1.769537541875536e-05, "loss": 0.4493, "step": 9200, "task_loss": 0.9028392434120178 }, { "compression_loss": 0.0, "distillation_loss": 0.35966378450393677, "epoch": 3.33, "learning_rate": 1.7626337178997885e-05, "loss": 0.4309, "step": 9210, "task_loss": 0.4382772445678711 }, { "compression_loss": 0.0, "distillation_loss": 0.44351670145988464, "epoch": 3.33, "learning_rate": 1.755738852368811e-05, "loss": 0.5132, "step": 9220, "task_loss": 0.5579593777656555 }, { "compression_loss": 0.0, "distillation_loss": 0.5415867567062378, "epoch": 3.34, "learning_rate": 1.748852980834849e-05, "loss": 0.5029, "step": 9230, "task_loss": 0.8813443183898926 }, { "compression_loss": 0.0, "distillation_loss": 0.5128371715545654, "epoch": 3.34, "learning_rate": 1.7419761388037698e-05, "loss": 0.4431, "step": 9240, "task_loss": 0.7216111421585083 }, { "compression_loss": 0.0, "distillation_loss": 0.42158788442611694, "epoch": 3.34, "learning_rate": 1.7351083617348838e-05, "loss": 0.4622, "step": 9250, "task_loss": 0.6663612723350525 }, { "epoch": 3.34, "eval_exact_match": 82.61116367076632, "eval_f1": 89.75797445902968, "step": 9250 }, { "compression_loss": 0.0, "distillation_loss": 0.5766861438751221, "epoch": 3.35, "learning_rate": 1.7282496850407593e-05, "loss": 0.4458, "step": 9260, "task_loss": 0.5448006391525269 }, { "compression_loss": 0.0, "distillation_loss": 0.5536984205245972, "epoch": 3.35, "learning_rate": 1.721400144087041e-05, "loss": 0.4782, "step": 9270, "task_loss": 0.9958784580230713 }, { "compression_loss": 0.0, "distillation_loss": 0.394481360912323, "epoch": 3.35, "learning_rate": 1.714559774192262e-05, "loss": 0.4619, "step": 9280, "task_loss": 0.7101040482521057 }, { "compression_loss": 0.0, "distillation_loss": 0.356433629989624, "epoch": 3.36, "learning_rate": 1.707728610627674e-05, "loss": 0.4389, "step": 9290, "task_loss": 0.619407057762146 }, { "compression_loss": 0.0, "distillation_loss": 0.4516601264476776, "epoch": 3.36, "learning_rate": 1.7009066886170497e-05, "loss": 0.484, "step": 9300, "task_loss": 0.5669988393783569 }, { "compression_loss": 0.0, "distillation_loss": 0.4672652781009674, "epoch": 3.36, "learning_rate": 1.6940940433365148e-05, "loss": 0.4677, "step": 9310, "task_loss": 0.8607810735702515 }, { "compression_loss": 0.0, "distillation_loss": 0.33394983410835266, "epoch": 3.37, "learning_rate": 1.6872907099143585e-05, "loss": 0.4618, "step": 9320, "task_loss": 0.43108028173446655 }, { "compression_loss": 0.0, "distillation_loss": 0.5350024700164795, "epoch": 3.37, "learning_rate": 1.6804967234308577e-05, "loss": 0.5019, "step": 9330, "task_loss": 0.9354665279388428 }, { "compression_loss": 0.0, "distillation_loss": 0.5334118604660034, "epoch": 3.38, "learning_rate": 1.673712118918088e-05, "loss": 0.4638, "step": 9340, "task_loss": 0.7986032962799072 }, { "compression_loss": 0.0, "distillation_loss": 0.3785496652126312, "epoch": 3.38, "learning_rate": 1.6669369313597535e-05, "loss": 0.4518, "step": 9350, "task_loss": 0.8192988038063049 }, { "compression_loss": 0.0, "distillation_loss": 0.4895620346069336, "epoch": 3.38, "learning_rate": 1.6601711956909998e-05, "loss": 0.4556, "step": 9360, "task_loss": 0.7032527923583984 }, { "compression_loss": 0.0, "distillation_loss": 0.41272759437561035, "epoch": 3.39, "learning_rate": 1.653414946798235e-05, "loss": 0.5252, "step": 9370, "task_loss": 0.4573909044265747 }, { "compression_loss": 0.0, "distillation_loss": 0.4580502510070801, "epoch": 3.39, "learning_rate": 1.646668219518948e-05, "loss": 0.4687, "step": 9380, "task_loss": 0.6156324148178101 }, { "compression_loss": 0.0, "distillation_loss": 0.34736135601997375, "epoch": 3.39, "learning_rate": 1.639931048641538e-05, "loss": 0.4566, "step": 9390, "task_loss": 0.42701518535614014 }, { "compression_loss": 0.0, "distillation_loss": 0.36195576190948486, "epoch": 3.4, "learning_rate": 1.6332034689051194e-05, "loss": 0.4718, "step": 9400, "task_loss": 0.3998080790042877 }, { "compression_loss": 0.0, "distillation_loss": 0.3932255804538727, "epoch": 3.4, "learning_rate": 1.6264855149993574e-05, "loss": 0.4301, "step": 9410, "task_loss": 0.7975203990936279 }, { "compression_loss": 0.0, "distillation_loss": 0.40939027070999146, "epoch": 3.4, "learning_rate": 1.6197772215642807e-05, "loss": 0.4091, "step": 9420, "task_loss": 0.643854022026062 }, { "compression_loss": 0.0, "distillation_loss": 0.39888107776641846, "epoch": 3.41, "learning_rate": 1.6130786231901073e-05, "loss": 0.3992, "step": 9430, "task_loss": 0.543474555015564 }, { "compression_loss": 0.0, "distillation_loss": 0.46289771795272827, "epoch": 3.41, "learning_rate": 1.606389754417061e-05, "loss": 0.4787, "step": 9440, "task_loss": 0.9618111848831177 }, { "compression_loss": 0.0, "distillation_loss": 0.4148654341697693, "epoch": 3.42, "learning_rate": 1.5997106497351993e-05, "loss": 0.4771, "step": 9450, "task_loss": 0.5072375535964966 }, { "compression_loss": 0.0, "distillation_loss": 0.49600380659103394, "epoch": 3.42, "learning_rate": 1.593041343584232e-05, "loss": 0.4332, "step": 9460, "task_loss": 0.7816798686981201 }, { "compression_loss": 0.0, "distillation_loss": 0.34944719076156616, "epoch": 3.42, "learning_rate": 1.5863818703533445e-05, "loss": 0.453, "step": 9470, "task_loss": 0.6673324108123779 }, { "compression_loss": 0.0, "distillation_loss": 0.4825136959552765, "epoch": 3.43, "learning_rate": 1.5797322643810196e-05, "loss": 0.4553, "step": 9480, "task_loss": 0.6439677476882935 }, { "compression_loss": 0.0, "distillation_loss": 0.5216967463493347, "epoch": 3.43, "learning_rate": 1.5730925599548637e-05, "loss": 0.4688, "step": 9490, "task_loss": 0.5796299576759338 }, { "compression_loss": 0.0, "distillation_loss": 0.504321277141571, "epoch": 3.43, "learning_rate": 1.5664627913114222e-05, "loss": 0.4935, "step": 9500, "task_loss": 0.5632234215736389 }, { "epoch": 3.43, "eval_exact_match": 82.52601702932829, "eval_f1": 89.72896108835486, "step": 9500 }, { "compression_loss": 0.0, "distillation_loss": 0.37664616107940674, "epoch": 3.44, "learning_rate": 1.5598429926360136e-05, "loss": 0.4524, "step": 9510, "task_loss": 0.47461533546447754 }, { "compression_loss": 0.0, "distillation_loss": 0.38051480054855347, "epoch": 3.44, "learning_rate": 1.5532331980625454e-05, "loss": 0.4261, "step": 9520, "task_loss": 0.6248530149459839 }, { "compression_loss": 0.0, "distillation_loss": 0.4424772262573242, "epoch": 3.44, "learning_rate": 1.5466334416733425e-05, "loss": 0.4905, "step": 9530, "task_loss": 0.7271965146064758 }, { "compression_loss": 0.0, "distillation_loss": 0.3982722759246826, "epoch": 3.45, "learning_rate": 1.5400437574989648e-05, "loss": 0.4329, "step": 9540, "task_loss": 0.5037179589271545 }, { "compression_loss": 0.0, "distillation_loss": 0.5665621757507324, "epoch": 3.45, "learning_rate": 1.5334641795180442e-05, "loss": 0.4662, "step": 9550, "task_loss": 0.8826358318328857 }, { "compression_loss": 0.0, "distillation_loss": 0.4354783892631531, "epoch": 3.46, "learning_rate": 1.5268947416570933e-05, "loss": 0.4084, "step": 9560, "task_loss": 0.9508954286575317 }, { "compression_loss": 0.0, "distillation_loss": 0.5282255411148071, "epoch": 3.46, "learning_rate": 1.5203354777903448e-05, "loss": 0.4618, "step": 9570, "task_loss": 0.6860237121582031 }, { "compression_loss": 0.0, "distillation_loss": 0.3957028090953827, "epoch": 3.46, "learning_rate": 1.5137864217395681e-05, "loss": 0.4416, "step": 9580, "task_loss": 0.7152292728424072 }, { "compression_loss": 0.0, "distillation_loss": 0.3824961483478546, "epoch": 3.47, "learning_rate": 1.5072476072739005e-05, "loss": 0.4534, "step": 9590, "task_loss": 0.379571795463562 }, { "compression_loss": 0.0, "distillation_loss": 0.34231045842170715, "epoch": 3.47, "learning_rate": 1.5007190681096652e-05, "loss": 0.4064, "step": 9600, "task_loss": 0.44205284118652344 }, { "compression_loss": 0.0, "distillation_loss": 0.3934401869773865, "epoch": 3.47, "learning_rate": 1.4942008379102068e-05, "loss": 0.5002, "step": 9610, "task_loss": 0.36241433024406433 }, { "compression_loss": 0.0, "distillation_loss": 0.35561633110046387, "epoch": 3.48, "learning_rate": 1.4876929502857115e-05, "loss": 0.4556, "step": 9620, "task_loss": 0.6349127292633057 }, { "compression_loss": 0.0, "distillation_loss": 0.30296555161476135, "epoch": 3.48, "learning_rate": 1.4811954387930384e-05, "loss": 0.4188, "step": 9630, "task_loss": 0.9882563352584839 }, { "compression_loss": 0.0, "distillation_loss": 0.2964661717414856, "epoch": 3.48, "learning_rate": 1.474708336935538e-05, "loss": 0.4593, "step": 9640, "task_loss": 0.3808435797691345 }, { "compression_loss": 0.0, "distillation_loss": 0.45735228061676025, "epoch": 3.49, "learning_rate": 1.4682316781628928e-05, "loss": 0.4316, "step": 9650, "task_loss": 0.5127953886985779 }, { "compression_loss": 0.0, "distillation_loss": 0.29700976610183716, "epoch": 3.49, "learning_rate": 1.4617654958709308e-05, "loss": 0.443, "step": 9660, "task_loss": 0.3418108820915222 }, { "compression_loss": 0.0, "distillation_loss": 0.4318172037601471, "epoch": 3.49, "learning_rate": 1.4553098234014623e-05, "loss": 0.4149, "step": 9670, "task_loss": 0.5029164552688599 }, { "compression_loss": 0.0, "distillation_loss": 0.42593926191329956, "epoch": 3.5, "learning_rate": 1.4488646940421064e-05, "loss": 0.4384, "step": 9680, "task_loss": 0.6561027765274048 }, { "compression_loss": 0.0, "distillation_loss": 0.4087527394294739, "epoch": 3.5, "learning_rate": 1.4424301410261182e-05, "loss": 0.4536, "step": 9690, "task_loss": 0.29617658257484436 }, { "compression_loss": 0.0, "distillation_loss": 0.3533773124217987, "epoch": 3.51, "learning_rate": 1.4360061975322142e-05, "loss": 0.4615, "step": 9700, "task_loss": 0.45025789737701416 }, { "compression_loss": 0.0, "distillation_loss": 0.4536002278327942, "epoch": 3.51, "learning_rate": 1.4295928966844085e-05, "loss": 0.4683, "step": 9710, "task_loss": 0.6415740251541138 }, { "compression_loss": 0.0, "distillation_loss": 0.38968485593795776, "epoch": 3.51, "learning_rate": 1.423190271551837e-05, "loss": 0.4867, "step": 9720, "task_loss": 0.3250332474708557 }, { "compression_loss": 0.0, "distillation_loss": 0.38814741373062134, "epoch": 3.52, "learning_rate": 1.4167983551485887e-05, "loss": 0.4569, "step": 9730, "task_loss": 0.923711895942688 }, { "compression_loss": 0.0, "distillation_loss": 0.4382472038269043, "epoch": 3.52, "learning_rate": 1.4104171804335311e-05, "loss": 0.4405, "step": 9740, "task_loss": 0.7725265026092529 }, { "compression_loss": 0.0, "distillation_loss": 0.4493694305419922, "epoch": 3.52, "learning_rate": 1.404046780310151e-05, "loss": 0.4066, "step": 9750, "task_loss": 0.39111417531967163 }, { "epoch": 3.52, "eval_exact_match": 82.35572374645223, "eval_f1": 89.55060641123724, "step": 9750 }, { "compression_loss": 0.0, "distillation_loss": 0.4694647789001465, "epoch": 3.53, "learning_rate": 1.39768718762637e-05, "loss": 0.5056, "step": 9760, "task_loss": 0.6846310496330261 }, { "compression_loss": 0.0, "distillation_loss": 0.5013730525970459, "epoch": 3.53, "learning_rate": 1.391338435174388e-05, "loss": 0.4405, "step": 9770, "task_loss": 0.47695648670196533 }, { "compression_loss": 0.0, "distillation_loss": 0.36243951320648193, "epoch": 3.53, "learning_rate": 1.3850005556905072e-05, "loss": 0.4278, "step": 9780, "task_loss": 0.41346102952957153 }, { "compression_loss": 0.0, "distillation_loss": 0.500864565372467, "epoch": 3.54, "learning_rate": 1.3786735818549667e-05, "loss": 0.4443, "step": 9790, "task_loss": 0.7879412770271301 }, { "compression_loss": 0.0, "distillation_loss": 0.4714551866054535, "epoch": 3.54, "learning_rate": 1.372357546291769e-05, "loss": 0.4504, "step": 9800, "task_loss": 0.5894259214401245 }, { "compression_loss": 0.0, "distillation_loss": 0.490497887134552, "epoch": 3.55, "learning_rate": 1.3660524815685188e-05, "loss": 0.4955, "step": 9810, "task_loss": 0.5708312392234802 }, { "compression_loss": 0.0, "distillation_loss": 0.5023337602615356, "epoch": 3.55, "learning_rate": 1.359758420196249e-05, "loss": 0.4816, "step": 9820, "task_loss": 0.570620059967041 }, { "compression_loss": 0.0, "distillation_loss": 0.5660896301269531, "epoch": 3.55, "learning_rate": 1.3534753946292586e-05, "loss": 0.4025, "step": 9830, "task_loss": 0.5451761484146118 }, { "compression_loss": 0.0, "distillation_loss": 0.3981624245643616, "epoch": 3.56, "learning_rate": 1.347203437264936e-05, "loss": 0.4706, "step": 9840, "task_loss": 0.7182565927505493 }, { "compression_loss": 0.0, "distillation_loss": 0.5007762908935547, "epoch": 3.56, "learning_rate": 1.3409425804436078e-05, "loss": 0.4933, "step": 9850, "task_loss": 0.520271360874176 }, { "compression_loss": 0.0, "distillation_loss": 0.36946970224380493, "epoch": 3.56, "learning_rate": 1.3346928564483535e-05, "loss": 0.477, "step": 9860, "task_loss": 0.9104056358337402 }, { "compression_loss": 0.0, "distillation_loss": 0.4460700750350952, "epoch": 3.57, "learning_rate": 1.3284542975048519e-05, "loss": 0.4966, "step": 9870, "task_loss": 0.5107903480529785 }, { "compression_loss": 0.0, "distillation_loss": 0.40421903133392334, "epoch": 3.57, "learning_rate": 1.3222269357812115e-05, "loss": 0.4479, "step": 9880, "task_loss": 0.3044702708721161 }, { "compression_loss": 0.0, "distillation_loss": 0.37104153633117676, "epoch": 3.57, "learning_rate": 1.3160108033878046e-05, "loss": 0.3931, "step": 9890, "task_loss": 0.5892044305801392 }, { "compression_loss": 0.0, "distillation_loss": 0.572177529335022, "epoch": 3.58, "learning_rate": 1.3098059323770975e-05, "loss": 0.4358, "step": 9900, "task_loss": 0.8855876922607422 }, { "compression_loss": 0.0, "distillation_loss": 0.4747053384780884, "epoch": 3.58, "learning_rate": 1.3036123547434927e-05, "loss": 0.4494, "step": 9910, "task_loss": 0.758881688117981 }, { "compression_loss": 0.0, "distillation_loss": 0.4006056785583496, "epoch": 3.59, "learning_rate": 1.2974301024231595e-05, "loss": 0.4463, "step": 9920, "task_loss": 0.4315032362937927 }, { "compression_loss": 0.0, "distillation_loss": 0.41052359342575073, "epoch": 3.59, "learning_rate": 1.2912592072938709e-05, "loss": 0.4069, "step": 9930, "task_loss": 0.3629281520843506 }, { "compression_loss": 0.0, "distillation_loss": 0.37371566891670227, "epoch": 3.59, "learning_rate": 1.2850997011748333e-05, "loss": 0.4324, "step": 9940, "task_loss": 0.8833544254302979 }, { "compression_loss": 0.0, "distillation_loss": 0.5907098650932312, "epoch": 3.6, "learning_rate": 1.2789516158265369e-05, "loss": 0.4559, "step": 9950, "task_loss": 0.8144785165786743 }, { "compression_loss": 0.0, "distillation_loss": 0.4753618538379669, "epoch": 3.6, "learning_rate": 1.272814982950573e-05, "loss": 0.4728, "step": 9960, "task_loss": 0.9171135425567627 }, { "compression_loss": 0.0, "distillation_loss": 0.3954315781593323, "epoch": 3.6, "learning_rate": 1.2666898341894853e-05, "loss": 0.4018, "step": 9970, "task_loss": 0.590205192565918 }, { "compression_loss": 0.0, "distillation_loss": 0.3371639549732208, "epoch": 3.61, "learning_rate": 1.2605762011266012e-05, "loss": 0.4689, "step": 9980, "task_loss": 0.644167423248291 }, { "compression_loss": 0.0, "distillation_loss": 0.35054704546928406, "epoch": 3.61, "learning_rate": 1.2544741152858692e-05, "loss": 0.4749, "step": 9990, "task_loss": 0.4521157145500183 }, { "compression_loss": 0.0, "distillation_loss": 0.45311444997787476, "epoch": 3.61, "learning_rate": 1.248383608131694e-05, "loss": 0.4068, "step": 10000, "task_loss": 0.718034029006958 }, { "epoch": 3.61, "eval_exact_match": 82.56385998107852, "eval_f1": 89.63155210704089, "step": 10000 }, { "compression_loss": 0.0, "distillation_loss": 0.2593729794025421, "epoch": 3.62, "learning_rate": 1.2423047110687794e-05, "loss": 0.428, "step": 10010, "task_loss": 0.4959852695465088 }, { "compression_loss": 0.0, "distillation_loss": 0.5165232419967651, "epoch": 3.62, "learning_rate": 1.2362374554419625e-05, "loss": 0.4786, "step": 10020, "task_loss": 0.510176420211792 }, { "compression_loss": 0.0, "distillation_loss": 0.4051707088947296, "epoch": 3.62, "learning_rate": 1.2301818725360548e-05, "loss": 0.4223, "step": 10030, "task_loss": 0.7712464332580566 }, { "compression_loss": 0.0, "distillation_loss": 0.44198185205459595, "epoch": 3.63, "learning_rate": 1.2241379935756749e-05, "loss": 0.4015, "step": 10040, "task_loss": 0.7498027086257935 }, { "compression_loss": 0.0, "distillation_loss": 0.4870222210884094, "epoch": 3.63, "learning_rate": 1.2181058497250998e-05, "loss": 0.4714, "step": 10050, "task_loss": 0.29204386472702026 }, { "compression_loss": 0.0, "distillation_loss": 0.3149113059043884, "epoch": 3.64, "learning_rate": 1.2120854720880877e-05, "loss": 0.4304, "step": 10060, "task_loss": 0.31059128046035767 }, { "compression_loss": 0.0, "distillation_loss": 0.40889161825180054, "epoch": 3.64, "learning_rate": 1.206076891707731e-05, "loss": 0.4754, "step": 10070, "task_loss": 0.5112392902374268 }, { "compression_loss": 0.0, "distillation_loss": 0.40656381845474243, "epoch": 3.64, "learning_rate": 1.2000801395662918e-05, "loss": 0.4317, "step": 10080, "task_loss": 0.867332398891449 }, { "compression_loss": 0.0, "distillation_loss": 0.32227474451065063, "epoch": 3.65, "learning_rate": 1.1940952465850405e-05, "loss": 0.4197, "step": 10090, "task_loss": 0.4937788248062134 }, { "compression_loss": 0.0, "distillation_loss": 0.40178176760673523, "epoch": 3.65, "learning_rate": 1.1881222436240966e-05, "loss": 0.4706, "step": 10100, "task_loss": 0.5314730405807495 }, { "compression_loss": 0.0, "distillation_loss": 0.3422333598136902, "epoch": 3.65, "learning_rate": 1.1821611614822724e-05, "loss": 0.4499, "step": 10110, "task_loss": 0.6373083591461182 }, { "compression_loss": 0.0, "distillation_loss": 0.4214498996734619, "epoch": 3.66, "learning_rate": 1.176212030896912e-05, "loss": 0.4645, "step": 10120, "task_loss": 0.6125006079673767 }, { "compression_loss": 0.0, "distillation_loss": 0.4471798539161682, "epoch": 3.66, "learning_rate": 1.1702748825437348e-05, "loss": 0.4203, "step": 10130, "task_loss": 0.8206405639648438 }, { "compression_loss": 0.0, "distillation_loss": 0.5667499303817749, "epoch": 3.66, "learning_rate": 1.16434974703667e-05, "loss": 0.4428, "step": 10140, "task_loss": 0.7912921905517578 }, { "compression_loss": 0.0, "distillation_loss": 0.44536837935447693, "epoch": 3.67, "learning_rate": 1.1584366549277138e-05, "loss": 0.4994, "step": 10150, "task_loss": 0.7000430226325989 }, { "compression_loss": 0.0, "distillation_loss": 0.43894514441490173, "epoch": 3.67, "learning_rate": 1.1525356367067541e-05, "loss": 0.4713, "step": 10160, "task_loss": 0.7236070036888123 }, { "compression_loss": 0.0, "distillation_loss": 0.4418973922729492, "epoch": 3.68, "learning_rate": 1.1466467228014262e-05, "loss": 0.4531, "step": 10170, "task_loss": 1.1132642030715942 }, { "compression_loss": 0.0, "distillation_loss": 0.47217002511024475, "epoch": 3.68, "learning_rate": 1.14076994357695e-05, "loss": 0.457, "step": 10180, "task_loss": 0.7230472564697266 }, { "compression_loss": 0.0, "distillation_loss": 0.3841246962547302, "epoch": 3.68, "learning_rate": 1.134905329335976e-05, "loss": 0.5203, "step": 10190, "task_loss": 0.5335608720779419 }, { "compression_loss": 0.0, "distillation_loss": 0.45422253012657166, "epoch": 3.69, "learning_rate": 1.1290529103184282e-05, "loss": 0.4765, "step": 10200, "task_loss": 0.7611352801322937 }, { "compression_loss": 0.0, "distillation_loss": 0.511451005935669, "epoch": 3.69, "learning_rate": 1.1232127167013436e-05, "loss": 0.4526, "step": 10210, "task_loss": 0.935626208782196 }, { "compression_loss": 0.0, "distillation_loss": 0.4894786477088928, "epoch": 3.69, "learning_rate": 1.1173847785987288e-05, "loss": 0.4566, "step": 10220, "task_loss": 0.316392183303833 }, { "compression_loss": 0.0, "distillation_loss": 0.56304931640625, "epoch": 3.7, "learning_rate": 1.1115691260613889e-05, "loss": 0.4464, "step": 10230, "task_loss": 0.7091505527496338 }, { "compression_loss": 0.0, "distillation_loss": 0.3679378628730774, "epoch": 3.7, "learning_rate": 1.1057657890767854e-05, "loss": 0.4543, "step": 10240, "task_loss": 0.48593321442604065 }, { "compression_loss": 0.0, "distillation_loss": 0.44522807002067566, "epoch": 3.7, "learning_rate": 1.0999747975688754e-05, "loss": 0.4578, "step": 10250, "task_loss": 0.6225043535232544 }, { "epoch": 3.7, "eval_exact_match": 82.55439924314096, "eval_f1": 89.67409457937354, "step": 10250 }, { "compression_loss": 0.0, "distillation_loss": 0.40633970499038696, "epoch": 3.71, "learning_rate": 1.094196181397959e-05, "loss": 0.4626, "step": 10260, "task_loss": 0.4013146758079529 }, { "compression_loss": 0.0, "distillation_loss": 0.39074042439460754, "epoch": 3.71, "learning_rate": 1.0884299703605228e-05, "loss": 0.4468, "step": 10270, "task_loss": 0.5073047876358032 }, { "compression_loss": 0.0, "distillation_loss": 0.48598477244377136, "epoch": 3.72, "learning_rate": 1.0826761941890906e-05, "loss": 0.4369, "step": 10280, "task_loss": 0.933097243309021 }, { "compression_loss": 0.0, "distillation_loss": 0.5473797917366028, "epoch": 3.72, "learning_rate": 1.0769348825520669e-05, "loss": 0.4574, "step": 10290, "task_loss": 1.121228575706482 }, { "compression_loss": 0.0, "distillation_loss": 0.34773969650268555, "epoch": 3.72, "learning_rate": 1.0712060650535867e-05, "loss": 0.4501, "step": 10300, "task_loss": 0.27765679359436035 }, { "compression_loss": 0.0, "distillation_loss": 0.421405553817749, "epoch": 3.73, "learning_rate": 1.0654897712333557e-05, "loss": 0.5029, "step": 10310, "task_loss": 0.7494446039199829 }, { "compression_loss": 0.0, "distillation_loss": 0.5293338298797607, "epoch": 3.73, "learning_rate": 1.059786030566512e-05, "loss": 0.4723, "step": 10320, "task_loss": 1.1732032299041748 }, { "compression_loss": 0.0, "distillation_loss": 0.3712977468967438, "epoch": 3.73, "learning_rate": 1.0540948724634564e-05, "loss": 0.4501, "step": 10330, "task_loss": 0.8228405714035034 }, { "compression_loss": 0.0, "distillation_loss": 0.43470335006713867, "epoch": 3.74, "learning_rate": 1.0484163262697156e-05, "loss": 0.4082, "step": 10340, "task_loss": 0.4585592746734619 }, { "compression_loss": 0.0, "distillation_loss": 0.34602972865104675, "epoch": 3.74, "learning_rate": 1.042750421265783e-05, "loss": 0.4309, "step": 10350, "task_loss": 0.63847815990448 }, { "compression_loss": 0.0, "distillation_loss": 0.3850482702255249, "epoch": 3.74, "learning_rate": 1.0370971866669712e-05, "loss": 0.4563, "step": 10360, "task_loss": 0.3458658456802368 }, { "compression_loss": 0.0, "distillation_loss": 0.4272826910018921, "epoch": 3.75, "learning_rate": 1.0314566516232566e-05, "loss": 0.4322, "step": 10370, "task_loss": 0.5228935480117798 }, { "compression_loss": 0.0, "distillation_loss": 0.42405521869659424, "epoch": 3.75, "learning_rate": 1.0263910522432138e-05, "loss": 0.4398, "step": 10380, "task_loss": 0.7290164232254028 }, { "compression_loss": 0.0, "distillation_loss": 0.4163869023323059, "epoch": 3.75, "learning_rate": 1.020774726427728e-05, "loss": 0.4492, "step": 10390, "task_loss": 0.8367542028427124 }, { "compression_loss": 0.0, "distillation_loss": 0.6735716462135315, "epoch": 3.76, "learning_rate": 1.0151711843314342e-05, "loss": 0.4848, "step": 10400, "task_loss": 0.6577698588371277 }, { "compression_loss": 0.0, "distillation_loss": 0.40833598375320435, "epoch": 3.76, "learning_rate": 1.0095804548480798e-05, "loss": 0.4422, "step": 10410, "task_loss": 0.502683162689209 }, { "compression_loss": 0.0, "distillation_loss": 0.3372454047203064, "epoch": 3.77, "learning_rate": 1.004002566805347e-05, "loss": 0.4176, "step": 10420, "task_loss": 0.6605957746505737 }, { "compression_loss": 0.0, "distillation_loss": 0.4468865990638733, "epoch": 3.77, "learning_rate": 9.984375489647e-06, "loss": 0.443, "step": 10430, "task_loss": 0.3105127215385437 }, { "compression_loss": 0.0, "distillation_loss": 0.40403956174850464, "epoch": 3.77, "learning_rate": 9.92885430021243e-06, "loss": 0.5085, "step": 10440, "task_loss": 0.5763041973114014 }, { "compression_loss": 0.0, "distillation_loss": 0.35261040925979614, "epoch": 3.78, "learning_rate": 9.87346238603569e-06, "loss": 0.4012, "step": 10450, "task_loss": 0.4983939826488495 }, { "compression_loss": 0.0, "distillation_loss": 0.43633800745010376, "epoch": 3.78, "learning_rate": 9.81820003273612e-06, "loss": 0.4526, "step": 10460, "task_loss": 0.5212877988815308 }, { "compression_loss": 0.0, "distillation_loss": 0.4117966294288635, "epoch": 3.78, "learning_rate": 9.763067525264964e-06, "loss": 0.4783, "step": 10470, "task_loss": 0.40295490622520447 }, { "compression_loss": 0.0, "distillation_loss": 0.3620610237121582, "epoch": 3.79, "learning_rate": 9.708065147904013e-06, "loss": 0.4127, "step": 10480, "task_loss": 0.4528779983520508 }, { "compression_loss": 0.0, "distillation_loss": 0.4549371302127838, "epoch": 3.79, "learning_rate": 9.653193184263991e-06, "loss": 0.4901, "step": 10490, "task_loss": 0.4713435769081116 }, { "compression_loss": 0.0, "distillation_loss": 0.4598999619483948, "epoch": 3.79, "learning_rate": 9.598451917283206e-06, "loss": 0.4689, "step": 10500, "task_loss": 0.7843847274780273 }, { "epoch": 3.79, "eval_exact_match": 82.639545884579, "eval_f1": 89.74238716713347, "step": 10500 }, { "compression_loss": 0.0, "distillation_loss": 0.4294615387916565, "epoch": 3.8, "learning_rate": 9.543841629226033e-06, "loss": 0.4288, "step": 10510, "task_loss": 0.38632312417030334 }, { "compression_loss": 0.0, "distillation_loss": 0.3702619969844818, "epoch": 3.8, "learning_rate": 9.489362601681498e-06, "loss": 0.4508, "step": 10520, "task_loss": 0.6389187574386597 }, { "compression_loss": 0.0, "distillation_loss": 0.5605703592300415, "epoch": 3.81, "learning_rate": 9.435015115561758e-06, "loss": 0.4653, "step": 10530, "task_loss": 0.8252593278884888 }, { "compression_loss": 0.0, "distillation_loss": 0.29313477873802185, "epoch": 3.81, "learning_rate": 9.380799451100741e-06, "loss": 0.5044, "step": 10540, "task_loss": 0.4142262637615204 }, { "compression_loss": 0.0, "distillation_loss": 0.36696240305900574, "epoch": 3.81, "learning_rate": 9.326715887852645e-06, "loss": 0.4497, "step": 10550, "task_loss": 0.4621296525001526 }, { "compression_loss": 0.0, "distillation_loss": 0.5388485193252563, "epoch": 3.82, "learning_rate": 9.272764704690518e-06, "loss": 0.4476, "step": 10560, "task_loss": 0.5078322291374207 }, { "compression_loss": 0.0, "distillation_loss": 0.38809970021247864, "epoch": 3.82, "learning_rate": 9.218946179804766e-06, "loss": 0.4435, "step": 10570, "task_loss": 0.4280998706817627 }, { "compression_loss": 0.0, "distillation_loss": 0.5328583717346191, "epoch": 3.82, "learning_rate": 9.16526059070185e-06, "loss": 0.4354, "step": 10580, "task_loss": 0.7839690446853638 }, { "compression_loss": 0.0, "distillation_loss": 0.41611891984939575, "epoch": 3.83, "learning_rate": 9.11170821420269e-06, "loss": 0.5341, "step": 10590, "task_loss": 0.4555775821208954 }, { "compression_loss": 0.0, "distillation_loss": 0.4526450037956238, "epoch": 3.83, "learning_rate": 9.058289326441349e-06, "loss": 0.442, "step": 10600, "task_loss": 0.5670619606971741 }, { "compression_loss": 0.0, "distillation_loss": 0.4477660357952118, "epoch": 3.83, "learning_rate": 9.005004202863581e-06, "loss": 0.4255, "step": 10610, "task_loss": 1.0125763416290283 }, { "compression_loss": 0.0, "distillation_loss": 0.5518783330917358, "epoch": 3.84, "learning_rate": 8.951853118225413e-06, "loss": 0.5069, "step": 10620, "task_loss": 0.4689984619617462 }, { "compression_loss": 0.0, "distillation_loss": 0.39925044775009155, "epoch": 3.84, "learning_rate": 8.898836346591686e-06, "loss": 0.4361, "step": 10630, "task_loss": 0.7678056955337524 }, { "compression_loss": 0.0, "distillation_loss": 0.40966880321502686, "epoch": 3.85, "learning_rate": 8.845954161334712e-06, "loss": 0.4182, "step": 10640, "task_loss": 0.4853663742542267 }, { "compression_loss": 0.0, "distillation_loss": 0.5304992198944092, "epoch": 3.85, "learning_rate": 8.793206835132823e-06, "loss": 0.4547, "step": 10650, "task_loss": 0.6776725053787231 }, { "compression_loss": 0.0, "distillation_loss": 0.3908195495605469, "epoch": 3.85, "learning_rate": 8.74059463996898e-06, "loss": 0.4477, "step": 10660, "task_loss": 0.4654383063316345 }, { "compression_loss": 0.0, "distillation_loss": 0.42154234647750854, "epoch": 3.86, "learning_rate": 8.688117847129323e-06, "loss": 0.4834, "step": 10670, "task_loss": 0.6709318161010742 }, { "compression_loss": 0.0, "distillation_loss": 0.4247440695762634, "epoch": 3.86, "learning_rate": 8.635776727201879e-06, "loss": 0.4722, "step": 10680, "task_loss": 0.5344759225845337 }, { "compression_loss": 0.0, "distillation_loss": 0.3206929564476013, "epoch": 3.86, "learning_rate": 8.583571550075038e-06, "loss": 0.4014, "step": 10690, "task_loss": 0.5088073015213013 }, { "compression_loss": 0.0, "distillation_loss": 0.3521346151828766, "epoch": 3.87, "learning_rate": 8.531502584936257e-06, "loss": 0.4669, "step": 10700, "task_loss": 0.546211838722229 }, { "compression_loss": 0.0, "distillation_loss": 0.43378379940986633, "epoch": 3.87, "learning_rate": 8.479570100270628e-06, "loss": 0.4134, "step": 10710, "task_loss": 0.3153116703033447 }, { "compression_loss": 0.0, "distillation_loss": 0.3812393844127655, "epoch": 3.87, "learning_rate": 8.427774363859511e-06, "loss": 0.4282, "step": 10720, "task_loss": 0.4867439568042755 }, { "compression_loss": 0.0, "distillation_loss": 0.47613561153411865, "epoch": 3.88, "learning_rate": 8.37611564277913e-06, "loss": 0.4332, "step": 10730, "task_loss": 0.5595312714576721 }, { "compression_loss": 0.0, "distillation_loss": 0.384829044342041, "epoch": 3.88, "learning_rate": 8.324594203399223e-06, "loss": 0.4575, "step": 10740, "task_loss": 0.8852812051773071 }, { "compression_loss": 0.0, "distillation_loss": 0.5001972913742065, "epoch": 3.89, "learning_rate": 8.273210311381659e-06, "loss": 0.4724, "step": 10750, "task_loss": 0.8678057193756104 }, { "epoch": 3.89, "eval_exact_match": 82.64900662251655, "eval_f1": 89.66552807530321, "step": 10750 }, { "compression_loss": 0.0, "distillation_loss": 0.5500179529190063, "epoch": 3.89, "learning_rate": 8.221964231679075e-06, "loss": 0.4497, "step": 10760, "task_loss": 0.6250838041305542 }, { "compression_loss": 0.0, "distillation_loss": 0.3807331919670105, "epoch": 3.89, "learning_rate": 8.170856228533461e-06, "loss": 0.4349, "step": 10770, "task_loss": 0.48087456822395325 }, { "compression_loss": 0.0, "distillation_loss": 0.4343903958797455, "epoch": 3.9, "learning_rate": 8.119886565474906e-06, "loss": 0.438, "step": 10780, "task_loss": 0.7211394309997559 }, { "compression_loss": 0.0, "distillation_loss": 0.291828989982605, "epoch": 3.9, "learning_rate": 8.069055505320102e-06, "loss": 0.4236, "step": 10790, "task_loss": 0.4594219923019409 }, { "compression_loss": 0.0, "distillation_loss": 0.4952172338962555, "epoch": 3.9, "learning_rate": 8.018363310171097e-06, "loss": 0.4617, "step": 10800, "task_loss": 0.6142617464065552 }, { "compression_loss": 0.0, "distillation_loss": 0.3711337447166443, "epoch": 3.91, "learning_rate": 7.9678102414139e-06, "loss": 0.4719, "step": 10810, "task_loss": 0.5707615613937378 }, { "compression_loss": 0.0, "distillation_loss": 0.34765708446502686, "epoch": 3.91, "learning_rate": 7.917396559717134e-06, "loss": 0.4705, "step": 10820, "task_loss": 0.5658267140388489 }, { "compression_loss": 0.0, "distillation_loss": 0.3544590473175049, "epoch": 3.91, "learning_rate": 7.867122525030685e-06, "loss": 0.4342, "step": 10830, "task_loss": 1.0905160903930664 }, { "compression_loss": 0.0, "distillation_loss": 0.44853660464286804, "epoch": 3.92, "learning_rate": 7.816988396584382e-06, "loss": 0.4501, "step": 10840, "task_loss": 0.6044590473175049 }, { "compression_loss": 0.0, "distillation_loss": 0.4930655360221863, "epoch": 3.92, "learning_rate": 7.766994432886658e-06, "loss": 0.4838, "step": 10850, "task_loss": 0.8701499104499817 }, { "compression_loss": 0.0, "distillation_loss": 0.3618244528770447, "epoch": 3.92, "learning_rate": 7.7171408917232e-06, "loss": 0.4294, "step": 10860, "task_loss": 0.5858584046363831 }, { "compression_loss": 0.0, "distillation_loss": 0.5228819847106934, "epoch": 3.93, "learning_rate": 7.667428030155615e-06, "loss": 0.4462, "step": 10870, "task_loss": 0.40236058831214905 }, { "compression_loss": 0.0, "distillation_loss": 0.39159971475601196, "epoch": 3.93, "learning_rate": 7.61785610452016e-06, "loss": 0.468, "step": 10880, "task_loss": 0.874620795249939 }, { "compression_loss": 0.0, "distillation_loss": 0.437158465385437, "epoch": 3.94, "learning_rate": 7.568425370426332e-06, "loss": 0.4786, "step": 10890, "task_loss": 0.7076048254966736 }, { "compression_loss": 0.0, "distillation_loss": 0.4604962170124054, "epoch": 3.94, "learning_rate": 7.5191360827556245e-06, "loss": 0.4425, "step": 10900, "task_loss": 0.425723135471344 }, { "compression_loss": 0.0, "distillation_loss": 0.40846937894821167, "epoch": 3.94, "learning_rate": 7.4699884956601825e-06, "loss": 0.4462, "step": 10910, "task_loss": 0.376265287399292 }, { "compression_loss": 0.0, "distillation_loss": 0.5597336888313293, "epoch": 3.95, "learning_rate": 7.420982862561493e-06, "loss": 0.4404, "step": 10920, "task_loss": 0.7780491709709167 }, { "compression_loss": 0.0, "distillation_loss": 0.3314722776412964, "epoch": 3.95, "learning_rate": 7.372119436149068e-06, "loss": 0.4198, "step": 10930, "task_loss": 0.6404004096984863 }, { "compression_loss": 0.0, "distillation_loss": 0.41364434361457825, "epoch": 3.95, "learning_rate": 7.323398468379176e-06, "loss": 0.446, "step": 10940, "task_loss": 0.7026541829109192 }, { "compression_loss": 0.0, "distillation_loss": 0.512392520904541, "epoch": 3.96, "learning_rate": 7.2748202104735e-06, "loss": 0.4714, "step": 10950, "task_loss": 0.7072031497955322 }, { "compression_loss": 0.0, "distillation_loss": 0.2855644226074219, "epoch": 3.96, "learning_rate": 7.226384912917893e-06, "loss": 0.3969, "step": 10960, "task_loss": 0.496487557888031 }, { "compression_loss": 0.0, "distillation_loss": 0.5489861965179443, "epoch": 3.96, "learning_rate": 7.178092825461004e-06, "loss": 0.4441, "step": 10970, "task_loss": 0.6558412313461304 }, { "compression_loss": 0.0, "distillation_loss": 0.3471415042877197, "epoch": 3.97, "learning_rate": 7.129944197113112e-06, "loss": 0.4202, "step": 10980, "task_loss": 0.5004458427429199 }, { "compression_loss": 0.0, "distillation_loss": 0.5401880741119385, "epoch": 3.97, "learning_rate": 7.081939276144708e-06, "loss": 0.4106, "step": 10990, "task_loss": 0.4734864830970764 }, { "compression_loss": 0.0, "distillation_loss": 0.49857717752456665, "epoch": 3.98, "learning_rate": 7.034078310085315e-06, "loss": 0.4135, "step": 11000, "task_loss": 0.8906891942024231 }, { "epoch": 3.98, "eval_exact_match": 82.34626300851467, "eval_f1": 89.59125330964915, "step": 11000 }, { "compression_loss": 0.0, "distillation_loss": 0.38503700494766235, "epoch": 3.98, "learning_rate": 6.986361545722173e-06, "loss": 0.4515, "step": 11010, "task_loss": 0.42232775688171387 }, { "compression_loss": 0.0, "distillation_loss": 0.322620689868927, "epoch": 3.98, "learning_rate": 6.938789229098967e-06, "loss": 0.4919, "step": 11020, "task_loss": 0.25172334909439087 }, { "compression_loss": 0.0, "distillation_loss": 0.5034948587417603, "epoch": 3.99, "learning_rate": 6.8913616055145456e-06, "loss": 0.4706, "step": 11030, "task_loss": 0.6424163579940796 }, { "compression_loss": 0.0, "distillation_loss": 0.4091174602508545, "epoch": 3.99, "learning_rate": 6.8440789195216915e-06, "loss": 0.4358, "step": 11040, "task_loss": 0.3128480315208435 }, { "compression_loss": 0.0, "distillation_loss": 0.43555712699890137, "epoch": 3.99, "learning_rate": 6.796941414925831e-06, "loss": 0.411, "step": 11050, "task_loss": 0.4940514862537384 }, { "compression_loss": 0.0, "distillation_loss": 0.4541236162185669, "epoch": 4.0, "learning_rate": 6.7499493347838e-06, "loss": 0.395, "step": 11060, "task_loss": 0.4762210249900818 }, { "compression_loss": 0.0, "distillation_loss": 0.35681986808776855, "epoch": 4.0, "learning_rate": 6.703102921402538e-06, "loss": 0.4215, "step": 11070, "task_loss": 0.577312171459198 }, { "compression_loss": 0.0, "distillation_loss": 0.5038410425186157, "epoch": 4.0, "learning_rate": 6.656402416337944e-06, "loss": 0.4241, "step": 11080, "task_loss": 0.5972139835357666 }, { "compression_loss": 0.0, "distillation_loss": 0.3108671307563782, "epoch": 4.01, "learning_rate": 6.609848060393494e-06, "loss": 0.3705, "step": 11090, "task_loss": 0.5614968538284302 }, { "compression_loss": 0.0, "distillation_loss": 0.5272907614707947, "epoch": 4.01, "learning_rate": 6.563440093619115e-06, "loss": 0.4174, "step": 11100, "task_loss": 1.0142651796340942 }, { "compression_loss": 0.0, "distillation_loss": 0.4231366515159607, "epoch": 4.02, "learning_rate": 6.5171787553098855e-06, "loss": 0.3767, "step": 11110, "task_loss": 0.5615295171737671 }, { "compression_loss": 0.0, "distillation_loss": 0.37049567699432373, "epoch": 4.02, "learning_rate": 6.471064284004835e-06, "loss": 0.4511, "step": 11120, "task_loss": 0.5118829011917114 }, { "compression_loss": 0.0, "distillation_loss": 0.4037175476551056, "epoch": 4.02, "learning_rate": 6.4250969174856635e-06, "loss": 0.4546, "step": 11130, "task_loss": 0.40569519996643066 }, { "compression_loss": 0.0, "distillation_loss": 0.43485087156295776, "epoch": 4.03, "learning_rate": 6.379276892775579e-06, "loss": 0.4269, "step": 11140, "task_loss": 0.9028794169425964 }, { "compression_loss": 0.0, "distillation_loss": 0.3825688362121582, "epoch": 4.03, "learning_rate": 6.3336044461380385e-06, "loss": 0.3916, "step": 11150, "task_loss": 0.5466604232788086 }, { "compression_loss": 0.0, "distillation_loss": 0.3695110082626343, "epoch": 4.03, "learning_rate": 6.288079813075543e-06, "loss": 0.4324, "step": 11160, "task_loss": 0.3143315315246582 }, { "compression_loss": 0.0, "distillation_loss": 0.3524971008300781, "epoch": 4.04, "learning_rate": 6.242703228328384e-06, "loss": 0.3955, "step": 11170, "task_loss": 0.49497929215431213 }, { "compression_loss": 0.0, "distillation_loss": 0.5673324465751648, "epoch": 4.04, "learning_rate": 6.197474925873525e-06, "loss": 0.4068, "step": 11180, "task_loss": 0.5235052704811096 }, { "compression_loss": 0.0, "distillation_loss": 0.39023974537849426, "epoch": 4.04, "learning_rate": 6.1523951389232715e-06, "loss": 0.4131, "step": 11190, "task_loss": 0.7273709177970886 }, { "compression_loss": 0.0, "distillation_loss": 0.544334352016449, "epoch": 4.05, "learning_rate": 6.107464099924179e-06, "loss": 0.4283, "step": 11200, "task_loss": 0.5451278686523438 }, { "compression_loss": 0.0, "distillation_loss": 0.4094650149345398, "epoch": 4.05, "learning_rate": 6.062682040555794e-06, "loss": 0.3935, "step": 11210, "task_loss": 0.6262540817260742 }, { "compression_loss": 0.0, "distillation_loss": 0.3312651813030243, "epoch": 4.05, "learning_rate": 6.018049191729475e-06, "loss": 0.4116, "step": 11220, "task_loss": 0.5048397183418274 }, { "compression_loss": 0.0, "distillation_loss": 0.37144196033477783, "epoch": 4.06, "learning_rate": 5.9735657835871894e-06, "loss": 0.387, "step": 11230, "task_loss": 0.6016961336135864 }, { "compression_loss": 0.0, "distillation_loss": 0.38478124141693115, "epoch": 4.06, "learning_rate": 5.929232045500351e-06, "loss": 0.4095, "step": 11240, "task_loss": 0.7590740919113159 }, { "compression_loss": 0.0, "distillation_loss": 0.3207855820655823, "epoch": 4.07, "learning_rate": 5.885048206068627e-06, "loss": 0.4483, "step": 11250, "task_loss": 0.3834182024002075 }, { "epoch": 4.07, "eval_exact_match": 82.45033112582782, "eval_f1": 89.61437887010287, "step": 11250 }, { "compression_loss": 0.0, "distillation_loss": 0.33126235008239746, "epoch": 4.07, "learning_rate": 5.841014493118753e-06, "loss": 0.373, "step": 11260, "task_loss": 0.4069299101829529 }, { "compression_loss": 0.0, "distillation_loss": 0.32440701127052307, "epoch": 4.07, "learning_rate": 5.797131133703335e-06, "loss": 0.4187, "step": 11270, "task_loss": 0.3777425289154053 }, { "compression_loss": 0.0, "distillation_loss": 0.38386988639831543, "epoch": 4.08, "learning_rate": 5.753398354099773e-06, "loss": 0.4056, "step": 11280, "task_loss": 0.4748387038707733 }, { "compression_loss": 0.0, "distillation_loss": 0.40675050020217896, "epoch": 4.08, "learning_rate": 5.7098163798089505e-06, "loss": 0.4438, "step": 11290, "task_loss": 0.5782796144485474 }, { "compression_loss": 0.0, "distillation_loss": 0.41655170917510986, "epoch": 4.08, "learning_rate": 5.666385435554192e-06, "loss": 0.3852, "step": 11300, "task_loss": 0.415513813495636 }, { "compression_loss": 0.0, "distillation_loss": 0.3474239706993103, "epoch": 4.09, "learning_rate": 5.623105745280054e-06, "loss": 0.4068, "step": 11310, "task_loss": 0.7074331045150757 }, { "compression_loss": 0.0, "distillation_loss": 0.49055156111717224, "epoch": 4.09, "learning_rate": 5.5799775321511795e-06, "loss": 0.419, "step": 11320, "task_loss": 0.5741523504257202 }, { "compression_loss": 0.0, "distillation_loss": 0.35831817984580994, "epoch": 4.09, "learning_rate": 5.53700101855112e-06, "loss": 0.3905, "step": 11330, "task_loss": 0.35589608550071716 }, { "compression_loss": 0.0, "distillation_loss": 0.3777387738227844, "epoch": 4.1, "learning_rate": 5.494176426081241e-06, "loss": 0.3921, "step": 11340, "task_loss": 0.5492821931838989 }, { "compression_loss": 0.0, "distillation_loss": 0.5529429316520691, "epoch": 4.1, "learning_rate": 5.45150397555954e-06, "loss": 0.4691, "step": 11350, "task_loss": 0.39025944471359253 }, { "compression_loss": 0.0, "distillation_loss": 0.40030044317245483, "epoch": 4.11, "learning_rate": 5.4089838870195325e-06, "loss": 0.4313, "step": 11360, "task_loss": 0.5147404670715332 }, { "compression_loss": 0.0, "distillation_loss": 0.4512081742286682, "epoch": 4.11, "learning_rate": 5.366616379709066e-06, "loss": 0.4517, "step": 11370, "task_loss": 0.6353756189346313 }, { "compression_loss": 0.0, "distillation_loss": 0.40005961060523987, "epoch": 4.11, "learning_rate": 5.324401672089289e-06, "loss": 0.4823, "step": 11380, "task_loss": 0.8307480216026306 }, { "compression_loss": 0.0, "distillation_loss": 0.3458729386329651, "epoch": 4.12, "learning_rate": 5.282339981833406e-06, "loss": 0.4048, "step": 11390, "task_loss": 0.6542222499847412 }, { "compression_loss": 0.0, "distillation_loss": 0.6230404376983643, "epoch": 4.12, "learning_rate": 5.240431525825641e-06, "loss": 0.4144, "step": 11400, "task_loss": 0.8542134761810303 }, { "compression_loss": 0.0, "distillation_loss": 0.4207066297531128, "epoch": 4.12, "learning_rate": 5.198676520160087e-06, "loss": 0.4289, "step": 11410, "task_loss": 0.49778881669044495 }, { "compression_loss": 0.0, "distillation_loss": 0.4467724561691284, "epoch": 4.13, "learning_rate": 5.157075180139602e-06, "loss": 0.4279, "step": 11420, "task_loss": 1.0865116119384766 }, { "compression_loss": 0.0, "distillation_loss": 0.43505191802978516, "epoch": 4.13, "learning_rate": 5.1156277202746694e-06, "loss": 0.4192, "step": 11430, "task_loss": 0.5657995939254761 }, { "compression_loss": 0.0, "distillation_loss": 0.3828865885734558, "epoch": 4.13, "learning_rate": 5.074334354282335e-06, "loss": 0.3946, "step": 11440, "task_loss": 0.3974769115447998 }, { "compression_loss": 0.0, "distillation_loss": 0.44511428475379944, "epoch": 4.14, "learning_rate": 5.033195295085081e-06, "loss": 0.4205, "step": 11450, "task_loss": 0.5045269727706909 }, { "compression_loss": 0.0, "distillation_loss": 0.4921976327896118, "epoch": 4.14, "learning_rate": 4.992210754809733e-06, "loss": 0.3766, "step": 11460, "task_loss": 0.8440622091293335 }, { "compression_loss": 0.0, "distillation_loss": 0.34847381711006165, "epoch": 4.15, "learning_rate": 4.951380944786361e-06, "loss": 0.4214, "step": 11470, "task_loss": 0.4547831416130066 }, { "compression_loss": 0.0, "distillation_loss": 0.5087148547172546, "epoch": 4.15, "learning_rate": 4.910706075547198e-06, "loss": 0.4062, "step": 11480, "task_loss": 0.9185361862182617 }, { "compression_loss": 0.0, "distillation_loss": 0.427493155002594, "epoch": 4.15, "learning_rate": 4.8701863568255366e-06, "loss": 0.4216, "step": 11490, "task_loss": 0.5520419478416443 }, { "compression_loss": 0.0, "distillation_loss": 0.5256533026695251, "epoch": 4.16, "learning_rate": 4.829821997554683e-06, "loss": 0.4155, "step": 11500, "task_loss": 0.7243201732635498 }, { "epoch": 4.16, "eval_exact_match": 82.75307473982971, "eval_f1": 89.83958785566995, "step": 11500 }, { "compression_loss": 0.0, "distillation_loss": 0.33289045095443726, "epoch": 4.16, "learning_rate": 4.789613205866839e-06, "loss": 0.4289, "step": 11510, "task_loss": 0.4773218631744385 }, { "compression_loss": 0.0, "distillation_loss": 0.5849810838699341, "epoch": 4.16, "learning_rate": 4.749560189092066e-06, "loss": 0.4359, "step": 11520, "task_loss": 1.2744464874267578 }, { "compression_loss": 0.0, "distillation_loss": 0.378299355506897, "epoch": 4.17, "learning_rate": 4.709663153757165e-06, "loss": 0.3864, "step": 11530, "task_loss": 0.4580954313278198 }, { "compression_loss": 0.0, "distillation_loss": 0.4290379285812378, "epoch": 4.17, "learning_rate": 4.669922305584701e-06, "loss": 0.4094, "step": 11540, "task_loss": 0.649642825126648 }, { "compression_loss": 0.0, "distillation_loss": 0.31375768780708313, "epoch": 4.17, "learning_rate": 4.630337849491818e-06, "loss": 0.4181, "step": 11550, "task_loss": 0.4781082272529602 }, { "compression_loss": 0.0, "distillation_loss": 0.29374831914901733, "epoch": 4.18, "learning_rate": 4.5909099895892995e-06, "loss": 0.3873, "step": 11560, "task_loss": 0.4056081771850586 }, { "compression_loss": 0.0, "distillation_loss": 0.32750147581100464, "epoch": 4.18, "learning_rate": 4.551638929180444e-06, "loss": 0.4488, "step": 11570, "task_loss": 0.5592796802520752 }, { "compression_loss": 0.0, "distillation_loss": 0.31511664390563965, "epoch": 4.19, "learning_rate": 4.512524870760054e-06, "loss": 0.3618, "step": 11580, "task_loss": 0.4754389226436615 }, { "compression_loss": 0.0, "distillation_loss": 0.3744344711303711, "epoch": 4.19, "learning_rate": 4.473568016013349e-06, "loss": 0.4034, "step": 11590, "task_loss": 0.6776789426803589 }, { "compression_loss": 0.0, "distillation_loss": 0.4056161344051361, "epoch": 4.19, "learning_rate": 4.4347685658149885e-06, "loss": 0.3945, "step": 11600, "task_loss": 0.6023022532463074 }, { "compression_loss": 0.0, "distillation_loss": 0.3327157199382782, "epoch": 4.2, "learning_rate": 4.396126720227975e-06, "loss": 0.3806, "step": 11610, "task_loss": 0.5939061641693115 }, { "compression_loss": 0.0, "distillation_loss": 0.5144981145858765, "epoch": 4.2, "learning_rate": 4.357642678502669e-06, "loss": 0.3975, "step": 11620, "task_loss": 0.972696840763092 }, { "compression_loss": 0.0, "distillation_loss": 0.46392837166786194, "epoch": 4.2, "learning_rate": 4.319316639075711e-06, "loss": 0.4097, "step": 11630, "task_loss": 0.30408644676208496 }, { "compression_loss": 0.0, "distillation_loss": 0.26904988288879395, "epoch": 4.21, "learning_rate": 4.281148799569073e-06, "loss": 0.4108, "step": 11640, "task_loss": 0.5989107489585876 }, { "compression_loss": 0.0, "distillation_loss": 0.409515380859375, "epoch": 4.21, "learning_rate": 4.243139356788951e-06, "loss": 0.4074, "step": 11650, "task_loss": 0.4942619800567627 }, { "compression_loss": 0.0, "distillation_loss": 0.3208807706832886, "epoch": 4.21, "learning_rate": 4.205288506724823e-06, "loss": 0.3768, "step": 11660, "task_loss": 0.46400171518325806 }, { "compression_loss": 0.0, "distillation_loss": 0.4018038213253021, "epoch": 4.22, "learning_rate": 4.167596444548396e-06, "loss": 0.4199, "step": 11670, "task_loss": 0.6154822111129761 }, { "compression_loss": 0.0, "distillation_loss": 0.30387967824935913, "epoch": 4.22, "learning_rate": 4.130063364612621e-06, "loss": 0.401, "step": 11680, "task_loss": 0.5835756063461304 }, { "compression_loss": 0.0, "distillation_loss": 0.5109657049179077, "epoch": 4.22, "learning_rate": 4.0926894604506725e-06, "loss": 0.4202, "step": 11690, "task_loss": 0.9396585822105408 }, { "compression_loss": 0.0, "distillation_loss": 0.3630220890045166, "epoch": 4.23, "learning_rate": 4.055474924774975e-06, "loss": 0.4453, "step": 11700, "task_loss": 1.1204054355621338 }, { "compression_loss": 0.0, "distillation_loss": 0.3808678388595581, "epoch": 4.23, "learning_rate": 4.01841994947618e-06, "loss": 0.4071, "step": 11710, "task_loss": 0.6350036859512329 }, { "compression_loss": 0.0, "distillation_loss": 0.36871522665023804, "epoch": 4.24, "learning_rate": 3.981524725622215e-06, "loss": 0.4058, "step": 11720, "task_loss": 0.48085230588912964 }, { "compression_loss": 0.0, "distillation_loss": 0.379439115524292, "epoch": 4.24, "learning_rate": 3.944789443457237e-06, "loss": 0.3727, "step": 11730, "task_loss": 0.4141382873058319 }, { "compression_loss": 0.0, "distillation_loss": 0.49233728647232056, "epoch": 4.24, "learning_rate": 3.908214292400743e-06, "loss": 0.4215, "step": 11740, "task_loss": 0.3407425284385681 }, { "compression_loss": 0.0, "distillation_loss": 0.40830057859420776, "epoch": 4.25, "learning_rate": 3.871799461046491e-06, "loss": 0.4168, "step": 11750, "task_loss": 0.7030295133590698 }, { "epoch": 4.25, "eval_exact_match": 82.67738883632923, "eval_f1": 89.83433781229459, "step": 11750 }, { "compression_loss": 0.0, "distillation_loss": 0.5997090339660645, "epoch": 4.25, "learning_rate": 3.8355451371616e-06, "loss": 0.4535, "step": 11760, "task_loss": 0.7779182195663452 }, { "compression_loss": 0.0, "distillation_loss": 0.3632296621799469, "epoch": 4.25, "learning_rate": 3.799451507685557e-06, "loss": 0.408, "step": 11770, "task_loss": 0.6163282990455627 }, { "compression_loss": 0.0, "distillation_loss": 0.3645058274269104, "epoch": 4.26, "learning_rate": 3.7635187587292616e-06, "loss": 0.4098, "step": 11780, "task_loss": 0.4656088948249817 }, { "compression_loss": 0.0, "distillation_loss": 0.3899230659008026, "epoch": 4.26, "learning_rate": 3.727747075574033e-06, "loss": 0.3789, "step": 11790, "task_loss": 0.37167468667030334 }, { "compression_loss": 0.0, "distillation_loss": 0.3266602158546448, "epoch": 4.26, "learning_rate": 3.6921366426707064e-06, "loss": 0.393, "step": 11800, "task_loss": 0.42901307344436646 }, { "compression_loss": 0.0, "distillation_loss": 0.48315972089767456, "epoch": 4.27, "learning_rate": 3.656687643638646e-06, "loss": 0.4205, "step": 11810, "task_loss": 0.883271336555481 }, { "compression_loss": 0.0, "distillation_loss": 0.3944903016090393, "epoch": 4.27, "learning_rate": 3.6214002612648187e-06, "loss": 0.4132, "step": 11820, "task_loss": 0.3511267900466919 }, { "compression_loss": 0.0, "distillation_loss": 0.3845597505569458, "epoch": 4.28, "learning_rate": 3.5862746775028274e-06, "loss": 0.3884, "step": 11830, "task_loss": 0.8081369400024414 }, { "compression_loss": 0.0, "distillation_loss": 0.52008455991745, "epoch": 4.28, "learning_rate": 3.5513110734719947e-06, "loss": 0.388, "step": 11840, "task_loss": 0.6470053195953369 }, { "compression_loss": 0.0, "distillation_loss": 0.32628077268600464, "epoch": 4.28, "learning_rate": 3.5165096294564294e-06, "loss": 0.412, "step": 11850, "task_loss": 0.5431479215621948 }, { "compression_loss": 0.0, "distillation_loss": 0.3263109624385834, "epoch": 4.29, "learning_rate": 3.481870524904063e-06, "loss": 0.4327, "step": 11860, "task_loss": 0.4655928313732147 }, { "compression_loss": 0.0, "distillation_loss": 0.33923470973968506, "epoch": 4.29, "learning_rate": 3.4473939384257713e-06, "loss": 0.4195, "step": 11870, "task_loss": 0.5043308734893799 }, { "compression_loss": 0.0, "distillation_loss": 0.3827700614929199, "epoch": 4.29, "learning_rate": 3.4130800477944353e-06, "loss": 0.4015, "step": 11880, "task_loss": 0.5053728222846985 }, { "compression_loss": 0.0, "distillation_loss": 0.418369323015213, "epoch": 4.3, "learning_rate": 3.378929029944014e-06, "loss": 0.3662, "step": 11890, "task_loss": 0.5845623016357422 }, { "compression_loss": 0.0, "distillation_loss": 0.3653680384159088, "epoch": 4.3, "learning_rate": 3.3449410609686222e-06, "loss": 0.4075, "step": 11900, "task_loss": 0.5306881070137024 }, { "compression_loss": 0.0, "distillation_loss": 0.38282740116119385, "epoch": 4.3, "learning_rate": 3.311116316121681e-06, "loss": 0.4187, "step": 11910, "task_loss": 0.5226006507873535 }, { "compression_loss": 0.0, "distillation_loss": 0.3763877749443054, "epoch": 4.31, "learning_rate": 3.27745496981493e-06, "loss": 0.374, "step": 11920, "task_loss": 0.5572165846824646 }, { "compression_loss": 0.0, "distillation_loss": 0.4219665825366974, "epoch": 4.31, "learning_rate": 3.2439571956175955e-06, "loss": 0.4161, "step": 11930, "task_loss": 0.3990279734134674 }, { "compression_loss": 0.0, "distillation_loss": 0.36016109585762024, "epoch": 4.32, "learning_rate": 3.2106231662554687e-06, "loss": 0.4118, "step": 11940, "task_loss": 0.7736810445785522 }, { "compression_loss": 0.0, "distillation_loss": 0.4748755395412445, "epoch": 4.32, "learning_rate": 3.17745305361002e-06, "loss": 0.4131, "step": 11950, "task_loss": 0.7507423758506775 }, { "compression_loss": 0.0, "distillation_loss": 0.32424837350845337, "epoch": 4.32, "learning_rate": 3.144447028717492e-06, "loss": 0.3758, "step": 11960, "task_loss": 0.3291553854942322 }, { "compression_loss": 0.0, "distillation_loss": 0.4312622547149658, "epoch": 4.33, "learning_rate": 3.1116052617680475e-06, "loss": 0.3978, "step": 11970, "task_loss": 0.6131731271743774 }, { "compression_loss": 0.0, "distillation_loss": 0.3169502019882202, "epoch": 4.33, "learning_rate": 3.078927922104888e-06, "loss": 0.3739, "step": 11980, "task_loss": 0.7729911804199219 }, { "compression_loss": 0.0, "distillation_loss": 0.5403449535369873, "epoch": 4.33, "learning_rate": 3.04641517822337e-06, "loss": 0.429, "step": 11990, "task_loss": 0.7878259420394897 }, { "compression_loss": 0.0, "distillation_loss": 0.5041810870170593, "epoch": 4.34, "learning_rate": 3.0140671977701126e-06, "loss": 0.4378, "step": 12000, "task_loss": 0.8172563314437866 }, { "epoch": 4.34, "eval_exact_match": 82.39356669820246, "eval_f1": 89.55316666275424, "step": 12000 }, { "compression_loss": 0.0, "distillation_loss": 0.45394831895828247, "epoch": 4.34, "learning_rate": 2.9818841475422074e-06, "loss": 0.4096, "step": 12010, "task_loss": 0.7580146789550781 }, { "compression_loss": 0.0, "distillation_loss": 0.30688315629959106, "epoch": 4.34, "learning_rate": 2.9498661934862625e-06, "loss": 0.4064, "step": 12020, "task_loss": 0.5445766448974609 }, { "compression_loss": 0.0, "distillation_loss": 0.324246883392334, "epoch": 4.35, "learning_rate": 2.9180135006976253e-06, "loss": 0.402, "step": 12030, "task_loss": 0.7565941214561462 }, { "compression_loss": 0.0, "distillation_loss": 0.37556836009025574, "epoch": 4.35, "learning_rate": 2.8863262334194892e-06, "loss": 0.4264, "step": 12040, "task_loss": 0.7802863121032715 }, { "compression_loss": 0.0, "distillation_loss": 0.3691438138484955, "epoch": 4.35, "learning_rate": 2.854804555042066e-06, "loss": 0.4111, "step": 12050, "task_loss": 0.4967532455921173 }, { "compression_loss": 0.0, "distillation_loss": 0.3941887617111206, "epoch": 4.36, "learning_rate": 2.823448628101714e-06, "loss": 0.4082, "step": 12060, "task_loss": 0.6636197566986084 }, { "compression_loss": 0.0, "distillation_loss": 0.3338344693183899, "epoch": 4.36, "learning_rate": 2.792258614280147e-06, "loss": 0.4258, "step": 12070, "task_loss": 0.4560825824737549 }, { "compression_loss": 0.0, "distillation_loss": 0.39394477009773254, "epoch": 4.37, "learning_rate": 2.761234674403565e-06, "loss": 0.3904, "step": 12080, "task_loss": 0.5815662145614624 }, { "compression_loss": 0.0, "distillation_loss": 0.3504999876022339, "epoch": 4.37, "learning_rate": 2.730376968441837e-06, "loss": 0.3928, "step": 12090, "task_loss": 0.5948781371116638 }, { "compression_loss": 0.0, "distillation_loss": 0.30044350028038025, "epoch": 4.37, "learning_rate": 2.699685655507652e-06, "loss": 0.4128, "step": 12100, "task_loss": 0.3113638758659363 }, { "compression_loss": 0.0, "distillation_loss": 0.4110683500766754, "epoch": 4.38, "learning_rate": 2.6691608938557673e-06, "loss": 0.4011, "step": 12110, "task_loss": 0.7891151309013367 }, { "compression_loss": 0.0, "distillation_loss": 0.39706042408943176, "epoch": 4.38, "learning_rate": 2.638802840882099e-06, "loss": 0.3894, "step": 12120, "task_loss": 0.6502903699874878 }, { "compression_loss": 0.0, "distillation_loss": 0.419207364320755, "epoch": 4.38, "learning_rate": 2.608611653122982e-06, "loss": 0.4157, "step": 12130, "task_loss": 0.9563859701156616 }, { "compression_loss": 0.0, "distillation_loss": 0.3630548119544983, "epoch": 4.39, "learning_rate": 2.5785874862543364e-06, "loss": 0.395, "step": 12140, "task_loss": 0.5200846195220947 }, { "compression_loss": 0.0, "distillation_loss": 0.4006337821483612, "epoch": 4.39, "learning_rate": 2.548730495090867e-06, "loss": 0.4044, "step": 12150, "task_loss": 0.7663620710372925 }, { "compression_loss": 0.0, "distillation_loss": 0.44687414169311523, "epoch": 4.39, "learning_rate": 2.5190408335852423e-06, "loss": 0.3852, "step": 12160, "task_loss": 0.5466241240501404 }, { "compression_loss": 0.0, "distillation_loss": 0.36376717686653137, "epoch": 4.4, "learning_rate": 2.4895186548273553e-06, "loss": 0.4024, "step": 12170, "task_loss": 0.4370715618133545 }, { "compression_loss": 0.0, "distillation_loss": 0.47231391072273254, "epoch": 4.4, "learning_rate": 2.4601641110434842e-06, "loss": 0.4036, "step": 12180, "task_loss": 0.820824921131134 }, { "compression_loss": 0.0, "distillation_loss": 0.5579500198364258, "epoch": 4.41, "learning_rate": 2.430977353595531e-06, "loss": 0.4426, "step": 12190, "task_loss": 0.6717907190322876 }, { "compression_loss": 0.0, "distillation_loss": 0.43694451451301575, "epoch": 4.41, "learning_rate": 2.401958532980213e-06, "loss": 0.4222, "step": 12200, "task_loss": 0.6733720302581787 }, { "compression_loss": 0.0, "distillation_loss": 0.27389103174209595, "epoch": 4.41, "learning_rate": 2.3731077988283558e-06, "loss": 0.3659, "step": 12210, "task_loss": 0.3571043610572815 }, { "compression_loss": 0.0, "distillation_loss": 0.37224647402763367, "epoch": 4.42, "learning_rate": 2.34442529990403e-06, "loss": 0.3988, "step": 12220, "task_loss": 0.3586083948612213 }, { "compression_loss": 0.0, "distillation_loss": 0.35565561056137085, "epoch": 4.42, "learning_rate": 2.315911184103842e-06, "loss": 0.3741, "step": 12230, "task_loss": 0.5137349367141724 }, { "compression_loss": 0.0, "distillation_loss": 0.29059457778930664, "epoch": 4.42, "learning_rate": 2.287565598456169e-06, "loss": 0.3942, "step": 12240, "task_loss": 0.54352867603302 }, { "compression_loss": 0.0, "distillation_loss": 0.2987663745880127, "epoch": 4.43, "learning_rate": 2.259388689120386e-06, "loss": 0.3893, "step": 12250, "task_loss": 0.3862733840942383 }, { "epoch": 4.43, "eval_exact_match": 82.63008514664143, "eval_f1": 89.73722178822344, "step": 12250 }, { "compression_loss": 0.0, "distillation_loss": 0.35016316175460815, "epoch": 4.43, "learning_rate": 2.2313806013861007e-06, "loss": 0.3867, "step": 12260, "task_loss": 0.46879592537879944 }, { "compression_loss": 0.0, "distillation_loss": 0.3823249340057373, "epoch": 4.43, "learning_rate": 2.2035414796724365e-06, "loss": 0.4238, "step": 12270, "task_loss": 0.630598783493042 }, { "compression_loss": 0.0, "distillation_loss": 0.4345713257789612, "epoch": 4.44, "learning_rate": 2.1758714675272663e-06, "loss": 0.4307, "step": 12280, "task_loss": 0.5999336242675781 }, { "compression_loss": 0.0, "distillation_loss": 0.5260065793991089, "epoch": 4.44, "learning_rate": 2.1483707076264827e-06, "loss": 0.4394, "step": 12290, "task_loss": 0.7119778394699097 }, { "compression_loss": 0.0, "distillation_loss": 0.37995320558547974, "epoch": 4.45, "learning_rate": 2.121039341773235e-06, "loss": 0.4377, "step": 12300, "task_loss": 0.5297107696533203 }, { "compression_loss": 0.0, "distillation_loss": 0.4561520218849182, "epoch": 4.45, "learning_rate": 2.0938775108972628e-06, "loss": 0.4092, "step": 12310, "task_loss": 0.6727806329727173 }, { "compression_loss": 0.0, "distillation_loss": 0.326673686504364, "epoch": 4.45, "learning_rate": 2.0668853550540775e-06, "loss": 0.4197, "step": 12320, "task_loss": 0.27783846855163574 }, { "compression_loss": 0.0, "distillation_loss": 0.43794888257980347, "epoch": 4.46, "learning_rate": 2.0400630134243162e-06, "loss": 0.3867, "step": 12330, "task_loss": 0.5385734438896179 }, { "compression_loss": 0.0, "distillation_loss": 0.30467817187309265, "epoch": 4.46, "learning_rate": 2.013410624312997e-06, "loss": 0.3803, "step": 12340, "task_loss": 0.4139289855957031 }, { "compression_loss": 0.0, "distillation_loss": 0.35417941212654114, "epoch": 4.46, "learning_rate": 1.9869283251488022e-06, "loss": 0.3369, "step": 12350, "task_loss": 0.5619122982025146 }, { "compression_loss": 0.0, "distillation_loss": 0.36347198486328125, "epoch": 4.47, "learning_rate": 1.9606162524833615e-06, "loss": 0.4038, "step": 12360, "task_loss": 0.6940585970878601 }, { "compression_loss": 0.0, "distillation_loss": 0.5780143737792969, "epoch": 4.47, "learning_rate": 1.9344745419905726e-06, "loss": 0.4446, "step": 12370, "task_loss": 1.0974355936050415 }, { "compression_loss": 0.0, "distillation_loss": 0.327537477016449, "epoch": 4.47, "learning_rate": 1.9085033284658917e-06, "loss": 0.4227, "step": 12380, "task_loss": 0.4338206648826599 }, { "compression_loss": 0.0, "distillation_loss": 0.5237728357315063, "epoch": 4.48, "learning_rate": 1.8827027458256239e-06, "loss": 0.4358, "step": 12390, "task_loss": 0.8812257051467896 }, { "compression_loss": 0.0, "distillation_loss": 0.45310911536216736, "epoch": 4.48, "learning_rate": 1.857072927106244e-06, "loss": 0.4478, "step": 12400, "task_loss": 0.6072947978973389 }, { "compression_loss": 0.0, "distillation_loss": 0.49674561619758606, "epoch": 4.49, "learning_rate": 1.831614004463733e-06, "loss": 0.4305, "step": 12410, "task_loss": 0.7922726273536682 }, { "compression_loss": 0.0, "distillation_loss": 0.3848080039024353, "epoch": 4.49, "learning_rate": 1.806326109172838e-06, "loss": 0.3823, "step": 12420, "task_loss": 0.37194693088531494 }, { "compression_loss": 0.0, "distillation_loss": 0.3583149015903473, "epoch": 4.49, "learning_rate": 1.781209371626461e-06, "loss": 0.393, "step": 12430, "task_loss": 0.391252726316452 }, { "compression_loss": 0.0, "distillation_loss": 0.5838788151741028, "epoch": 4.5, "learning_rate": 1.75626392133494e-06, "loss": 0.4088, "step": 12440, "task_loss": 0.6965578198432922 }, { "compression_loss": 0.0, "distillation_loss": 0.5408843159675598, "epoch": 4.5, "learning_rate": 1.7314898869254157e-06, "loss": 0.4157, "step": 12450, "task_loss": 0.4436303377151489 }, { "compression_loss": 0.0, "distillation_loss": 0.4076458513736725, "epoch": 4.5, "learning_rate": 1.7068873961411294e-06, "loss": 0.4012, "step": 12460, "task_loss": 0.6320613026618958 }, { "compression_loss": 0.0, "distillation_loss": 0.3804321885108948, "epoch": 4.51, "learning_rate": 1.6824565758408027e-06, "loss": 0.3988, "step": 12470, "task_loss": 0.7089783549308777 }, { "compression_loss": 0.0, "distillation_loss": 0.2783825993537903, "epoch": 4.51, "learning_rate": 1.6581975519979562e-06, "loss": 0.4198, "step": 12480, "task_loss": 0.660349428653717 }, { "compression_loss": 0.0, "distillation_loss": 0.33828794956207275, "epoch": 4.51, "learning_rate": 1.6341104497002816e-06, "loss": 0.3879, "step": 12490, "task_loss": 0.23362329602241516 }, { "compression_loss": 0.0, "distillation_loss": 0.4729890823364258, "epoch": 4.52, "learning_rate": 1.6101953931489643e-06, "loss": 0.3607, "step": 12500, "task_loss": 0.6899687051773071 }, { "epoch": 4.52, "eval_exact_match": 82.639545884579, "eval_f1": 89.7036436184772, "step": 12500 }, { "compression_loss": 0.0, "distillation_loss": 0.3680706024169922, "epoch": 4.52, "learning_rate": 1.5864525056580967e-06, "loss": 0.4391, "step": 12510, "task_loss": 0.5775980949401855 }, { "compression_loss": 0.0, "distillation_loss": 0.4175216555595398, "epoch": 4.52, "learning_rate": 1.5628819096539764e-06, "loss": 0.4019, "step": 12520, "task_loss": 0.9011520743370056 }, { "compression_loss": 0.0, "distillation_loss": 0.4379788041114807, "epoch": 4.53, "learning_rate": 1.5394837266745246e-06, "loss": 0.3794, "step": 12530, "task_loss": 0.49954891204833984 }, { "compression_loss": 0.0, "distillation_loss": 0.4514831304550171, "epoch": 4.53, "learning_rate": 1.5162580773686438e-06, "loss": 0.4316, "step": 12540, "task_loss": 0.4557006359100342 }, { "compression_loss": 0.0, "distillation_loss": 0.41194820404052734, "epoch": 4.54, "learning_rate": 1.4932050814955942e-06, "loss": 0.4016, "step": 12550, "task_loss": 0.3731171488761902 }, { "compression_loss": 0.0, "distillation_loss": 0.32936277985572815, "epoch": 4.54, "learning_rate": 1.470324857924365e-06, "loss": 0.3858, "step": 12560, "task_loss": 0.384751558303833 }, { "compression_loss": 0.0, "distillation_loss": 0.610994815826416, "epoch": 4.54, "learning_rate": 1.4476175246330906e-06, "loss": 0.4325, "step": 12570, "task_loss": 1.0964241027832031 }, { "compression_loss": 0.0, "distillation_loss": 0.41487258672714233, "epoch": 4.55, "learning_rate": 1.4250831987084155e-06, "loss": 0.3689, "step": 12580, "task_loss": 0.9095247387886047 }, { "compression_loss": 0.0, "distillation_loss": 0.39539405703544617, "epoch": 4.55, "learning_rate": 1.4027219963449087e-06, "loss": 0.3798, "step": 12590, "task_loss": 0.6725915670394897 }, { "compression_loss": 0.0, "distillation_loss": 0.31662312150001526, "epoch": 4.55, "learning_rate": 1.3805340328444287e-06, "loss": 0.3512, "step": 12600, "task_loss": 0.5368452072143555 }, { "compression_loss": 0.0, "distillation_loss": 0.47574758529663086, "epoch": 4.56, "learning_rate": 1.358519422615601e-06, "loss": 0.4363, "step": 12610, "task_loss": 0.40942999720573425 }, { "compression_loss": 0.0, "distillation_loss": 0.29873067140579224, "epoch": 4.56, "learning_rate": 1.3366782791731396e-06, "loss": 0.3548, "step": 12620, "task_loss": 0.4708471894264221 }, { "compression_loss": 0.0, "distillation_loss": 0.37882232666015625, "epoch": 4.56, "learning_rate": 1.315010715137333e-06, "loss": 0.3903, "step": 12630, "task_loss": 0.5266144871711731 }, { "compression_loss": 0.0, "distillation_loss": 0.44176942110061646, "epoch": 4.57, "learning_rate": 1.2935168422334307e-06, "loss": 0.4213, "step": 12640, "task_loss": 0.44183528423309326 }, { "compression_loss": 0.0, "distillation_loss": 0.3719322085380554, "epoch": 4.57, "learning_rate": 1.2721967712910648e-06, "loss": 0.4138, "step": 12650, "task_loss": 0.46417778730392456 }, { "compression_loss": 0.0, "distillation_loss": 0.24051040410995483, "epoch": 4.58, "learning_rate": 1.2510506122436886e-06, "loss": 0.3619, "step": 12660, "task_loss": 0.325649619102478 }, { "compression_loss": 0.0, "distillation_loss": 0.46371278166770935, "epoch": 4.58, "learning_rate": 1.2300784741280078e-06, "loss": 0.4077, "step": 12670, "task_loss": 0.7890371084213257 }, { "compression_loss": 0.0, "distillation_loss": 0.31556272506713867, "epoch": 4.58, "learning_rate": 1.209280465083427e-06, "loss": 0.4136, "step": 12680, "task_loss": 0.7762376070022583 }, { "compression_loss": 0.0, "distillation_loss": 0.4381011128425598, "epoch": 4.59, "learning_rate": 1.1886566923514634e-06, "loss": 0.4207, "step": 12690, "task_loss": 0.9917271733283997 }, { "compression_loss": 0.0, "distillation_loss": 0.3886127173900604, "epoch": 4.59, "learning_rate": 1.1682072622752342e-06, "loss": 0.4295, "step": 12700, "task_loss": 0.6097781658172607 }, { "compression_loss": 0.0, "distillation_loss": 0.4845806956291199, "epoch": 4.59, "learning_rate": 1.147932280298877e-06, "loss": 0.4321, "step": 12710, "task_loss": 0.8994347453117371 }, { "compression_loss": 0.0, "distillation_loss": 0.41579073667526245, "epoch": 4.6, "learning_rate": 1.127831850967007e-06, "loss": 0.3906, "step": 12720, "task_loss": 0.45600637793540955 }, { "compression_loss": 0.0, "distillation_loss": 0.45800602436065674, "epoch": 4.6, "learning_rate": 1.1079060779242022e-06, "loss": 0.4268, "step": 12730, "task_loss": 0.7129201889038086 }, { "compression_loss": 0.0, "distillation_loss": 0.4211260676383972, "epoch": 4.6, "learning_rate": 1.0881550639144531e-06, "loss": 0.4522, "step": 12740, "task_loss": 0.772409200668335 }, { "compression_loss": 0.0, "distillation_loss": 0.39386406540870667, "epoch": 4.61, "learning_rate": 1.0685789107806264e-06, "loss": 0.3954, "step": 12750, "task_loss": 0.2882859408855438 }, { "epoch": 4.61, "eval_exact_match": 82.55439924314096, "eval_f1": 89.76019468353418, "step": 12750 }, { "compression_loss": 0.0, "distillation_loss": 0.3273109793663025, "epoch": 4.61, "learning_rate": 1.0491777194639433e-06, "loss": 0.4323, "step": 12760, "task_loss": 0.9542597532272339 }, { "compression_loss": 0.0, "distillation_loss": 0.3854421377182007, "epoch": 4.62, "learning_rate": 1.029951590003487e-06, "loss": 0.4066, "step": 12770, "task_loss": 0.6295945048332214 }, { "compression_loss": 0.0, "distillation_loss": 0.34793558716773987, "epoch": 4.62, "learning_rate": 1.0109006215356314e-06, "loss": 0.4065, "step": 12780, "task_loss": 0.3474682569503784 }, { "compression_loss": 0.0, "distillation_loss": 0.4002053141593933, "epoch": 4.62, "learning_rate": 9.920249122935775e-07, "loss": 0.4288, "step": 12790, "task_loss": 0.3302229046821594 }, { "compression_loss": 0.0, "distillation_loss": 0.31503939628601074, "epoch": 4.63, "learning_rate": 9.733245596068346e-07, "loss": 0.3968, "step": 12800, "task_loss": 0.30184412002563477 }, { "compression_loss": 0.0, "distillation_loss": 0.4567040801048279, "epoch": 4.63, "learning_rate": 9.547996599007135e-07, "loss": 0.4107, "step": 12810, "task_loss": 0.48562997579574585 }, { "compression_loss": 0.0, "distillation_loss": 0.30701467394828796, "epoch": 4.63, "learning_rate": 9.364503086958147e-07, "loss": 0.375, "step": 12820, "task_loss": 0.616033673286438 }, { "compression_loss": 0.0, "distillation_loss": 0.37898629903793335, "epoch": 4.64, "learning_rate": 9.182766006075692e-07, "loss": 0.3696, "step": 12830, "task_loss": 0.42548850178718567 }, { "compression_loss": 0.0, "distillation_loss": 0.35982203483581543, "epoch": 4.64, "learning_rate": 9.002786293457259e-07, "loss": 0.4362, "step": 12840, "task_loss": 0.7381753921508789 }, { "compression_loss": 0.0, "distillation_loss": 0.2988097667694092, "epoch": 4.64, "learning_rate": 8.824564877138851e-07, "loss": 0.423, "step": 12850, "task_loss": 0.6503967642784119 }, { "compression_loss": 0.0, "distillation_loss": 0.4052898585796356, "epoch": 4.65, "learning_rate": 8.648102676089857e-07, "loss": 0.381, "step": 12860, "task_loss": 0.38243943452835083 }, { "compression_loss": 0.0, "distillation_loss": 0.42070329189300537, "epoch": 4.65, "learning_rate": 8.473400600208857e-07, "loss": 0.4464, "step": 12870, "task_loss": 0.5559731721878052 }, { "compression_loss": 0.0, "distillation_loss": 0.45240452885627747, "epoch": 4.65, "learning_rate": 8.30045955031845e-07, "loss": 0.3897, "step": 12880, "task_loss": 0.5374152064323425 }, { "compression_loss": 0.0, "distillation_loss": 0.41727572679519653, "epoch": 4.66, "learning_rate": 8.129280418160867e-07, "loss": 0.3942, "step": 12890, "task_loss": 0.5857281684875488 }, { "compression_loss": 0.0, "distillation_loss": 0.3383082151412964, "epoch": 4.66, "learning_rate": 7.959864086393225e-07, "loss": 0.3923, "step": 12900, "task_loss": 0.2979002594947815 }, { "compression_loss": 0.0, "distillation_loss": 0.2600701153278351, "epoch": 4.67, "learning_rate": 7.792211428583184e-07, "loss": 0.4077, "step": 12910, "task_loss": 0.3120166063308716 }, { "compression_loss": 0.0, "distillation_loss": 0.39484894275665283, "epoch": 4.67, "learning_rate": 7.626323309204157e-07, "loss": 0.3848, "step": 12920, "task_loss": 0.8494405150413513 }, { "compression_loss": 0.0, "distillation_loss": 0.3408588469028473, "epoch": 4.67, "learning_rate": 7.462200583631045e-07, "loss": 0.4072, "step": 12930, "task_loss": 0.6850634813308716 }, { "compression_loss": 0.0, "distillation_loss": 0.4030011296272278, "epoch": 4.68, "learning_rate": 7.299844098135915e-07, "loss": 0.4062, "step": 12940, "task_loss": 0.4810425639152527 }, { "compression_loss": 0.0, "distillation_loss": 0.3025147318840027, "epoch": 4.68, "learning_rate": 7.139254689883305e-07, "loss": 0.3951, "step": 12950, "task_loss": 0.593737006187439 }, { "compression_loss": 0.0, "distillation_loss": 0.3802594542503357, "epoch": 4.68, "learning_rate": 6.980433186926221e-07, "loss": 0.4089, "step": 12960, "task_loss": 0.588676929473877 }, { "compression_loss": 0.0, "distillation_loss": 0.3437938988208771, "epoch": 4.69, "learning_rate": 6.823380408201817e-07, "loss": 0.4062, "step": 12970, "task_loss": 0.3986343741416931 }, { "compression_loss": 0.0, "distillation_loss": 0.3688074052333832, "epoch": 4.69, "learning_rate": 6.668097163526936e-07, "loss": 0.4375, "step": 12980, "task_loss": 0.48088422417640686 }, { "compression_loss": 0.0, "distillation_loss": 0.3709104061126709, "epoch": 4.69, "learning_rate": 6.514584253594218e-07, "loss": 0.4188, "step": 12990, "task_loss": 0.47096890211105347 }, { "compression_loss": 0.0, "distillation_loss": 0.4205417037010193, "epoch": 4.7, "learning_rate": 6.362842469967905e-07, "loss": 0.4339, "step": 13000, "task_loss": 0.3885349631309509 }, { "epoch": 4.7, "eval_exact_match": 82.63008514664143, "eval_f1": 89.77533433718209, "step": 13000 }, { "compression_loss": 0.0, "distillation_loss": 0.33247077465057373, "epoch": 4.7, "learning_rate": 6.212872595079643e-07, "loss": 0.4131, "step": 13010, "task_loss": 0.4318593144416809 }, { "compression_loss": 0.0, "distillation_loss": 0.48946917057037354, "epoch": 4.71, "learning_rate": 6.064675402224444e-07, "loss": 0.4139, "step": 13020, "task_loss": 0.5803698301315308 }, { "compression_loss": 0.0, "distillation_loss": 0.3440093398094177, "epoch": 4.71, "learning_rate": 5.918251655556994e-07, "loss": 0.4133, "step": 13030, "task_loss": 0.7929366827011108 }, { "compression_loss": 0.0, "distillation_loss": 0.30020883679389954, "epoch": 4.71, "learning_rate": 5.773602110087295e-07, "loss": 0.4317, "step": 13040, "task_loss": 0.6749934554100037 }, { "compression_loss": 0.0, "distillation_loss": 0.637604296207428, "epoch": 4.72, "learning_rate": 5.630727511677097e-07, "loss": 0.4341, "step": 13050, "task_loss": 0.8321160078048706 }, { "compression_loss": 0.0, "distillation_loss": 0.37140244245529175, "epoch": 4.72, "learning_rate": 5.489628597035817e-07, "loss": 0.3955, "step": 13060, "task_loss": 0.8422946929931641 }, { "compression_loss": 0.0, "distillation_loss": 0.442553848028183, "epoch": 4.72, "learning_rate": 5.350306093716961e-07, "loss": 0.4234, "step": 13070, "task_loss": 0.6642134785652161 }, { "compression_loss": 0.0, "distillation_loss": 0.43634718656539917, "epoch": 4.73, "learning_rate": 5.212760720114123e-07, "loss": 0.3571, "step": 13080, "task_loss": 0.622330904006958 }, { "compression_loss": 0.0, "distillation_loss": 0.4799298644065857, "epoch": 4.73, "learning_rate": 5.076993185457568e-07, "loss": 0.4098, "step": 13090, "task_loss": 0.46641305088996887 }, { "compression_loss": 0.0, "distillation_loss": 0.35526013374328613, "epoch": 4.73, "learning_rate": 4.943004189810379e-07, "loss": 0.3992, "step": 13100, "task_loss": 0.5122348070144653 }, { "compression_loss": 0.0, "distillation_loss": 0.46897608041763306, "epoch": 4.74, "learning_rate": 4.810794424064813e-07, "loss": 0.4203, "step": 13110, "task_loss": 0.6970722675323486 }, { "compression_loss": 0.0, "distillation_loss": 0.4693143367767334, "epoch": 4.74, "learning_rate": 4.6803645699389105e-07, "loss": 0.4392, "step": 13120, "task_loss": 0.5941569209098816 }, { "compression_loss": 0.0, "distillation_loss": 0.40301787853240967, "epoch": 4.75, "learning_rate": 4.551715299972852e-07, "loss": 0.466, "step": 13130, "task_loss": 0.7612343430519104 }, { "compression_loss": 0.0, "distillation_loss": 0.3860597014427185, "epoch": 4.75, "learning_rate": 4.424847277525534e-07, "loss": 0.4076, "step": 13140, "task_loss": 0.6143934726715088 }, { "compression_loss": 0.0, "distillation_loss": 0.46132004261016846, "epoch": 4.75, "learning_rate": 4.299761156771148e-07, "loss": 0.4, "step": 13150, "task_loss": 0.5275061130523682 }, { "compression_loss": 0.0, "distillation_loss": 0.3260010778903961, "epoch": 4.76, "learning_rate": 4.1764575826957634e-07, "loss": 0.3797, "step": 13160, "task_loss": 0.3990122973918915 }, { "compression_loss": 0.0, "distillation_loss": 0.3616238832473755, "epoch": 4.76, "learning_rate": 4.054937191094143e-07, "loss": 0.4277, "step": 13170, "task_loss": 0.44919294118881226 }, { "compression_loss": 0.0, "distillation_loss": 0.39060574769973755, "epoch": 4.76, "learning_rate": 3.935200608566241e-07, "loss": 0.4464, "step": 13180, "task_loss": 0.495510458946228 }, { "compression_loss": 0.0, "distillation_loss": 0.47807246446609497, "epoch": 4.77, "learning_rate": 3.817248452514138e-07, "loss": 0.4065, "step": 13190, "task_loss": 0.6110713481903076 }, { "compression_loss": 0.0, "distillation_loss": 0.43341243267059326, "epoch": 4.77, "learning_rate": 3.701081331138772e-07, "loss": 0.4318, "step": 13200, "task_loss": 0.6513692140579224 }, { "compression_loss": 0.0, "distillation_loss": 0.40219298005104065, "epoch": 4.77, "learning_rate": 3.586699843436991e-07, "loss": 0.3551, "step": 13210, "task_loss": 0.33632349967956543 }, { "compression_loss": 0.0, "distillation_loss": 0.4364554286003113, "epoch": 4.78, "learning_rate": 3.474104579198128e-07, "loss": 0.4083, "step": 13220, "task_loss": 0.4315539300441742 }, { "compression_loss": 0.0, "distillation_loss": 0.3684564232826233, "epoch": 4.78, "learning_rate": 3.363296119001246e-07, "loss": 0.3922, "step": 13230, "task_loss": 0.7234320640563965 }, { "compression_loss": 0.0, "distillation_loss": 0.35623037815093994, "epoch": 4.78, "learning_rate": 3.254275034212028e-07, "loss": 0.3792, "step": 13240, "task_loss": 0.6207572221755981 }, { "compression_loss": 0.0, "distillation_loss": 0.5407557487487793, "epoch": 4.79, "learning_rate": 3.1470418869799007e-07, "loss": 0.4309, "step": 13250, "task_loss": 0.5729117393493652 }, { "epoch": 4.79, "eval_exact_match": 82.36518448438979, "eval_f1": 89.62599752726967, "step": 13250 }, { "compression_loss": 0.0, "distillation_loss": 0.3366709351539612, "epoch": 4.79, "learning_rate": 3.041597230234888e-07, "loss": 0.3861, "step": 13260, "task_loss": 0.7600011825561523 }, { "compression_loss": 0.0, "distillation_loss": 0.3440953195095062, "epoch": 4.8, "learning_rate": 2.9379416076852405e-07, "loss": 0.3966, "step": 13270, "task_loss": 0.48770636320114136 }, { "compression_loss": 0.0, "distillation_loss": 0.35563838481903076, "epoch": 4.8, "learning_rate": 2.836075553814171e-07, "loss": 0.4175, "step": 13280, "task_loss": 0.3995950520038605 }, { "compression_loss": 0.0, "distillation_loss": 0.5269416570663452, "epoch": 4.8, "learning_rate": 2.735999593877253e-07, "loss": 0.4154, "step": 13290, "task_loss": 0.6878436803817749 }, { "compression_loss": 0.0, "distillation_loss": 0.3388027548789978, "epoch": 4.81, "learning_rate": 2.6377142438998134e-07, "loss": 0.3958, "step": 13300, "task_loss": 0.4817107319831848 }, { "compression_loss": 0.0, "distillation_loss": 0.4826379418373108, "epoch": 4.81, "learning_rate": 2.5412200106742166e-07, "loss": 0.3647, "step": 13310, "task_loss": 1.065226435661316 }, { "compression_loss": 0.0, "distillation_loss": 0.30741727352142334, "epoch": 4.81, "learning_rate": 2.4465173917571023e-07, "loss": 0.3859, "step": 13320, "task_loss": 0.5023446679115295 }, { "compression_loss": 0.0, "distillation_loss": 0.4362955093383789, "epoch": 4.82, "learning_rate": 2.3536068754670568e-07, "loss": 0.4087, "step": 13330, "task_loss": 0.7960480451583862 }, { "compression_loss": 0.0, "distillation_loss": 0.2628311514854431, "epoch": 4.82, "learning_rate": 2.262488940881968e-07, "loss": 0.3649, "step": 13340, "task_loss": 0.4985067844390869 }, { "compression_loss": 0.0, "distillation_loss": 0.3144613802433014, "epoch": 4.82, "learning_rate": 2.1731640578365016e-07, "loss": 0.3909, "step": 13350, "task_loss": 0.2815832495689392 }, { "compression_loss": 0.0, "distillation_loss": 0.37193599343299866, "epoch": 4.83, "learning_rate": 2.0856326869198082e-07, "loss": 0.398, "step": 13360, "task_loss": 0.8057834506034851 }, { "compression_loss": 0.0, "distillation_loss": 0.4489399194717407, "epoch": 4.83, "learning_rate": 1.999895279473074e-07, "loss": 0.4062, "step": 13370, "task_loss": 0.6343175768852234 }, { "compression_loss": 0.0, "distillation_loss": 0.288999080657959, "epoch": 4.84, "learning_rate": 1.9159522775871906e-07, "loss": 0.4113, "step": 13380, "task_loss": 0.7062314748764038 }, { "compression_loss": 0.0, "distillation_loss": 0.46635884046554565, "epoch": 4.84, "learning_rate": 1.83380411410054e-07, "loss": 0.399, "step": 13390, "task_loss": 0.716343879699707 }, { "compression_loss": 0.0, "distillation_loss": 0.33257097005844116, "epoch": 4.84, "learning_rate": 1.7534512125966237e-07, "loss": 0.4012, "step": 13400, "task_loss": 0.857595682144165 }, { "compression_loss": 0.0, "distillation_loss": 0.4126928448677063, "epoch": 4.85, "learning_rate": 1.6748939874020818e-07, "loss": 0.4223, "step": 13410, "task_loss": 0.6019068956375122 }, { "compression_loss": 0.0, "distillation_loss": 0.3106433153152466, "epoch": 4.85, "learning_rate": 1.598132843584321e-07, "loss": 0.4124, "step": 13420, "task_loss": 0.6166244149208069 }, { "compression_loss": 0.0, "distillation_loss": 0.3775056004524231, "epoch": 4.85, "learning_rate": 1.5231681769496517e-07, "loss": 0.4144, "step": 13430, "task_loss": 0.7219608426094055 }, { "compression_loss": 0.0, "distillation_loss": 0.4089081883430481, "epoch": 4.86, "learning_rate": 1.4500003740410715e-07, "loss": 0.39, "step": 13440, "task_loss": 0.619770348072052 }, { "compression_loss": 0.0, "distillation_loss": 0.3215952217578888, "epoch": 4.86, "learning_rate": 1.3786298121364392e-07, "loss": 0.4686, "step": 13450, "task_loss": 0.5262433290481567 }, { "compression_loss": 0.0, "distillation_loss": 0.2864310145378113, "epoch": 4.86, "learning_rate": 1.3090568592462603e-07, "loss": 0.4086, "step": 13460, "task_loss": 0.46462225914001465 }, { "compression_loss": 0.0, "distillation_loss": 0.3616725206375122, "epoch": 4.87, "learning_rate": 1.24128187411221e-07, "loss": 0.4201, "step": 13470, "task_loss": 0.4158579111099243 }, { "compression_loss": 0.0, "distillation_loss": 0.4216560125350952, "epoch": 4.87, "learning_rate": 1.1753052062048018e-07, "loss": 0.3923, "step": 13480, "task_loss": 0.676395058631897 }, { "compression_loss": 0.0, "distillation_loss": 0.45745834708213806, "epoch": 4.88, "learning_rate": 1.1111271957219503e-07, "loss": 0.4328, "step": 13490, "task_loss": 0.7130784392356873 }, { "compression_loss": 0.0, "distillation_loss": 0.46026918292045593, "epoch": 4.88, "learning_rate": 1.0487481735870662e-07, "loss": 0.4589, "step": 13500, "task_loss": 0.6129114627838135 }, { "epoch": 4.88, "eval_exact_match": 82.58278145695364, "eval_f1": 89.74308413444165, "step": 13500 }, { "compression_loss": 0.0, "distillation_loss": 0.36996591091156006, "epoch": 4.88, "learning_rate": 9.881684614473861e-08, "loss": 0.3992, "step": 13510, "task_loss": 0.39129143953323364 }, { "compression_loss": 0.0, "distillation_loss": 0.327798068523407, "epoch": 4.89, "learning_rate": 9.29388371672224e-08, "loss": 0.3677, "step": 13520, "task_loss": 0.5077568292617798 }, { "compression_loss": 0.0, "distillation_loss": 0.38932156562805176, "epoch": 4.89, "learning_rate": 8.724082073515326e-08, "loss": 0.4236, "step": 13530, "task_loss": 0.7245796918869019 }, { "compression_loss": 0.0, "distillation_loss": 0.4578113257884979, "epoch": 4.89, "learning_rate": 8.172282622941562e-08, "loss": 0.39, "step": 13540, "task_loss": 1.0679997205734253 }, { "compression_loss": 0.0, "distillation_loss": 0.42175060510635376, "epoch": 4.9, "learning_rate": 7.638488210265081e-08, "loss": 0.4656, "step": 13550, "task_loss": 0.6959472894668579 }, { "compression_loss": 0.0, "distillation_loss": 0.4087962508201599, "epoch": 4.9, "learning_rate": 7.122701587908619e-08, "loss": 0.3932, "step": 13560, "task_loss": 1.1434520483016968 }, { "compression_loss": 0.0, "distillation_loss": 0.3045280873775482, "epoch": 4.9, "learning_rate": 6.62492541544224e-08, "loss": 0.412, "step": 13570, "task_loss": 0.6849862933158875 }, { "compression_loss": 0.0, "distillation_loss": 0.3511340320110321, "epoch": 4.91, "learning_rate": 6.145162259566628e-08, "loss": 0.458, "step": 13580, "task_loss": 0.5146377086639404 }, { "compression_loss": 0.0, "distillation_loss": 0.45488184690475464, "epoch": 4.91, "learning_rate": 5.683414594102209e-08, "loss": 0.4581, "step": 13590, "task_loss": 0.9683970808982849 }, { "compression_loss": 0.0, "distillation_loss": 0.493674635887146, "epoch": 4.92, "learning_rate": 5.2396847999751635e-08, "loss": 0.3936, "step": 13600, "task_loss": 1.0288453102111816 }, { "compression_loss": 0.0, "distillation_loss": 0.3300265669822693, "epoch": 4.92, "learning_rate": 4.813975165205763e-08, "loss": 0.428, "step": 13610, "task_loss": 0.43383991718292236 }, { "compression_loss": 0.0, "distillation_loss": 0.32362163066864014, "epoch": 4.92, "learning_rate": 4.406287884895943e-08, "loss": 0.4261, "step": 13620, "task_loss": 0.5160417556762695 }, { "compression_loss": 0.0, "distillation_loss": 0.5967044234275818, "epoch": 4.93, "learning_rate": 4.016625061218026e-08, "loss": 0.4086, "step": 13630, "task_loss": 1.1938400268554688 }, { "compression_loss": 0.0, "distillation_loss": 0.3653201758861542, "epoch": 4.93, "learning_rate": 3.644988703405016e-08, "loss": 0.3941, "step": 13640, "task_loss": 0.6336909532546997 }, { "compression_loss": 0.0, "distillation_loss": 0.36608558893203735, "epoch": 4.93, "learning_rate": 3.2913807277385464e-08, "loss": 0.3878, "step": 13650, "task_loss": 0.532042920589447 }, { "compression_loss": 0.0, "distillation_loss": 0.37139713764190674, "epoch": 4.94, "learning_rate": 2.9558029575407204e-08, "loss": 0.4069, "step": 13660, "task_loss": 0.7530549764633179 }, { "compression_loss": 0.0, "distillation_loss": 0.3533708453178406, "epoch": 4.94, "learning_rate": 2.6382571231628456e-08, "loss": 0.397, "step": 13670, "task_loss": 0.47629207372665405 }, { "compression_loss": 0.0, "distillation_loss": 0.4421239495277405, "epoch": 4.94, "learning_rate": 2.3387448619784367e-08, "loss": 0.4319, "step": 13680, "task_loss": 0.5352086424827576 }, { "compression_loss": 0.0, "distillation_loss": 0.5323693156242371, "epoch": 4.95, "learning_rate": 2.0572677183731122e-08, "loss": 0.3947, "step": 13690, "task_loss": 0.48410868644714355 }, { "compression_loss": 0.0, "distillation_loss": 0.5018866062164307, "epoch": 4.95, "learning_rate": 1.7938271437379913e-08, "loss": 0.4427, "step": 13700, "task_loss": 0.4523616433143616 }, { "compression_loss": 0.0, "distillation_loss": 0.3665663003921509, "epoch": 4.95, "learning_rate": 1.5484244964611427e-08, "loss": 0.3782, "step": 13710, "task_loss": 0.48350703716278076 }, { "compression_loss": 0.0, "distillation_loss": 0.4491804242134094, "epoch": 4.96, "learning_rate": 1.3210610419213673e-08, "loss": 0.4083, "step": 13720, "task_loss": 0.6975788474082947 }, { "compression_loss": 0.0, "distillation_loss": 0.3747801184654236, "epoch": 4.96, "learning_rate": 1.1117379524804293e-08, "loss": 0.3877, "step": 13730, "task_loss": 0.46945083141326904 }, { "compression_loss": 0.0, "distillation_loss": 0.3005831837654114, "epoch": 4.97, "learning_rate": 9.204563074791671e-09, "loss": 0.4103, "step": 13740, "task_loss": 0.437521755695343 }, { "compression_loss": 0.0, "distillation_loss": 0.49657440185546875, "epoch": 4.97, "learning_rate": 7.47217093229724e-09, "loss": 0.4278, "step": 13750, "task_loss": 0.9666340947151184 }, { "epoch": 4.97, "eval_exact_match": 82.72469252601702, "eval_f1": 89.75211707399707, "step": 13750 }, { "compression_loss": 0.0, "distillation_loss": 0.5844108462333679, "epoch": 4.97, "learning_rate": 5.9202120301204966e-09, "loss": 0.4205, "step": 13760, "task_loss": 0.8376038074493408 }, { "compression_loss": 0.0, "distillation_loss": 0.5542365312576294, "epoch": 4.98, "learning_rate": 4.548694370680728e-09, "loss": 0.4406, "step": 13770, "task_loss": 0.7455954551696777 }, { "compression_loss": 0.0, "distillation_loss": 0.3821340501308441, "epoch": 4.98, "learning_rate": 3.3576250259859106e-09, "loss": 0.4466, "step": 13780, "task_loss": 0.43582969903945923 }, { "compression_loss": 0.0, "distillation_loss": 0.38751429319381714, "epoch": 4.98, "learning_rate": 2.3470101375860917e-09, "loss": 0.4559, "step": 13790, "task_loss": 0.4809384346008301 }, { "compression_loss": 0.0, "distillation_loss": 0.40226811170578003, "epoch": 4.99, "learning_rate": 1.5168549165578415e-09, "loss": 0.4583, "step": 13800, "task_loss": 0.4811709523200989 }, { "compression_loss": 0.0, "distillation_loss": 0.500964343547821, "epoch": 4.99, "learning_rate": 8.671636434576246e-10, "loss": 0.3806, "step": 13810, "task_loss": 0.6287086009979248 }, { "compression_loss": 0.0, "distillation_loss": 0.3395386338233948, "epoch": 4.99, "learning_rate": 3.97939668314029e-10, "loss": 0.4134, "step": 13820, "task_loss": 0.35841506719589233 }, { "compression_loss": 0.0, "distillation_loss": 0.3570495843887329, "epoch": 5.0, "learning_rate": 1.0918541060056518e-10, "loss": 0.4233, "step": 13830, "task_loss": 0.5482752323150635 }, { "compression_loss": 0.0, "distillation_loss": 0.35532039403915405, "epoch": 5.0, "learning_rate": 9.023592317802808e-13, "loss": 0.3687, "step": 13840, "task_loss": 0.36665064096450806 }, { "compression_loss": 0.0, "distillation_loss": 0.35440778732299805, "epoch": 5.01, "learning_rate": 6.999992690892745e-05, "loss": 0.3737, "step": 13850, "task_loss": 0.6965274810791016 }, { "compression_loss": 0.0, "distillation_loss": 0.48675596714019775, "epoch": 5.01, "learning_rate": 6.999967424882167e-05, "loss": 0.5142, "step": 13860, "task_loss": 0.669815182685852 }, { "compression_loss": 0.0, "distillation_loss": 0.3265790343284607, "epoch": 5.01, "learning_rate": 6.999924111862622e-05, "loss": 0.3724, "step": 13870, "task_loss": 0.48695600032806396 }, { "compression_loss": 0.0, "distillation_loss": 0.5362226366996765, "epoch": 5.02, "learning_rate": 6.999862752057449e-05, "loss": 0.419, "step": 13880, "task_loss": 0.6971112489700317 }, { "compression_loss": 0.0, "distillation_loss": 0.4938550889492035, "epoch": 5.02, "learning_rate": 6.999783345783038e-05, "loss": 0.4973, "step": 13890, "task_loss": 0.6833978891372681 }, { "compression_loss": 0.0, "distillation_loss": 0.4292599558830261, "epoch": 5.02, "learning_rate": 6.999685893448834e-05, "loss": 0.4466, "step": 13900, "task_loss": 0.7468749284744263 }, { "compression_loss": 0.0, "distillation_loss": 0.3701336979866028, "epoch": 5.03, "learning_rate": 6.999570395557335e-05, "loss": 0.4276, "step": 13910, "task_loss": 0.3578958511352539 }, { "compression_loss": 0.0, "distillation_loss": 0.4636634886264801, "epoch": 5.03, "learning_rate": 6.999436852704088e-05, "loss": 0.4046, "step": 13920, "task_loss": 0.5777134895324707 }, { "compression_loss": 0.0, "distillation_loss": 0.34819674491882324, "epoch": 5.03, "learning_rate": 6.999285265577683e-05, "loss": 0.3958, "step": 13930, "task_loss": 0.7134641408920288 }, { "compression_loss": 0.0, "distillation_loss": 0.4702969789505005, "epoch": 5.04, "learning_rate": 6.999115634959755e-05, "loss": 0.4482, "step": 13940, "task_loss": 0.7330292463302612 }, { "compression_loss": 0.0, "distillation_loss": 0.4083574414253235, "epoch": 5.04, "learning_rate": 6.998927961724976e-05, "loss": 0.4519, "step": 13950, "task_loss": 0.5460534691810608 }, { "compression_loss": 0.0, "distillation_loss": 0.40470677614212036, "epoch": 5.05, "learning_rate": 6.998722246841052e-05, "loss": 0.42, "step": 13960, "task_loss": 0.8615512847900391 }, { "compression_loss": 0.0, "distillation_loss": 0.501833438873291, "epoch": 5.05, "learning_rate": 6.99849849136872e-05, "loss": 0.4176, "step": 13970, "task_loss": 0.6687218546867371 }, { "compression_loss": 0.0, "distillation_loss": 0.48426300287246704, "epoch": 5.05, "learning_rate": 6.998256696461737e-05, "loss": 0.4576, "step": 13980, "task_loss": 0.5862292051315308 }, { "compression_loss": 0.0, "distillation_loss": 0.5200551748275757, "epoch": 5.06, "learning_rate": 6.997996863366879e-05, "loss": 0.4555, "step": 13990, "task_loss": 0.7281656861305237 }, { "compression_loss": 0.0, "distillation_loss": 0.5517895817756653, "epoch": 5.06, "learning_rate": 6.997718993423931e-05, "loss": 0.4606, "step": 14000, "task_loss": 0.4926382005214691 }, { "epoch": 5.06, "eval_exact_match": 82.39356669820246, "eval_f1": 89.59166032418017, "step": 14000 }, { "compression_loss": 0.0, "distillation_loss": 0.4386638402938843, "epoch": 5.06, "learning_rate": 6.997423088065688e-05, "loss": 0.4664, "step": 14010, "task_loss": 0.6251969337463379 }, { "compression_loss": 0.0, "distillation_loss": 0.44594234228134155, "epoch": 5.07, "learning_rate": 6.997109148817935e-05, "loss": 0.4358, "step": 14020, "task_loss": 0.4952147305011749 }, { "compression_loss": 0.0, "distillation_loss": 0.3212473690509796, "epoch": 5.07, "learning_rate": 6.996777177299449e-05, "loss": 0.4691, "step": 14030, "task_loss": 0.2904544472694397 }, { "compression_loss": 0.0, "distillation_loss": 0.3446178734302521, "epoch": 5.07, "learning_rate": 6.996427175221984e-05, "loss": 0.414, "step": 14040, "task_loss": 0.5980858206748962 }, { "compression_loss": 0.0, "distillation_loss": 0.3580099642276764, "epoch": 5.08, "learning_rate": 6.996059144390272e-05, "loss": 0.4941, "step": 14050, "task_loss": 0.28045976161956787 }, { "compression_loss": 0.0, "distillation_loss": 0.4789047837257385, "epoch": 5.08, "learning_rate": 6.995673086702003e-05, "loss": 0.5185, "step": 14060, "task_loss": 0.751612663269043 }, { "compression_loss": 0.0, "distillation_loss": 0.33268317580223083, "epoch": 5.08, "learning_rate": 6.99526900414782e-05, "loss": 0.4453, "step": 14070, "task_loss": 0.2582695186138153 }, { "compression_loss": 0.0, "distillation_loss": 0.5172496438026428, "epoch": 5.09, "learning_rate": 6.994846898811308e-05, "loss": 0.4784, "step": 14080, "task_loss": 0.5228416919708252 }, { "compression_loss": 0.0, "distillation_loss": 0.3266606330871582, "epoch": 5.09, "learning_rate": 6.994406772868984e-05, "loss": 0.5077, "step": 14090, "task_loss": 0.6353722214698792 }, { "compression_loss": 0.0, "distillation_loss": 0.46428632736206055, "epoch": 5.1, "learning_rate": 6.993948628590286e-05, "loss": 0.4278, "step": 14100, "task_loss": 0.3377443253993988 }, { "compression_loss": 0.0, "distillation_loss": 0.5082235336303711, "epoch": 5.1, "learning_rate": 6.993472468337559e-05, "loss": 0.447, "step": 14110, "task_loss": 0.6395173072814941 }, { "compression_loss": 0.0, "distillation_loss": 0.3576199412345886, "epoch": 5.1, "learning_rate": 6.992978294566047e-05, "loss": 0.5164, "step": 14120, "task_loss": 0.6690906286239624 }, { "compression_loss": 0.0, "distillation_loss": 0.40793901681900024, "epoch": 5.11, "learning_rate": 6.992466109823875e-05, "loss": 0.4775, "step": 14130, "task_loss": 0.8127427101135254 }, { "compression_loss": 0.0, "distillation_loss": 0.4388144910335541, "epoch": 5.11, "learning_rate": 6.99193591675204e-05, "loss": 0.4678, "step": 14140, "task_loss": 0.5744211673736572 }, { "compression_loss": 0.0, "distillation_loss": 0.551649272441864, "epoch": 5.11, "learning_rate": 6.991387718084395e-05, "loss": 0.4929, "step": 14150, "task_loss": 0.5900485515594482 }, { "compression_loss": 0.0, "distillation_loss": 0.35258394479751587, "epoch": 5.12, "learning_rate": 6.990821516647637e-05, "loss": 0.4451, "step": 14160, "task_loss": 0.7655850648880005 }, { "compression_loss": 0.0, "distillation_loss": 0.41839784383773804, "epoch": 5.12, "learning_rate": 6.990237315361293e-05, "loss": 0.4519, "step": 14170, "task_loss": 0.4728570878505707 }, { "compression_loss": 0.0, "distillation_loss": 0.7037980556488037, "epoch": 5.12, "learning_rate": 6.9896351172377e-05, "loss": 0.4855, "step": 14180, "task_loss": 0.881999135017395 }, { "compression_loss": 0.0, "distillation_loss": 0.4729958474636078, "epoch": 5.13, "learning_rate": 6.98901492538199e-05, "loss": 0.4912, "step": 14190, "task_loss": 0.4268884062767029 }, { "compression_loss": 0.0, "distillation_loss": 0.40473929047584534, "epoch": 5.13, "learning_rate": 6.988376742992088e-05, "loss": 0.4478, "step": 14200, "task_loss": 0.701400876045227 }, { "compression_loss": 0.0, "distillation_loss": 0.3299444913864136, "epoch": 5.14, "learning_rate": 6.987720573358672e-05, "loss": 0.4597, "step": 14210, "task_loss": 0.4115176796913147 }, { "compression_loss": 0.0, "distillation_loss": 0.39635318517684937, "epoch": 5.14, "learning_rate": 6.987046419865176e-05, "loss": 0.4558, "step": 14220, "task_loss": 0.5531545877456665 }, { "compression_loss": 0.0, "distillation_loss": 0.3222476840019226, "epoch": 5.14, "learning_rate": 6.986354285987759e-05, "loss": 0.4689, "step": 14230, "task_loss": 0.35380634665489197 }, { "compression_loss": 0.0, "distillation_loss": 0.39281386137008667, "epoch": 5.15, "learning_rate": 6.985644175295299e-05, "loss": 0.4214, "step": 14240, "task_loss": 0.5921287536621094 }, { "compression_loss": 0.0, "distillation_loss": 0.3763327896595001, "epoch": 5.15, "learning_rate": 6.984916091449365e-05, "loss": 0.4603, "step": 14250, "task_loss": 0.30917519330978394 }, { "epoch": 5.15, "eval_exact_match": 82.19489120151371, "eval_f1": 89.36849233451807, "step": 14250 }, { "compression_loss": 0.0, "distillation_loss": 0.5615275502204895, "epoch": 5.15, "learning_rate": 6.984170038204199e-05, "loss": 0.4981, "step": 14260, "task_loss": 0.6949574947357178 }, { "compression_loss": 0.0, "distillation_loss": 0.47236767411231995, "epoch": 5.16, "learning_rate": 6.983406019406708e-05, "loss": 0.5041, "step": 14270, "task_loss": 0.5212032794952393 }, { "compression_loss": 0.0, "distillation_loss": 0.5434203743934631, "epoch": 5.16, "learning_rate": 6.982624038996425e-05, "loss": 0.4769, "step": 14280, "task_loss": 0.6844668388366699 }, { "compression_loss": 0.0, "distillation_loss": 0.36416149139404297, "epoch": 5.16, "learning_rate": 6.981824101005505e-05, "loss": 0.5013, "step": 14290, "task_loss": 0.5063308477401733 }, { "compression_loss": 0.0, "distillation_loss": 0.46571964025497437, "epoch": 5.17, "learning_rate": 6.9810062095587e-05, "loss": 0.4407, "step": 14300, "task_loss": 0.5314142107963562 }, { "compression_loss": 0.0, "distillation_loss": 0.4478929340839386, "epoch": 5.17, "learning_rate": 6.98017036887333e-05, "loss": 0.4981, "step": 14310, "task_loss": 0.43525004386901855 }, { "compression_loss": 0.0, "distillation_loss": 0.3345271348953247, "epoch": 5.18, "learning_rate": 6.979316583259272e-05, "loss": 0.4351, "step": 14320, "task_loss": 0.249060720205307 }, { "compression_loss": 0.0, "distillation_loss": 0.5977888703346252, "epoch": 5.18, "learning_rate": 6.978444857118933e-05, "loss": 0.5105, "step": 14330, "task_loss": 0.731816828250885 }, { "compression_loss": 0.0, "distillation_loss": 0.3528749942779541, "epoch": 5.18, "learning_rate": 6.977555194947223e-05, "loss": 0.4528, "step": 14340, "task_loss": 0.28791236877441406 }, { "compression_loss": 0.0, "distillation_loss": 0.4057888984680176, "epoch": 5.19, "learning_rate": 6.976647601331542e-05, "loss": 0.4821, "step": 14350, "task_loss": 0.7864526510238647 }, { "compression_loss": 0.0, "distillation_loss": 0.5908790826797485, "epoch": 5.19, "learning_rate": 6.975722080951746e-05, "loss": 0.5164, "step": 14360, "task_loss": 1.0292553901672363 }, { "compression_loss": 0.0, "distillation_loss": 0.5158011317253113, "epoch": 5.19, "learning_rate": 6.97477863858013e-05, "loss": 0.5173, "step": 14370, "task_loss": 0.44501206278800964 }, { "compression_loss": 0.0, "distillation_loss": 0.644179105758667, "epoch": 5.2, "learning_rate": 6.973817279081402e-05, "loss": 0.4935, "step": 14380, "task_loss": 0.6516858339309692 }, { "compression_loss": 0.0, "distillation_loss": 0.3709027171134949, "epoch": 5.2, "learning_rate": 6.972838007412653e-05, "loss": 0.4828, "step": 14390, "task_loss": 0.5542988181114197 }, { "compression_loss": 0.0, "distillation_loss": 0.3429642617702484, "epoch": 5.2, "learning_rate": 6.971840828623337e-05, "loss": 0.4879, "step": 14400, "task_loss": 0.3810282349586487 }, { "compression_loss": 0.0, "distillation_loss": 0.41922885179519653, "epoch": 5.21, "learning_rate": 6.970825747855243e-05, "loss": 0.4552, "step": 14410, "task_loss": 0.7148609757423401 }, { "compression_loss": 0.0, "distillation_loss": 0.47263821959495544, "epoch": 5.21, "learning_rate": 6.96979277034247e-05, "loss": 0.5117, "step": 14420, "task_loss": 0.8885880708694458 }, { "compression_loss": 0.0, "distillation_loss": 0.47650307416915894, "epoch": 5.22, "learning_rate": 6.968847793264618e-05, "loss": 0.5001, "step": 14430, "task_loss": 0.48124372959136963 }, { "compression_loss": 0.0, "distillation_loss": 0.3229578137397766, "epoch": 5.22, "learning_rate": 6.967780826687376e-05, "loss": 0.4864, "step": 14440, "task_loss": 0.2538095712661743 }, { "compression_loss": 0.0, "distillation_loss": 0.4400070309638977, "epoch": 5.22, "learning_rate": 6.966695979066094e-05, "loss": 0.5075, "step": 14450, "task_loss": 0.5561120510101318 }, { "compression_loss": 0.0, "distillation_loss": 0.450109601020813, "epoch": 5.23, "learning_rate": 6.965593255994606e-05, "loss": 0.4603, "step": 14460, "task_loss": 0.457144170999527 }, { "compression_loss": 0.0, "distillation_loss": 0.7270958423614502, "epoch": 5.23, "learning_rate": 6.964472663158928e-05, "loss": 0.5067, "step": 14470, "task_loss": 0.876134991645813 }, { "compression_loss": 0.0, "distillation_loss": 0.4349331855773926, "epoch": 5.23, "learning_rate": 6.96333420633721e-05, "loss": 0.4893, "step": 14480, "task_loss": 0.48338618874549866 }, { "compression_loss": 0.0, "distillation_loss": 0.386316180229187, "epoch": 5.24, "learning_rate": 6.962177891399719e-05, "loss": 0.4693, "step": 14490, "task_loss": 0.5884779095649719 }, { "compression_loss": 0.0, "distillation_loss": 0.5028120875358582, "epoch": 5.24, "learning_rate": 6.961003724308804e-05, "loss": 0.4485, "step": 14500, "task_loss": 0.5481046438217163 }, { "epoch": 5.24, "eval_exact_match": 82.15704824976348, "eval_f1": 89.39737203821956, "step": 14500 }, { "compression_loss": 0.0, "distillation_loss": 0.6019538044929504, "epoch": 5.24, "learning_rate": 6.959811711118866e-05, "loss": 0.5045, "step": 14510, "task_loss": 0.676105260848999 }, { "compression_loss": 0.0, "distillation_loss": 0.45118898153305054, "epoch": 5.25, "learning_rate": 6.958601857976325e-05, "loss": 0.4433, "step": 14520, "task_loss": 0.6423743963241577 }, { "compression_loss": 0.0, "distillation_loss": 0.5065474510192871, "epoch": 5.25, "learning_rate": 6.957374171119591e-05, "loss": 0.5745, "step": 14530, "task_loss": 0.7627721428871155 }, { "compression_loss": 0.0, "distillation_loss": 0.45010310411453247, "epoch": 5.25, "learning_rate": 6.956128656879031e-05, "loss": 0.5029, "step": 14540, "task_loss": 0.8689513206481934 }, { "compression_loss": 0.0, "distillation_loss": 0.45659008622169495, "epoch": 5.26, "learning_rate": 6.954865321676934e-05, "loss": 0.5493, "step": 14550, "task_loss": 0.7313786745071411 }, { "compression_loss": 0.0, "distillation_loss": 0.4748663902282715, "epoch": 5.26, "learning_rate": 6.953584172027481e-05, "loss": 0.4825, "step": 14560, "task_loss": 0.6921977996826172 }, { "compression_loss": 0.0, "distillation_loss": 0.43287229537963867, "epoch": 5.27, "learning_rate": 6.95228521453671e-05, "loss": 0.4728, "step": 14570, "task_loss": 0.41803377866744995 }, { "compression_loss": 0.0, "distillation_loss": 0.5268940329551697, "epoch": 5.27, "learning_rate": 6.950968455902485e-05, "loss": 0.4534, "step": 14580, "task_loss": 0.6837814450263977 }, { "compression_loss": 0.0, "distillation_loss": 0.538477897644043, "epoch": 5.27, "learning_rate": 6.949633902914455e-05, "loss": 0.5323, "step": 14590, "task_loss": 0.6758741736412048 }, { "compression_loss": 0.0, "distillation_loss": 0.3660999834537506, "epoch": 5.28, "learning_rate": 6.948281562454023e-05, "loss": 0.447, "step": 14600, "task_loss": 0.39962196350097656 }, { "compression_loss": 0.0, "distillation_loss": 0.508263885974884, "epoch": 5.28, "learning_rate": 6.946911441494312e-05, "loss": 0.5491, "step": 14610, "task_loss": 0.38159501552581787 }, { "compression_loss": 0.0, "distillation_loss": 0.475387305021286, "epoch": 5.28, "learning_rate": 6.945523547100128e-05, "loss": 0.5048, "step": 14620, "task_loss": 0.46061187982559204 }, { "compression_loss": 0.0, "distillation_loss": 0.6909676790237427, "epoch": 5.29, "learning_rate": 6.944117886427917e-05, "loss": 0.5538, "step": 14630, "task_loss": 1.0875879526138306 }, { "compression_loss": 0.0, "distillation_loss": 0.49635446071624756, "epoch": 5.29, "learning_rate": 6.942694466725742e-05, "loss": 0.5124, "step": 14640, "task_loss": 1.1167693138122559 }, { "compression_loss": 0.0, "distillation_loss": 0.47104009985923767, "epoch": 5.29, "learning_rate": 6.941253295333232e-05, "loss": 0.4429, "step": 14650, "task_loss": 0.5356221199035645 }, { "compression_loss": 0.0, "distillation_loss": 0.4203311502933502, "epoch": 5.3, "learning_rate": 6.939794379681553e-05, "loss": 0.4496, "step": 14660, "task_loss": 0.6668910980224609 }, { "compression_loss": 0.0, "distillation_loss": 0.39971667528152466, "epoch": 5.3, "learning_rate": 6.938317727293362e-05, "loss": 0.4446, "step": 14670, "task_loss": 0.8728127479553223 }, { "compression_loss": 0.0, "distillation_loss": 0.46285662055015564, "epoch": 5.31, "learning_rate": 6.936823345782777e-05, "loss": 0.5239, "step": 14680, "task_loss": 0.6147375106811523 }, { "compression_loss": 0.0, "distillation_loss": 0.44424545764923096, "epoch": 5.31, "learning_rate": 6.935311242855331e-05, "loss": 0.4991, "step": 14690, "task_loss": 0.25694024562835693 }, { "compression_loss": 0.0, "distillation_loss": 0.5805097222328186, "epoch": 5.31, "learning_rate": 6.933781426307934e-05, "loss": 0.4837, "step": 14700, "task_loss": 0.7926748991012573 }, { "compression_loss": 0.0, "distillation_loss": 0.5545125603675842, "epoch": 5.32, "learning_rate": 6.932233904028838e-05, "loss": 0.4692, "step": 14710, "task_loss": 0.8997831344604492 }, { "compression_loss": 0.0, "distillation_loss": 0.3451814651489258, "epoch": 5.32, "learning_rate": 6.930668683997586e-05, "loss": 0.44, "step": 14720, "task_loss": 0.3470460772514343 }, { "compression_loss": 0.0, "distillation_loss": 0.5201274156570435, "epoch": 5.32, "learning_rate": 6.929085774284978e-05, "loss": 0.4791, "step": 14730, "task_loss": 0.5729685425758362 }, { "compression_loss": 0.0, "distillation_loss": 0.6551119089126587, "epoch": 5.33, "learning_rate": 6.92748518305303e-05, "loss": 0.4964, "step": 14740, "task_loss": 0.686562180519104 }, { "compression_loss": 0.0, "distillation_loss": 0.408242791891098, "epoch": 5.33, "learning_rate": 6.925866918554928e-05, "loss": 0.4589, "step": 14750, "task_loss": 0.7477397918701172 }, { "epoch": 5.33, "eval_exact_match": 81.8543046357616, "eval_f1": 88.98935594178413, "step": 14750 }, { "compression_loss": 0.0, "distillation_loss": 0.4235571026802063, "epoch": 5.33, "learning_rate": 6.924230989134988e-05, "loss": 0.5103, "step": 14760, "task_loss": 0.44014132022857666 }, { "compression_loss": 0.0, "distillation_loss": 0.46613240242004395, "epoch": 5.34, "learning_rate": 6.922577403228613e-05, "loss": 0.4946, "step": 14770, "task_loss": 0.4285615086555481 }, { "compression_loss": 0.0, "distillation_loss": 0.44623327255249023, "epoch": 5.34, "learning_rate": 6.920906169362248e-05, "loss": 0.4949, "step": 14780, "task_loss": 0.2965691089630127 }, { "compression_loss": 0.0, "distillation_loss": 0.46191853284835815, "epoch": 5.35, "learning_rate": 6.919217296153334e-05, "loss": 0.4888, "step": 14790, "task_loss": 0.44412630796432495 }, { "compression_loss": 0.0, "distillation_loss": 0.48645418882369995, "epoch": 5.35, "learning_rate": 6.917510792310273e-05, "loss": 0.4859, "step": 14800, "task_loss": 0.7811020612716675 }, { "compression_loss": 0.0, "distillation_loss": 0.5859341621398926, "epoch": 5.35, "learning_rate": 6.915786666632371e-05, "loss": 0.4877, "step": 14810, "task_loss": 0.5216963291168213 }, { "compression_loss": 0.0, "distillation_loss": 0.8215360641479492, "epoch": 5.36, "learning_rate": 6.914044928009801e-05, "loss": 0.5259, "step": 14820, "task_loss": 0.6187966465950012 }, { "compression_loss": 0.0, "distillation_loss": 0.5492468476295471, "epoch": 5.36, "learning_rate": 6.91228558542355e-05, "loss": 0.5066, "step": 14830, "task_loss": 0.8015393614768982 }, { "compression_loss": 0.0, "distillation_loss": 0.5337282419204712, "epoch": 5.36, "learning_rate": 6.910508647945382e-05, "loss": 0.4744, "step": 14840, "task_loss": 0.6190358400344849 }, { "compression_loss": 0.0, "distillation_loss": 0.4521459639072418, "epoch": 5.37, "learning_rate": 6.908714124737785e-05, "loss": 0.5216, "step": 14850, "task_loss": 0.6480435729026794 }, { "compression_loss": 0.0, "distillation_loss": 0.4558612108230591, "epoch": 5.37, "learning_rate": 6.906902025053921e-05, "loss": 0.5544, "step": 14860, "task_loss": 0.7935390472412109 }, { "compression_loss": 0.0, "distillation_loss": 0.4677425026893616, "epoch": 5.37, "learning_rate": 6.905072358237589e-05, "loss": 0.5016, "step": 14870, "task_loss": 0.836277961730957 }, { "compression_loss": 0.0, "distillation_loss": 0.601864218711853, "epoch": 5.38, "learning_rate": 6.903225133723164e-05, "loss": 0.4917, "step": 14880, "task_loss": 0.6987572908401489 }, { "compression_loss": 0.0, "distillation_loss": 0.44784462451934814, "epoch": 5.38, "learning_rate": 6.901360361035558e-05, "loss": 0.5084, "step": 14890, "task_loss": 0.40038225054740906 }, { "compression_loss": 0.0, "distillation_loss": 0.49896252155303955, "epoch": 5.38, "learning_rate": 6.899478049790166e-05, "loss": 0.4782, "step": 14900, "task_loss": 1.08770751953125 }, { "compression_loss": 0.0, "distillation_loss": 0.5893778800964355, "epoch": 5.39, "learning_rate": 6.897578209692816e-05, "loss": 0.5051, "step": 14910, "task_loss": 0.8863139152526855 }, { "compression_loss": 0.0, "distillation_loss": 0.596195638179779, "epoch": 5.39, "learning_rate": 6.895660850539724e-05, "loss": 0.5446, "step": 14920, "task_loss": 0.41577187180519104 }, { "compression_loss": 0.0, "distillation_loss": 0.49143242835998535, "epoch": 5.4, "learning_rate": 6.89372598221744e-05, "loss": 0.4568, "step": 14930, "task_loss": 0.6072136163711548 }, { "compression_loss": 0.0, "distillation_loss": 0.4943245053291321, "epoch": 5.4, "learning_rate": 6.891773614702792e-05, "loss": 0.4862, "step": 14940, "task_loss": 0.7640190124511719 }, { "compression_loss": 0.0, "distillation_loss": 0.660693347454071, "epoch": 5.4, "learning_rate": 6.889803758062846e-05, "loss": 0.5279, "step": 14950, "task_loss": 0.9499300718307495 }, { "compression_loss": 0.0, "distillation_loss": 0.46248817443847656, "epoch": 5.41, "learning_rate": 6.887816422454846e-05, "loss": 0.4533, "step": 14960, "task_loss": 0.48348772525787354 }, { "compression_loss": 0.0, "distillation_loss": 0.6976580023765564, "epoch": 5.41, "learning_rate": 6.885811618126159e-05, "loss": 0.4955, "step": 14970, "task_loss": 0.6008883714675903 }, { "compression_loss": 0.0, "distillation_loss": 0.5361173152923584, "epoch": 5.41, "learning_rate": 6.883789355414233e-05, "loss": 0.5052, "step": 14980, "task_loss": 0.601929783821106 }, { "compression_loss": 0.0, "distillation_loss": 0.45808547735214233, "epoch": 5.42, "learning_rate": 6.881749644746535e-05, "loss": 0.4697, "step": 14990, "task_loss": 0.6224072575569153 }, { "compression_loss": 0.0, "distillation_loss": 0.44869574904441833, "epoch": 5.42, "learning_rate": 6.879692496640498e-05, "loss": 0.5494, "step": 15000, "task_loss": 0.5406436920166016 }, { "epoch": 5.42, "eval_exact_match": 81.91106906338695, "eval_f1": 89.24159681473488, "step": 15000 }, { "compression_loss": 0.0, "distillation_loss": 0.44767728447914124, "epoch": 5.42, "learning_rate": 6.877617921703468e-05, "loss": 0.4827, "step": 15010, "task_loss": 0.668562650680542 }, { "compression_loss": 0.0, "distillation_loss": 0.5660895109176636, "epoch": 5.43, "learning_rate": 6.875525930632653e-05, "loss": 0.5327, "step": 15020, "task_loss": 1.034258246421814 }, { "compression_loss": 0.0, "distillation_loss": 0.35861772298812866, "epoch": 5.43, "learning_rate": 6.873416534215064e-05, "loss": 0.4806, "step": 15030, "task_loss": 0.42596346139907837 }, { "compression_loss": 0.0, "distillation_loss": 0.7044867277145386, "epoch": 5.44, "learning_rate": 6.871289743327455e-05, "loss": 0.5416, "step": 15040, "task_loss": 0.7539388537406921 }, { "compression_loss": 0.0, "distillation_loss": 0.43850019574165344, "epoch": 5.44, "learning_rate": 6.869145568936275e-05, "loss": 0.4962, "step": 15050, "task_loss": 0.5783032178878784 }, { "compression_loss": 0.0, "distillation_loss": 0.4840550124645233, "epoch": 5.44, "learning_rate": 6.866984022097612e-05, "loss": 0.5386, "step": 15060, "task_loss": 0.5743952989578247 }, { "compression_loss": 0.0, "distillation_loss": 0.663632869720459, "epoch": 5.45, "learning_rate": 6.864805113957123e-05, "loss": 0.4495, "step": 15070, "task_loss": 0.8771752119064331 }, { "compression_loss": 0.0, "distillation_loss": 0.41818153858184814, "epoch": 5.45, "learning_rate": 6.862608855749995e-05, "loss": 0.5517, "step": 15080, "task_loss": 0.8001173734664917 }, { "compression_loss": 0.0, "distillation_loss": 0.4510330557823181, "epoch": 5.45, "learning_rate": 6.860395258800871e-05, "loss": 0.5632, "step": 15090, "task_loss": 0.5343620181083679 }, { "compression_loss": 0.0, "distillation_loss": 0.5964381694793701, "epoch": 5.46, "learning_rate": 6.858164334523806e-05, "loss": 0.5241, "step": 15100, "task_loss": 0.8330484628677368 }, { "compression_loss": 0.0, "distillation_loss": 0.5306263566017151, "epoch": 5.46, "learning_rate": 6.85591609442219e-05, "loss": 0.4959, "step": 15110, "task_loss": 0.6922684907913208 }, { "compression_loss": 0.0, "distillation_loss": 0.46480873227119446, "epoch": 5.46, "learning_rate": 6.853650550088709e-05, "loss": 0.5503, "step": 15120, "task_loss": 0.5510062575340271 }, { "compression_loss": 0.0, "distillation_loss": 0.4726712107658386, "epoch": 5.47, "learning_rate": 6.85136771320527e-05, "loss": 0.4861, "step": 15130, "task_loss": 0.7079441547393799 }, { "compression_loss": 0.0, "distillation_loss": 0.635477602481842, "epoch": 5.47, "learning_rate": 6.849067595542946e-05, "loss": 0.5073, "step": 15140, "task_loss": 0.8118971586227417 }, { "compression_loss": 0.0, "distillation_loss": 0.5491691827774048, "epoch": 5.48, "learning_rate": 6.846750208961921e-05, "loss": 0.5041, "step": 15150, "task_loss": 0.645523726940155 }, { "compression_loss": 0.0, "distillation_loss": 0.33065298199653625, "epoch": 5.48, "learning_rate": 6.844415565411416e-05, "loss": 0.4876, "step": 15160, "task_loss": 0.7148600816726685 }, { "compression_loss": 0.0, "distillation_loss": 0.44501185417175293, "epoch": 5.48, "learning_rate": 6.84206367692964e-05, "loss": 0.45, "step": 15170, "task_loss": 0.5103074908256531 }, { "compression_loss": 0.0, "distillation_loss": 0.437028169631958, "epoch": 5.49, "learning_rate": 6.83969455564372e-05, "loss": 0.5025, "step": 15180, "task_loss": 0.6442121863365173 }, { "compression_loss": 0.0, "distillation_loss": 0.46236348152160645, "epoch": 5.49, "learning_rate": 6.837308213769643e-05, "loss": 0.4766, "step": 15190, "task_loss": 0.6244036555290222 }, { "compression_loss": 0.0, "distillation_loss": 0.4882710576057434, "epoch": 5.49, "learning_rate": 6.834904663612188e-05, "loss": 0.5092, "step": 15200, "task_loss": 0.4923517107963562 }, { "compression_loss": 0.0, "distillation_loss": 0.3791254758834839, "epoch": 5.5, "learning_rate": 6.832483917564871e-05, "loss": 0.4735, "step": 15210, "task_loss": 0.476325660943985 }, { "compression_loss": 0.0, "distillation_loss": 0.4039973020553589, "epoch": 5.5, "learning_rate": 6.830045988109869e-05, "loss": 0.5144, "step": 15220, "task_loss": 0.6185945868492126 }, { "compression_loss": 0.0, "distillation_loss": 0.6345975995063782, "epoch": 5.5, "learning_rate": 6.827590887817969e-05, "loss": 0.4869, "step": 15230, "task_loss": 0.6183285713195801 }, { "compression_loss": 0.0, "distillation_loss": 0.5272897481918335, "epoch": 5.51, "learning_rate": 6.825118629348493e-05, "loss": 0.4712, "step": 15240, "task_loss": 0.9741804599761963 }, { "compression_loss": 0.0, "distillation_loss": 0.34770917892456055, "epoch": 5.51, "learning_rate": 6.822629225449237e-05, "loss": 0.4669, "step": 15250, "task_loss": 0.3193807899951935 }, { "epoch": 5.51, "eval_exact_match": 81.79754020813624, "eval_f1": 89.026956018613, "step": 15250 }, { "compression_loss": 0.0, "distillation_loss": 0.5802989602088928, "epoch": 5.51, "learning_rate": 6.820122688956404e-05, "loss": 0.5421, "step": 15260, "task_loss": 0.6691771745681763 }, { "compression_loss": 0.0, "distillation_loss": 0.4500080645084381, "epoch": 5.52, "learning_rate": 6.817599032794539e-05, "loss": 0.478, "step": 15270, "task_loss": 0.7451514005661011 }, { "compression_loss": 0.0, "distillation_loss": 0.5461714267730713, "epoch": 5.52, "learning_rate": 6.815058269976462e-05, "loss": 0.5201, "step": 15280, "task_loss": 0.36617377400398254 }, { "compression_loss": 0.0, "distillation_loss": 0.5489475727081299, "epoch": 5.53, "learning_rate": 6.8125004136032e-05, "loss": 0.5483, "step": 15290, "task_loss": 0.44468170404434204 }, { "compression_loss": 0.0, "distillation_loss": 0.5517098307609558, "epoch": 5.53, "learning_rate": 6.809925476863924e-05, "loss": 0.5487, "step": 15300, "task_loss": 0.901218831539154 }, { "compression_loss": 0.0, "distillation_loss": 0.3634655177593231, "epoch": 5.53, "learning_rate": 6.807333473035868e-05, "loss": 0.4936, "step": 15310, "task_loss": 0.552862823009491 }, { "compression_loss": 0.0, "distillation_loss": 0.6331474184989929, "epoch": 5.54, "learning_rate": 6.804724415484278e-05, "loss": 0.5806, "step": 15320, "task_loss": 0.6623814105987549 }, { "compression_loss": 0.0, "distillation_loss": 0.5808169841766357, "epoch": 5.54, "learning_rate": 6.802098317662334e-05, "loss": 0.5248, "step": 15330, "task_loss": 0.5808233022689819 }, { "compression_loss": 0.0, "distillation_loss": 0.482837051153183, "epoch": 5.54, "learning_rate": 6.799455193111077e-05, "loss": 0.4598, "step": 15340, "task_loss": 0.7158746719360352 }, { "compression_loss": 0.0, "distillation_loss": 0.6607789993286133, "epoch": 5.55, "learning_rate": 6.796795055459347e-05, "loss": 0.5437, "step": 15350, "task_loss": 1.0560665130615234 }, { "compression_loss": 0.0, "distillation_loss": 0.4435341954231262, "epoch": 5.55, "learning_rate": 6.794117918423706e-05, "loss": 0.5636, "step": 15360, "task_loss": 0.648557186126709 }, { "compression_loss": 0.0, "distillation_loss": 0.4403044581413269, "epoch": 5.55, "learning_rate": 6.791423795808378e-05, "loss": 0.4541, "step": 15370, "task_loss": 0.4011836051940918 }, { "compression_loss": 0.0, "distillation_loss": 0.4444364309310913, "epoch": 5.56, "learning_rate": 6.78871270150516e-05, "loss": 0.5244, "step": 15380, "task_loss": 0.5661810636520386 }, { "compression_loss": 0.0, "distillation_loss": 0.7234218716621399, "epoch": 5.56, "learning_rate": 6.785984649493368e-05, "loss": 0.5102, "step": 15390, "task_loss": 1.0226433277130127 }, { "compression_loss": 0.0, "distillation_loss": 0.4961375296115875, "epoch": 5.57, "learning_rate": 6.783239653839755e-05, "loss": 0.4689, "step": 15400, "task_loss": 0.6303238868713379 }, { "compression_loss": 0.0, "distillation_loss": 0.4570309519767761, "epoch": 5.57, "learning_rate": 6.780477728698443e-05, "loss": 0.4629, "step": 15410, "task_loss": 0.5990092158317566 }, { "compression_loss": 0.0, "distillation_loss": 0.4736815094947815, "epoch": 5.57, "learning_rate": 6.777698888310844e-05, "loss": 0.5097, "step": 15420, "task_loss": 0.5480905175209045 }, { "compression_loss": 0.0, "distillation_loss": 0.6659838557243347, "epoch": 5.58, "learning_rate": 6.774903147005596e-05, "loss": 0.4949, "step": 15430, "task_loss": 0.7123420834541321 }, { "compression_loss": 0.0, "distillation_loss": 0.5285578966140747, "epoch": 5.58, "learning_rate": 6.772090519198481e-05, "loss": 0.4649, "step": 15440, "task_loss": 0.6163167953491211 }, { "compression_loss": 0.0, "distillation_loss": 0.564858615398407, "epoch": 5.58, "learning_rate": 6.769261019392352e-05, "loss": 0.4921, "step": 15450, "task_loss": 1.2447395324707031 }, { "compression_loss": 0.0, "distillation_loss": 0.47404301166534424, "epoch": 5.59, "learning_rate": 6.766414662177064e-05, "loss": 0.5179, "step": 15460, "task_loss": 0.5387969613075256 }, { "compression_loss": 0.0, "distillation_loss": 0.47900664806365967, "epoch": 5.59, "learning_rate": 6.76355146222939e-05, "loss": 0.5314, "step": 15470, "task_loss": 0.7243887186050415 }, { "compression_loss": 0.0, "distillation_loss": 0.48273512721061707, "epoch": 5.59, "learning_rate": 6.760671434312953e-05, "loss": 0.4921, "step": 15480, "task_loss": 0.7808024287223816 }, { "compression_loss": 0.0, "distillation_loss": 0.52320396900177, "epoch": 5.6, "learning_rate": 6.757774593278144e-05, "loss": 0.5465, "step": 15490, "task_loss": 0.8334228992462158 }, { "compression_loss": 0.0, "distillation_loss": 0.531326949596405, "epoch": 5.6, "learning_rate": 6.75486095406205e-05, "loss": 0.5283, "step": 15500, "task_loss": 1.0520827770233154 }, { "epoch": 5.6, "eval_exact_match": 82.081362346263, "eval_f1": 89.29942274768753, "step": 15500 }, { "compression_loss": 0.0, "distillation_loss": 0.6806000471115112, "epoch": 5.61, "learning_rate": 6.751930531688376e-05, "loss": 0.5496, "step": 15510, "task_loss": 0.7352679967880249 }, { "compression_loss": 0.0, "distillation_loss": 0.7652064561843872, "epoch": 5.61, "learning_rate": 6.748983341267363e-05, "loss": 0.5732, "step": 15520, "task_loss": 0.9669309854507446 }, { "compression_loss": 0.0, "distillation_loss": 0.3168491721153259, "epoch": 5.61, "learning_rate": 6.746019397995717e-05, "loss": 0.547, "step": 15530, "task_loss": 0.6193458437919617 }, { "compression_loss": 0.0, "distillation_loss": 0.2838849723339081, "epoch": 5.62, "learning_rate": 6.743038717156527e-05, "loss": 0.5339, "step": 15540, "task_loss": 0.47925713658332825 }, { "compression_loss": 0.0, "distillation_loss": 0.5559385418891907, "epoch": 5.62, "learning_rate": 6.740041314119183e-05, "loss": 0.5182, "step": 15550, "task_loss": 0.6141558885574341 }, { "compression_loss": 0.0, "distillation_loss": 0.45640748739242554, "epoch": 5.62, "learning_rate": 6.737027204339306e-05, "loss": 0.519, "step": 15560, "task_loss": 0.5364757776260376 }, { "compression_loss": 0.0, "distillation_loss": 0.4492012858390808, "epoch": 5.63, "learning_rate": 6.733996403358657e-05, "loss": 0.4872, "step": 15570, "task_loss": 0.5102091431617737 }, { "compression_loss": 0.0, "distillation_loss": 0.4823322892189026, "epoch": 5.63, "learning_rate": 6.730948926805068e-05, "loss": 0.4538, "step": 15580, "task_loss": 0.6215452551841736 }, { "compression_loss": 0.0, "distillation_loss": 0.6813780069351196, "epoch": 5.63, "learning_rate": 6.72788479039235e-05, "loss": 0.4924, "step": 15590, "task_loss": 0.8112049698829651 }, { "compression_loss": 0.0, "distillation_loss": 0.4826444387435913, "epoch": 5.64, "learning_rate": 6.724804009920222e-05, "loss": 0.5361, "step": 15600, "task_loss": 0.40426796674728394 }, { "compression_loss": 0.0, "distillation_loss": 0.3255290389060974, "epoch": 5.64, "learning_rate": 6.721706601274225e-05, "loss": 0.5145, "step": 15610, "task_loss": 0.38015216588974 }, { "compression_loss": 0.0, "distillation_loss": 0.44477927684783936, "epoch": 5.65, "learning_rate": 6.718592580425637e-05, "loss": 0.4861, "step": 15620, "task_loss": 0.6369027495384216 }, { "compression_loss": 0.0, "distillation_loss": 0.7055771946907043, "epoch": 5.65, "learning_rate": 6.715461963431398e-05, "loss": 0.5927, "step": 15630, "task_loss": 1.0351436138153076 }, { "compression_loss": 0.0, "distillation_loss": 0.4551340341567993, "epoch": 5.65, "learning_rate": 6.712314766434023e-05, "loss": 0.5257, "step": 15640, "task_loss": 0.7271813154220581 }, { "compression_loss": 0.0, "distillation_loss": 0.621050238609314, "epoch": 5.66, "learning_rate": 6.709151005661517e-05, "loss": 0.4921, "step": 15650, "task_loss": 1.096197247505188 }, { "compression_loss": 0.0, "distillation_loss": 0.5006800293922424, "epoch": 5.66, "learning_rate": 6.705970697427294e-05, "loss": 0.517, "step": 15660, "task_loss": 0.6352694630622864 }, { "compression_loss": 0.0, "distillation_loss": 0.6934675574302673, "epoch": 5.66, "learning_rate": 6.702773858130093e-05, "loss": 0.5508, "step": 15670, "task_loss": 0.721714437007904 }, { "compression_loss": 0.0, "distillation_loss": 0.3144254684448242, "epoch": 5.67, "learning_rate": 6.699560504253894e-05, "loss": 0.4457, "step": 15680, "task_loss": 0.34663382172584534 }, { "compression_loss": 0.0, "distillation_loss": 0.48802831768989563, "epoch": 5.67, "learning_rate": 6.696330652367827e-05, "loss": 0.6041, "step": 15690, "task_loss": 0.6961667537689209 }, { "compression_loss": 0.0, "distillation_loss": 0.4313547611236572, "epoch": 5.67, "learning_rate": 6.693084319126098e-05, "loss": 0.5342, "step": 15700, "task_loss": 0.3994500935077667 }, { "compression_loss": 0.0, "distillation_loss": 0.4418693780899048, "epoch": 5.68, "learning_rate": 6.68982152126789e-05, "loss": 0.5396, "step": 15710, "task_loss": 0.8848820924758911 }, { "compression_loss": 0.0, "distillation_loss": 0.5503214597702026, "epoch": 5.68, "learning_rate": 6.686542275617286e-05, "loss": 0.5216, "step": 15720, "task_loss": 0.5391052961349487 }, { "compression_loss": 0.0, "distillation_loss": 0.53646320104599, "epoch": 5.68, "learning_rate": 6.683246599083182e-05, "loss": 0.5336, "step": 15730, "task_loss": 1.1558949947357178 }, { "compression_loss": 0.0, "distillation_loss": 0.3631494641304016, "epoch": 5.69, "learning_rate": 6.679934508659192e-05, "loss": 0.4783, "step": 15740, "task_loss": 0.4442519545555115 }, { "compression_loss": 0.0, "distillation_loss": 0.5602353811264038, "epoch": 5.69, "learning_rate": 6.676606021423567e-05, "loss": 0.5216, "step": 15750, "task_loss": 0.6735915541648865 }, { "epoch": 5.69, "eval_exact_match": 82.52601702932829, "eval_f1": 89.46075198123665, "step": 15750 }, { "compression_loss": 0.0, "distillation_loss": 0.5539681911468506, "epoch": 5.7, "learning_rate": 6.673261154539109e-05, "loss": 0.4855, "step": 15760, "task_loss": 0.7846614718437195 }, { "compression_loss": 0.0, "distillation_loss": 0.4747917652130127, "epoch": 5.7, "learning_rate": 6.669899925253077e-05, "loss": 0.4877, "step": 15770, "task_loss": 0.5126049518585205 }, { "compression_loss": 0.0, "distillation_loss": 0.4245802164077759, "epoch": 5.7, "learning_rate": 6.666522350897096e-05, "loss": 0.4613, "step": 15780, "task_loss": 0.37426063418388367 }, { "compression_loss": 0.0, "distillation_loss": 0.5295876264572144, "epoch": 5.71, "learning_rate": 6.663128448887077e-05, "loss": 0.5046, "step": 15790, "task_loss": 0.9215235710144043 }, { "compression_loss": 0.0, "distillation_loss": 0.3816160559654236, "epoch": 5.71, "learning_rate": 6.65971823672312e-05, "loss": 0.4756, "step": 15800, "task_loss": 0.7307689189910889 }, { "compression_loss": 0.0, "distillation_loss": 0.38228023052215576, "epoch": 5.71, "learning_rate": 6.656291731989428e-05, "loss": 0.5442, "step": 15810, "task_loss": 0.6373156309127808 }, { "compression_loss": 0.0, "distillation_loss": 0.5242533683776855, "epoch": 5.72, "learning_rate": 6.65319396218302e-05, "loss": 0.5753, "step": 15820, "task_loss": 0.5102771520614624 }, { "compression_loss": 0.0, "distillation_loss": 0.5250728726387024, "epoch": 5.72, "learning_rate": 6.649736550312107e-05, "loss": 0.5102, "step": 15830, "task_loss": 0.2875036597251892 }, { "compression_loss": 0.0, "distillation_loss": 0.6699880957603455, "epoch": 5.72, "learning_rate": 6.646262897340388e-05, "loss": 0.5943, "step": 15840, "task_loss": 0.606027364730835 }, { "compression_loss": 0.0, "distillation_loss": 0.4970390796661377, "epoch": 5.73, "learning_rate": 6.642773021179186e-05, "loss": 0.5663, "step": 15850, "task_loss": 0.7026304006576538 }, { "compression_loss": 0.0, "distillation_loss": 0.47369736433029175, "epoch": 5.73, "learning_rate": 6.639266939823477e-05, "loss": 0.491, "step": 15860, "task_loss": 0.6342217326164246 }, { "compression_loss": 0.0, "distillation_loss": 0.3805849254131317, "epoch": 5.74, "learning_rate": 6.635744671351794e-05, "loss": 0.4797, "step": 15870, "task_loss": 0.6766672730445862 }, { "compression_loss": 0.0, "distillation_loss": 0.502840518951416, "epoch": 5.74, "learning_rate": 6.632206233926139e-05, "loss": 0.5094, "step": 15880, "task_loss": 0.918179452419281 }, { "compression_loss": 0.0, "distillation_loss": 0.36127620935440063, "epoch": 5.74, "learning_rate": 6.628651645791885e-05, "loss": 0.462, "step": 15890, "task_loss": 0.4396474361419678 }, { "compression_loss": 0.0, "distillation_loss": 0.6621323823928833, "epoch": 5.75, "learning_rate": 6.625080925277681e-05, "loss": 0.557, "step": 15900, "task_loss": 0.8044998049736023 }, { "compression_loss": 0.0, "distillation_loss": 0.38126713037490845, "epoch": 5.75, "learning_rate": 6.621494090795367e-05, "loss": 0.54, "step": 15910, "task_loss": 0.8128173351287842 }, { "compression_loss": 0.0, "distillation_loss": 0.4991961121559143, "epoch": 5.75, "learning_rate": 6.617891160839865e-05, "loss": 0.478, "step": 15920, "task_loss": 0.7899810671806335 }, { "compression_loss": 0.0, "distillation_loss": 0.36616766452789307, "epoch": 5.76, "learning_rate": 6.614272153989095e-05, "loss": 0.5037, "step": 15930, "task_loss": 0.43909940123558044 }, { "compression_loss": 0.0, "distillation_loss": 0.4640388488769531, "epoch": 5.76, "learning_rate": 6.61063708890387e-05, "loss": 0.5291, "step": 15940, "task_loss": 0.7651124000549316 }, { "compression_loss": 0.0, "distillation_loss": 0.43500208854675293, "epoch": 5.76, "learning_rate": 6.606985984327813e-05, "loss": 0.5923, "step": 15950, "task_loss": 0.37657132744789124 }, { "compression_loss": 0.0, "distillation_loss": 0.4381626844406128, "epoch": 5.77, "learning_rate": 6.603318859087243e-05, "loss": 0.4844, "step": 15960, "task_loss": 0.4404764771461487 }, { "compression_loss": 0.0, "distillation_loss": 0.4864192306995392, "epoch": 5.77, "learning_rate": 6.599635732091092e-05, "loss": 0.456, "step": 15970, "task_loss": 0.7914941310882568 }, { "compression_loss": 0.0, "distillation_loss": 0.49039989709854126, "epoch": 5.78, "learning_rate": 6.595936622330802e-05, "loss": 0.5051, "step": 15980, "task_loss": 0.4711025357246399 }, { "compression_loss": 0.0, "distillation_loss": 0.36692336201667786, "epoch": 5.78, "learning_rate": 6.592221548880224e-05, "loss": 0.4975, "step": 15990, "task_loss": 0.5510057210922241 }, { "compression_loss": 0.0, "distillation_loss": 0.6534066200256348, "epoch": 5.78, "learning_rate": 6.58849053089553e-05, "loss": 0.4932, "step": 16000, "task_loss": 0.8516380786895752 }, { "epoch": 5.78, "eval_exact_match": 82.42194891201514, "eval_f1": 89.36242276057637, "step": 16000 }, { "compression_loss": 0.0, "distillation_loss": 0.7806046605110168, "epoch": 5.79, "learning_rate": 6.584743587615102e-05, "loss": 0.5337, "step": 16010, "task_loss": 0.8906459808349609 }, { "compression_loss": 0.0, "distillation_loss": 0.4783743619918823, "epoch": 5.79, "learning_rate": 6.580980738359438e-05, "loss": 0.4975, "step": 16020, "task_loss": 0.3754185140132904 }, { "compression_loss": 0.0, "distillation_loss": 0.5132753849029541, "epoch": 5.79, "learning_rate": 6.577202002531056e-05, "loss": 0.4836, "step": 16030, "task_loss": 0.824865460395813 }, { "compression_loss": 0.0, "distillation_loss": 0.708114743232727, "epoch": 5.8, "learning_rate": 6.573407399614388e-05, "loss": 0.5799, "step": 16040, "task_loss": 0.8528615832328796 }, { "compression_loss": 0.0, "distillation_loss": 0.8225008845329285, "epoch": 5.8, "learning_rate": 6.569596949175681e-05, "loss": 0.5178, "step": 16050, "task_loss": 1.1864490509033203 }, { "compression_loss": 0.0, "distillation_loss": 0.663002073764801, "epoch": 5.8, "learning_rate": 6.5657706708629e-05, "loss": 0.5239, "step": 16060, "task_loss": 0.9480224251747131 }, { "compression_loss": 0.0, "distillation_loss": 0.7957673072814941, "epoch": 5.81, "learning_rate": 6.561928584405624e-05, "loss": 0.5899, "step": 16070, "task_loss": 0.778226912021637 }, { "compression_loss": 0.0, "distillation_loss": 0.47262027859687805, "epoch": 5.81, "learning_rate": 6.558070709614942e-05, "loss": 0.5108, "step": 16080, "task_loss": 0.9569321870803833 }, { "compression_loss": 0.0, "distillation_loss": 0.5292235612869263, "epoch": 5.81, "learning_rate": 6.55419706638335e-05, "loss": 0.5378, "step": 16090, "task_loss": 0.8886809349060059 }, { "compression_loss": 0.0, "distillation_loss": 0.40345680713653564, "epoch": 5.82, "learning_rate": 6.550307674684662e-05, "loss": 0.503, "step": 16100, "task_loss": 0.5904554128646851 }, { "compression_loss": 0.0, "distillation_loss": 0.41043514013290405, "epoch": 5.82, "learning_rate": 6.546402554573885e-05, "loss": 0.4817, "step": 16110, "task_loss": 0.40754806995391846 }, { "compression_loss": 0.0, "distillation_loss": 0.5127359628677368, "epoch": 5.83, "learning_rate": 6.54248172618713e-05, "loss": 0.5225, "step": 16120, "task_loss": 0.7525652647018433 }, { "compression_loss": 0.0, "distillation_loss": 0.7493129968643188, "epoch": 5.83, "learning_rate": 6.538545209741511e-05, "loss": 0.4626, "step": 16130, "task_loss": 0.5919231176376343 }, { "compression_loss": 0.0, "distillation_loss": 0.5420757532119751, "epoch": 5.83, "learning_rate": 6.534593025535028e-05, "loss": 0.5729, "step": 16140, "task_loss": 0.7412921190261841 }, { "compression_loss": 0.0, "distillation_loss": 0.6038966178894043, "epoch": 5.84, "learning_rate": 6.530625193946472e-05, "loss": 0.5041, "step": 16150, "task_loss": 0.8964495658874512 }, { "compression_loss": 0.0, "distillation_loss": 0.46763065457344055, "epoch": 5.84, "learning_rate": 6.526641735435317e-05, "loss": 0.4891, "step": 16160, "task_loss": 0.6115823984146118 }, { "compression_loss": 0.0, "distillation_loss": 0.5795122385025024, "epoch": 5.84, "learning_rate": 6.522642670541613e-05, "loss": 0.5359, "step": 16170, "task_loss": 0.57799232006073 }, { "compression_loss": 0.0, "distillation_loss": 0.40503954887390137, "epoch": 5.85, "learning_rate": 6.518628019885888e-05, "loss": 0.4867, "step": 16180, "task_loss": 0.34715238213539124 }, { "compression_loss": 0.0, "distillation_loss": 0.5611152648925781, "epoch": 5.85, "learning_rate": 6.514597804169025e-05, "loss": 0.5164, "step": 16190, "task_loss": 0.9298588633537292 }, { "compression_loss": 0.0, "distillation_loss": 0.6309351325035095, "epoch": 5.85, "learning_rate": 6.510552044172176e-05, "loss": 0.4337, "step": 16200, "task_loss": 0.5857481956481934 }, { "compression_loss": 0.0, "distillation_loss": 0.5015100240707397, "epoch": 5.86, "learning_rate": 6.506490760756639e-05, "loss": 0.4809, "step": 16210, "task_loss": 0.5657186508178711 }, { "compression_loss": 0.0, "distillation_loss": 0.44960838556289673, "epoch": 5.86, "learning_rate": 6.502413974863753e-05, "loss": 0.4203, "step": 16220, "task_loss": 0.8054425120353699 }, { "compression_loss": 0.0, "distillation_loss": 0.40969163179397583, "epoch": 5.87, "learning_rate": 6.498321707514802e-05, "loss": 0.448, "step": 16230, "task_loss": 0.49577945470809937 }, { "compression_loss": 0.0, "distillation_loss": 0.6154443025588989, "epoch": 5.87, "learning_rate": 6.494213979810891e-05, "loss": 0.5039, "step": 16240, "task_loss": 0.8134857416152954 }, { "compression_loss": 0.0, "distillation_loss": 0.5586275458335876, "epoch": 5.87, "learning_rate": 6.490090812932844e-05, "loss": 0.5517, "step": 16250, "task_loss": 0.3241579532623291 }, { "epoch": 5.87, "eval_exact_match": 81.8070009460738, "eval_f1": 89.10508584535329, "step": 16250 }, { "compression_loss": 0.0, "distillation_loss": 0.5156681537628174, "epoch": 5.88, "learning_rate": 6.485952228141097e-05, "loss": 0.4911, "step": 16260, "task_loss": 0.801060140132904 }, { "compression_loss": 0.0, "distillation_loss": 0.5016605854034424, "epoch": 5.88, "learning_rate": 6.481798246775586e-05, "loss": 0.5218, "step": 16270, "task_loss": 0.8665897846221924 }, { "compression_loss": 0.0, "distillation_loss": 0.36766764521598816, "epoch": 5.88, "learning_rate": 6.477628890255634e-05, "loss": 0.5, "step": 16280, "task_loss": 0.3850083351135254 }, { "compression_loss": 0.0, "distillation_loss": 0.44555163383483887, "epoch": 5.89, "learning_rate": 6.473444180079845e-05, "loss": 0.4808, "step": 16290, "task_loss": 0.6322591304779053 }, { "compression_loss": 0.0, "distillation_loss": 0.5872722268104553, "epoch": 5.89, "learning_rate": 6.469244137825993e-05, "loss": 0.4952, "step": 16300, "task_loss": 0.8481529951095581 }, { "compression_loss": 0.0, "distillation_loss": 0.4090435802936554, "epoch": 5.89, "learning_rate": 6.465028785150908e-05, "loss": 0.6102, "step": 16310, "task_loss": 0.7143661975860596 }, { "compression_loss": 0.0, "distillation_loss": 0.3229040503501892, "epoch": 5.9, "learning_rate": 6.460798143790366e-05, "loss": 0.5282, "step": 16320, "task_loss": 0.5130259990692139 }, { "compression_loss": 0.0, "distillation_loss": 0.6251620650291443, "epoch": 5.9, "learning_rate": 6.456552235558976e-05, "loss": 0.5402, "step": 16330, "task_loss": 0.684100866317749 }, { "compression_loss": 0.0, "distillation_loss": 0.48610326647758484, "epoch": 5.91, "learning_rate": 6.452291082350068e-05, "loss": 0.4923, "step": 16340, "task_loss": 0.6871728897094727 }, { "compression_loss": 0.0, "distillation_loss": 0.42472466826438904, "epoch": 5.91, "learning_rate": 6.448014706135582e-05, "loss": 0.4976, "step": 16350, "task_loss": 0.5411844253540039 }, { "compression_loss": 0.0, "distillation_loss": 0.4997144937515259, "epoch": 5.91, "learning_rate": 6.443723128965951e-05, "loss": 0.5019, "step": 16360, "task_loss": 0.5415253639221191 }, { "compression_loss": 0.0, "distillation_loss": 0.4492264986038208, "epoch": 5.92, "learning_rate": 6.439416372969992e-05, "loss": 0.5249, "step": 16370, "task_loss": 0.951138973236084 }, { "compression_loss": 0.0, "distillation_loss": 0.6077766418457031, "epoch": 5.92, "learning_rate": 6.435094460354784e-05, "loss": 0.5132, "step": 16380, "task_loss": 0.8307206630706787 }, { "compression_loss": 0.0, "distillation_loss": 0.5573058724403381, "epoch": 5.92, "learning_rate": 6.430757413405562e-05, "loss": 0.5879, "step": 16390, "task_loss": 0.5353271961212158 }, { "compression_loss": 0.0, "distillation_loss": 0.4729651212692261, "epoch": 5.93, "learning_rate": 6.426405254485603e-05, "loss": 0.5174, "step": 16400, "task_loss": 0.5413171052932739 }, { "compression_loss": 0.0, "distillation_loss": 0.5121825337409973, "epoch": 5.93, "learning_rate": 6.422038006036097e-05, "loss": 0.5453, "step": 16410, "task_loss": 0.8983778357505798 }, { "compression_loss": 0.0, "distillation_loss": 0.4299796223640442, "epoch": 5.93, "learning_rate": 6.417655690576046e-05, "loss": 0.6098, "step": 16420, "task_loss": 0.8138757944107056 }, { "compression_loss": 0.0, "distillation_loss": 0.7405078411102295, "epoch": 5.94, "learning_rate": 6.413258330702145e-05, "loss": 0.5537, "step": 16430, "task_loss": 0.8167030811309814 }, { "compression_loss": 0.0, "distillation_loss": 0.4909617304801941, "epoch": 5.94, "learning_rate": 6.408845949088657e-05, "loss": 0.4987, "step": 16440, "task_loss": 0.5977470874786377 }, { "compression_loss": 0.0, "distillation_loss": 0.49547016620635986, "epoch": 5.95, "learning_rate": 6.404418568487308e-05, "loss": 0.5069, "step": 16450, "task_loss": 0.8176012635231018 }, { "compression_loss": 0.0, "distillation_loss": 0.3618711531162262, "epoch": 5.95, "learning_rate": 6.39997621172716e-05, "loss": 0.4346, "step": 16460, "task_loss": 0.3624705672264099 }, { "compression_loss": 0.0, "distillation_loss": 0.4783799648284912, "epoch": 5.95, "learning_rate": 6.395518901714497e-05, "loss": 0.5274, "step": 16470, "task_loss": 0.9623761177062988 }, { "compression_loss": 0.0, "distillation_loss": 0.33645719289779663, "epoch": 5.96, "learning_rate": 6.391046661432711e-05, "loss": 0.5243, "step": 16480, "task_loss": 0.6531622409820557 }, { "compression_loss": 0.0, "distillation_loss": 0.4208140969276428, "epoch": 5.96, "learning_rate": 6.386559513942175e-05, "loss": 0.4774, "step": 16490, "task_loss": 0.6000040769577026 }, { "compression_loss": 0.0, "distillation_loss": 0.6129087209701538, "epoch": 5.96, "learning_rate": 6.38205748238013e-05, "loss": 0.5298, "step": 16500, "task_loss": 0.7589020729064941 }, { "epoch": 5.96, "eval_exact_match": 81.72185430463576, "eval_f1": 89.09431894529595, "step": 16500 }, { "compression_loss": 0.0, "distillation_loss": 0.560535192489624, "epoch": 5.97, "learning_rate": 6.377540589960567e-05, "loss": 0.5601, "step": 16510, "task_loss": 0.5373163819313049 }, { "compression_loss": 0.0, "distillation_loss": 0.5602094531059265, "epoch": 5.97, "learning_rate": 6.373008859974099e-05, "loss": 0.5261, "step": 16520, "task_loss": 1.0255897045135498 }, { "compression_loss": 0.0, "distillation_loss": 0.4283807873725891, "epoch": 5.97, "learning_rate": 6.36846231578785e-05, "loss": 0.5167, "step": 16530, "task_loss": 0.655482292175293 }, { "compression_loss": 0.0, "distillation_loss": 0.4438017010688782, "epoch": 5.98, "learning_rate": 6.363900980845333e-05, "loss": 0.4951, "step": 16540, "task_loss": 0.4216102361679077 }, { "compression_loss": 0.0, "distillation_loss": 0.5425772070884705, "epoch": 5.98, "learning_rate": 6.359324878666324e-05, "loss": 0.5276, "step": 16550, "task_loss": 0.9083492755889893 }, { "compression_loss": 0.0, "distillation_loss": 0.610127329826355, "epoch": 5.98, "learning_rate": 6.354734032846744e-05, "loss": 0.5713, "step": 16560, "task_loss": 1.105069875717163 }, { "compression_loss": 0.0, "distillation_loss": 0.40713953971862793, "epoch": 5.99, "learning_rate": 6.350128467058539e-05, "loss": 0.4681, "step": 16570, "task_loss": 0.43503206968307495 }, { "compression_loss": 0.0, "distillation_loss": 0.7117682695388794, "epoch": 5.99, "learning_rate": 6.345508205049552e-05, "loss": 0.5597, "step": 16580, "task_loss": 0.6516414880752563 }, { "compression_loss": 0.0, "distillation_loss": 0.7078899145126343, "epoch": 6.0, "learning_rate": 6.340873270643411e-05, "loss": 0.5851, "step": 16590, "task_loss": 0.6792216300964355 }, { "compression_loss": 0.0, "distillation_loss": 0.6714178323745728, "epoch": 6.0, "learning_rate": 6.336223687739394e-05, "loss": 0.5065, "step": 16600, "task_loss": 0.7536165118217468 }, { "compression_loss": 0.0, "distillation_loss": 0.32467812299728394, "epoch": 6.0, "learning_rate": 6.331559480312315e-05, "loss": 0.4598, "step": 16610, "task_loss": 0.392375111579895 }, { "compression_loss": 0.0, "distillation_loss": 0.4250606298446655, "epoch": 6.01, "learning_rate": 6.326880672412396e-05, "loss": 0.3817, "step": 16620, "task_loss": 0.5436099171638489 }, { "compression_loss": 0.0, "distillation_loss": 0.40928852558135986, "epoch": 6.01, "learning_rate": 6.322187288165144e-05, "loss": 0.4347, "step": 16630, "task_loss": 0.4154428243637085 }, { "compression_loss": 0.0, "distillation_loss": 0.593374490737915, "epoch": 6.01, "learning_rate": 6.317479351771226e-05, "loss": 0.4448, "step": 16640, "task_loss": 1.1337848901748657 }, { "compression_loss": 0.0, "distillation_loss": 0.6083024144172668, "epoch": 6.02, "learning_rate": 6.312756887506345e-05, "loss": 0.4901, "step": 16650, "task_loss": 1.052654504776001 }, { "compression_loss": 0.0, "distillation_loss": 0.39533329010009766, "epoch": 6.02, "learning_rate": 6.308019919721113e-05, "loss": 0.4397, "step": 16660, "task_loss": 0.5021288394927979 }, { "compression_loss": 0.0, "distillation_loss": 0.42493703961372375, "epoch": 6.02, "learning_rate": 6.303268472840934e-05, "loss": 0.4398, "step": 16670, "task_loss": 0.6849949359893799 }, { "compression_loss": 0.0, "distillation_loss": 0.39546722173690796, "epoch": 6.03, "learning_rate": 6.29850257136586e-05, "loss": 0.4657, "step": 16680, "task_loss": 0.6547192931175232 }, { "compression_loss": 0.0, "distillation_loss": 0.57149338722229, "epoch": 6.03, "learning_rate": 6.293722239870485e-05, "loss": 0.4044, "step": 16690, "task_loss": 0.834121584892273 }, { "compression_loss": 0.0, "distillation_loss": 0.3922726511955261, "epoch": 6.04, "learning_rate": 6.288927503003805e-05, "loss": 0.429, "step": 16700, "task_loss": 0.40333420038223267 }, { "compression_loss": 0.0, "distillation_loss": 0.3624475598335266, "epoch": 6.04, "learning_rate": 6.284118385489095e-05, "loss": 0.4963, "step": 16710, "task_loss": 0.7349679470062256 }, { "compression_loss": 0.0, "distillation_loss": 0.40686216950416565, "epoch": 6.04, "learning_rate": 6.279294912123784e-05, "loss": 0.409, "step": 16720, "task_loss": 0.7947590947151184 }, { "compression_loss": 0.0, "distillation_loss": 0.395913302898407, "epoch": 6.05, "learning_rate": 6.27445710777932e-05, "loss": 0.3941, "step": 16730, "task_loss": 0.6178956031799316 }, { "compression_loss": 0.0, "distillation_loss": 0.42935115098953247, "epoch": 6.05, "learning_rate": 6.269604997401051e-05, "loss": 0.4276, "step": 16740, "task_loss": 0.6020127534866333 }, { "compression_loss": 0.0, "distillation_loss": 0.41655611991882324, "epoch": 6.05, "learning_rate": 6.264738606008087e-05, "loss": 0.4512, "step": 16750, "task_loss": 0.4740205407142639 }, { "epoch": 6.05, "eval_exact_match": 82.19489120151371, "eval_f1": 89.3732801354867, "step": 16750 }, { "compression_loss": 0.0, "distillation_loss": 0.4346901774406433, "epoch": 6.06, "learning_rate": 6.259857958693182e-05, "loss": 0.3777, "step": 16760, "task_loss": 0.8214448690414429 }, { "compression_loss": 0.0, "distillation_loss": 0.5657368302345276, "epoch": 6.06, "learning_rate": 6.254963080622591e-05, "loss": 0.4877, "step": 16770, "task_loss": 0.7357831001281738 }, { "compression_loss": 0.0, "distillation_loss": 0.5232205986976624, "epoch": 6.06, "learning_rate": 6.250053997035956e-05, "loss": 0.4555, "step": 16780, "task_loss": 1.022857904434204 }, { "compression_loss": 0.0, "distillation_loss": 0.43613356351852417, "epoch": 6.07, "learning_rate": 6.245130733246159e-05, "loss": 0.4097, "step": 16790, "task_loss": 1.0982863903045654 }, { "compression_loss": 0.0, "distillation_loss": 0.4957405924797058, "epoch": 6.07, "learning_rate": 6.240193314639205e-05, "loss": 0.4478, "step": 16800, "task_loss": 1.0710291862487793 }, { "compression_loss": 0.0, "distillation_loss": 0.3766104280948639, "epoch": 6.08, "learning_rate": 6.235241766674084e-05, "loss": 0.4634, "step": 16810, "task_loss": 0.608417272567749 }, { "compression_loss": 0.0, "distillation_loss": 0.38640034198760986, "epoch": 6.08, "learning_rate": 6.230276114882642e-05, "loss": 0.3841, "step": 16820, "task_loss": 0.48826298117637634 }, { "compression_loss": 0.0, "distillation_loss": 0.4803047776222229, "epoch": 6.08, "learning_rate": 6.225296384869451e-05, "loss": 0.4249, "step": 16830, "task_loss": 0.7844042778015137 }, { "compression_loss": 0.0, "distillation_loss": 0.3516848683357239, "epoch": 6.09, "learning_rate": 6.220302602311674e-05, "loss": 0.4653, "step": 16840, "task_loss": 0.49346923828125 }, { "compression_loss": 0.0, "distillation_loss": 0.499299556016922, "epoch": 6.09, "learning_rate": 6.21529479295893e-05, "loss": 0.4503, "step": 16850, "task_loss": 0.5536627769470215 }, { "compression_loss": 0.0, "distillation_loss": 0.29132914543151855, "epoch": 6.09, "learning_rate": 6.21027298263317e-05, "loss": 0.3977, "step": 16860, "task_loss": 0.4496367275714874 }, { "compression_loss": 0.0, "distillation_loss": 0.4155663847923279, "epoch": 6.1, "learning_rate": 6.205237197228537e-05, "loss": 0.4114, "step": 16870, "task_loss": 0.8739005327224731 }, { "compression_loss": 0.0, "distillation_loss": 0.4325769245624542, "epoch": 6.1, "learning_rate": 6.200187462711232e-05, "loss": 0.457, "step": 16880, "task_loss": 0.3728763461112976 }, { "compression_loss": 0.0, "distillation_loss": 0.3581392168998718, "epoch": 6.1, "learning_rate": 6.195123805119386e-05, "loss": 0.4158, "step": 16890, "task_loss": 0.6587432622909546 }, { "compression_loss": 0.0, "distillation_loss": 0.34714964032173157, "epoch": 6.11, "learning_rate": 6.19004625056292e-05, "loss": 0.4553, "step": 16900, "task_loss": 0.939296305179596 }, { "compression_loss": 0.0, "distillation_loss": 0.46273887157440186, "epoch": 6.11, "learning_rate": 6.184954825223412e-05, "loss": 0.444, "step": 16910, "task_loss": 0.7927017211914062 }, { "compression_loss": 0.0, "distillation_loss": 0.3514648973941803, "epoch": 6.11, "learning_rate": 6.179849555353966e-05, "loss": 0.4455, "step": 16920, "task_loss": 0.5540809631347656 }, { "compression_loss": 0.0, "distillation_loss": 0.33696162700653076, "epoch": 6.12, "learning_rate": 6.174730467279065e-05, "loss": 0.438, "step": 16930, "task_loss": 0.6107865571975708 }, { "compression_loss": 0.0, "distillation_loss": 0.5959919095039368, "epoch": 6.12, "learning_rate": 6.169597587394453e-05, "loss": 0.4469, "step": 16940, "task_loss": 0.7320927977561951 }, { "compression_loss": 0.0, "distillation_loss": 0.616447389125824, "epoch": 6.13, "learning_rate": 6.164450942166983e-05, "loss": 0.4048, "step": 16950, "task_loss": 0.6176183819770813 }, { "compression_loss": 0.0, "distillation_loss": 0.5620734691619873, "epoch": 6.13, "learning_rate": 6.159290558134487e-05, "loss": 0.4278, "step": 16960, "task_loss": 0.6595306396484375 }, { "compression_loss": 0.0, "distillation_loss": 0.3424335718154907, "epoch": 6.13, "learning_rate": 6.154116461905642e-05, "loss": 0.4275, "step": 16970, "task_loss": 0.6462994813919067 }, { "compression_loss": 0.0, "distillation_loss": 0.6205884218215942, "epoch": 6.14, "learning_rate": 6.14892868015983e-05, "loss": 0.4362, "step": 16980, "task_loss": 0.7719211578369141 }, { "compression_loss": 0.0, "distillation_loss": 0.35494518280029297, "epoch": 6.14, "learning_rate": 6.143727239646995e-05, "loss": 0.3998, "step": 16990, "task_loss": 0.2818894386291504 }, { "compression_loss": 0.0, "distillation_loss": 0.5920026302337646, "epoch": 6.14, "learning_rate": 6.138512167187514e-05, "loss": 0.4274, "step": 17000, "task_loss": 0.43748384714126587 }, { "epoch": 6.14, "eval_exact_match": 82.16650898770104, "eval_f1": 89.3841314898369, "step": 17000 }, { "compression_loss": 0.0, "distillation_loss": 0.6196434497833252, "epoch": 6.15, "learning_rate": 6.133283489672054e-05, "loss": 0.4509, "step": 17010, "task_loss": 1.4450292587280273 }, { "compression_loss": 0.0, "distillation_loss": 0.49087828397750854, "epoch": 6.15, "learning_rate": 6.128041234061437e-05, "loss": 0.4569, "step": 17020, "task_loss": 0.7537893056869507 }, { "compression_loss": 0.0, "distillation_loss": 0.42802369594573975, "epoch": 6.15, "learning_rate": 6.122785427386493e-05, "loss": 0.3961, "step": 17030, "task_loss": 0.43074288964271545 }, { "compression_loss": 0.0, "distillation_loss": 0.43579283356666565, "epoch": 6.16, "learning_rate": 6.117516096747929e-05, "loss": 0.4457, "step": 17040, "task_loss": 0.842292070388794 }, { "compression_loss": 0.0, "distillation_loss": 0.5460898876190186, "epoch": 6.16, "learning_rate": 6.112233269316187e-05, "loss": 0.4811, "step": 17050, "task_loss": 0.6467231512069702 }, { "compression_loss": 0.0, "distillation_loss": 0.4218035638332367, "epoch": 6.17, "learning_rate": 6.106936972331298e-05, "loss": 0.4519, "step": 17060, "task_loss": 0.5069524645805359 }, { "compression_loss": 0.0, "distillation_loss": 0.5733527541160583, "epoch": 6.17, "learning_rate": 6.101627233102756e-05, "loss": 0.5014, "step": 17070, "task_loss": 1.1166313886642456 }, { "compression_loss": 0.0, "distillation_loss": 0.29548192024230957, "epoch": 6.17, "learning_rate": 6.096304079009358e-05, "loss": 0.4181, "step": 17080, "task_loss": 0.5228471159934998 }, { "compression_loss": 0.0, "distillation_loss": 0.39930418133735657, "epoch": 6.18, "learning_rate": 6.090967537499077e-05, "loss": 0.4604, "step": 17090, "task_loss": 0.322740763425827 }, { "compression_loss": 0.0, "distillation_loss": 0.3291262984275818, "epoch": 6.18, "learning_rate": 6.085617636088917e-05, "loss": 0.4097, "step": 17100, "task_loss": 0.35267138481140137 }, { "compression_loss": 0.0, "distillation_loss": 0.4520864188671112, "epoch": 6.18, "learning_rate": 6.080254402364767e-05, "loss": 0.4308, "step": 17110, "task_loss": 0.6222145557403564 }, { "compression_loss": 0.0, "distillation_loss": 0.39761024713516235, "epoch": 6.19, "learning_rate": 6.074877863981264e-05, "loss": 0.3809, "step": 17120, "task_loss": 0.6330628395080566 }, { "compression_loss": 0.0, "distillation_loss": 0.38448137044906616, "epoch": 6.19, "learning_rate": 6.06948804866165e-05, "loss": 0.3893, "step": 17130, "task_loss": 0.5772709846496582 }, { "compression_loss": 0.0, "distillation_loss": 0.39714741706848145, "epoch": 6.19, "learning_rate": 6.0640849841976206e-05, "loss": 0.4316, "step": 17140, "task_loss": 0.3851231336593628 }, { "compression_loss": 0.0, "distillation_loss": 0.34323185682296753, "epoch": 6.2, "learning_rate": 6.058668698449197e-05, "loss": 0.4345, "step": 17150, "task_loss": 0.42983704805374146 }, { "compression_loss": 0.0, "distillation_loss": 0.5064968466758728, "epoch": 6.2, "learning_rate": 6.0532392193445684e-05, "loss": 0.4824, "step": 17160, "task_loss": 0.749971866607666 }, { "compression_loss": 0.0, "distillation_loss": 0.30737966299057007, "epoch": 6.21, "learning_rate": 6.047796574879955e-05, "loss": 0.4194, "step": 17170, "task_loss": 0.5119467973709106 }, { "compression_loss": 0.0, "distillation_loss": 0.531447172164917, "epoch": 6.21, "learning_rate": 6.0423407931194616e-05, "loss": 0.465, "step": 17180, "task_loss": 0.43263059854507446 }, { "compression_loss": 0.0, "distillation_loss": 0.33223626017570496, "epoch": 6.21, "learning_rate": 6.036871902194934e-05, "loss": 0.3791, "step": 17190, "task_loss": 0.6570825576782227 }, { "compression_loss": 0.0, "distillation_loss": 0.5735246539115906, "epoch": 6.22, "learning_rate": 6.031389930305813e-05, "loss": 0.4322, "step": 17200, "task_loss": 0.7190378308296204 }, { "compression_loss": 0.0, "distillation_loss": 0.47174426913261414, "epoch": 6.22, "learning_rate": 6.025894905718988e-05, "loss": 0.4194, "step": 17210, "task_loss": 0.8839192390441895 }, { "compression_loss": 0.0, "distillation_loss": 0.5682074427604675, "epoch": 6.22, "learning_rate": 6.0203868567686544e-05, "loss": 0.4753, "step": 17220, "task_loss": 0.7814291715621948 }, { "compression_loss": 0.0, "distillation_loss": 0.5652934908866882, "epoch": 6.23, "learning_rate": 6.014865811856164e-05, "loss": 0.4415, "step": 17230, "task_loss": 0.7998194694519043 }, { "compression_loss": 0.0, "distillation_loss": 0.3965378999710083, "epoch": 6.23, "learning_rate": 6.009331799449884e-05, "loss": 0.4436, "step": 17240, "task_loss": 0.5939300656318665 }, { "compression_loss": 0.0, "distillation_loss": 0.35603681206703186, "epoch": 6.23, "learning_rate": 6.003784848085037e-05, "loss": 0.4256, "step": 17250, "task_loss": 0.35019153356552124 }, { "epoch": 6.23, "eval_exact_match": 82.64900662251655, "eval_f1": 89.77520373181453, "step": 17250 }, { "compression_loss": 0.0, "distillation_loss": 0.49196216464042664, "epoch": 6.24, "learning_rate": 5.998224986363576e-05, "loss": 0.4834, "step": 17260, "task_loss": 1.102785587310791 }, { "compression_loss": 0.0, "distillation_loss": 0.5928846597671509, "epoch": 6.24, "learning_rate": 5.992652242954014e-05, "loss": 0.4438, "step": 17270, "task_loss": 0.4663623571395874 }, { "compression_loss": 0.0, "distillation_loss": 0.45873433351516724, "epoch": 6.25, "learning_rate": 5.987066646591291e-05, "loss": 0.4328, "step": 17280, "task_loss": 0.49007028341293335 }, { "compression_loss": 0.0, "distillation_loss": 0.3176501989364624, "epoch": 6.25, "learning_rate": 5.9814682260766194e-05, "loss": 0.4403, "step": 17290, "task_loss": 0.5457800030708313 }, { "compression_loss": 0.0, "distillation_loss": 0.3774525225162506, "epoch": 6.25, "learning_rate": 5.975857010277339e-05, "loss": 0.3944, "step": 17300, "task_loss": 0.7043144106864929 }, { "compression_loss": 0.0, "distillation_loss": 0.34644702076911926, "epoch": 6.26, "learning_rate": 5.9702330281267646e-05, "loss": 0.4241, "step": 17310, "task_loss": 0.34404587745666504 }, { "compression_loss": 0.0, "distillation_loss": 0.3558131754398346, "epoch": 6.26, "learning_rate": 5.9645963086240405e-05, "loss": 0.389, "step": 17320, "task_loss": 0.5505707263946533 }, { "compression_loss": 0.0, "distillation_loss": 0.4252021908760071, "epoch": 6.26, "learning_rate": 5.958946880833986e-05, "loss": 0.4379, "step": 17330, "task_loss": 0.6420973539352417 }, { "compression_loss": 0.0, "distillation_loss": 0.36581557989120483, "epoch": 6.27, "learning_rate": 5.953284773886952e-05, "loss": 0.4012, "step": 17340, "task_loss": 0.37061429023742676 }, { "compression_loss": 0.0, "distillation_loss": 0.48756957054138184, "epoch": 6.27, "learning_rate": 5.947610016978664e-05, "loss": 0.4317, "step": 17350, "task_loss": 0.8574424982070923 }, { "compression_loss": 0.0, "distillation_loss": 0.45216867327690125, "epoch": 6.27, "learning_rate": 5.94192263937008e-05, "loss": 0.4094, "step": 17360, "task_loss": 0.5074952840805054 }, { "compression_loss": 0.0, "distillation_loss": 0.6077759265899658, "epoch": 6.28, "learning_rate": 5.936222670387228e-05, "loss": 0.438, "step": 17370, "task_loss": 0.6022177338600159 }, { "compression_loss": 0.0, "distillation_loss": 0.46468818187713623, "epoch": 6.28, "learning_rate": 5.930510139421068e-05, "loss": 0.4133, "step": 17380, "task_loss": 0.602958619594574 }, { "compression_loss": 0.0, "distillation_loss": 0.2893716096878052, "epoch": 6.28, "learning_rate": 5.924785075927328e-05, "loss": 0.4087, "step": 17390, "task_loss": 0.3538118600845337 }, { "compression_loss": 0.0, "distillation_loss": 0.36932480335235596, "epoch": 6.29, "learning_rate": 5.919047509426362e-05, "loss": 0.4407, "step": 17400, "task_loss": 0.4979381561279297 }, { "compression_loss": 0.0, "distillation_loss": 0.6854021549224854, "epoch": 6.29, "learning_rate": 5.913297469502991e-05, "loss": 0.443, "step": 17410, "task_loss": 0.5967050790786743 }, { "compression_loss": 0.0, "distillation_loss": 0.5585907697677612, "epoch": 6.3, "learning_rate": 5.907534985806355e-05, "loss": 0.4198, "step": 17420, "task_loss": 0.7241256833076477 }, { "compression_loss": 0.0, "distillation_loss": 0.33727091550827026, "epoch": 6.3, "learning_rate": 5.901760088049758e-05, "loss": 0.4343, "step": 17430, "task_loss": 0.4912257492542267 }, { "compression_loss": 0.0, "distillation_loss": 0.3864668309688568, "epoch": 6.3, "learning_rate": 5.895972806010516e-05, "loss": 0.4325, "step": 17440, "task_loss": 0.8334698677062988 }, { "compression_loss": 0.0, "distillation_loss": 0.36405840516090393, "epoch": 6.31, "learning_rate": 5.890173169529798e-05, "loss": 0.4259, "step": 17450, "task_loss": 0.6634342670440674 }, { "compression_loss": 0.0, "distillation_loss": 0.30496692657470703, "epoch": 6.31, "learning_rate": 5.884361208512483e-05, "loss": 0.443, "step": 17460, "task_loss": 0.5642157196998596 }, { "compression_loss": 0.0, "distillation_loss": 0.42527127265930176, "epoch": 6.31, "learning_rate": 5.878536952926994e-05, "loss": 0.4407, "step": 17470, "task_loss": 0.6014546751976013 }, { "compression_loss": 0.0, "distillation_loss": 0.4635869860649109, "epoch": 6.32, "learning_rate": 5.872700432805154e-05, "loss": 0.4449, "step": 17480, "task_loss": 0.7708749175071716 }, { "compression_loss": 0.0, "distillation_loss": 0.6213729977607727, "epoch": 6.32, "learning_rate": 5.866851678242021e-05, "loss": 0.4247, "step": 17490, "task_loss": 0.6365109086036682 }, { "compression_loss": 0.0, "distillation_loss": 0.3652482032775879, "epoch": 6.32, "learning_rate": 5.86099071939574e-05, "loss": 0.4398, "step": 17500, "task_loss": 0.36544913053512573 }, { "epoch": 6.32, "eval_exact_match": 82.37464522232735, "eval_f1": 89.41607657285384, "step": 17500 }, { "compression_loss": 0.0, "distillation_loss": 0.3291020393371582, "epoch": 6.33, "learning_rate": 5.855117586487388e-05, "loss": 0.3939, "step": 17510, "task_loss": 0.3664117455482483 }, { "compression_loss": 0.0, "distillation_loss": 0.4412246346473694, "epoch": 6.33, "learning_rate": 5.849232309800812e-05, "loss": 0.4223, "step": 17520, "task_loss": 0.7607033252716064 }, { "compression_loss": 0.0, "distillation_loss": 0.5140312910079956, "epoch": 6.34, "learning_rate": 5.8433349196824764e-05, "loss": 0.3941, "step": 17530, "task_loss": 1.1523401737213135 }, { "compression_loss": 0.0, "distillation_loss": 0.39518868923187256, "epoch": 6.34, "learning_rate": 5.83742544654131e-05, "loss": 0.4747, "step": 17540, "task_loss": 0.605819582939148 }, { "compression_loss": 0.0, "distillation_loss": 0.6016864776611328, "epoch": 6.34, "learning_rate": 5.831503920848542e-05, "loss": 0.4245, "step": 17550, "task_loss": 0.6810322403907776 }, { "compression_loss": 0.0, "distillation_loss": 0.4904579222202301, "epoch": 6.35, "learning_rate": 5.825570373137551e-05, "loss": 0.4252, "step": 17560, "task_loss": 0.44462671875953674 }, { "compression_loss": 0.0, "distillation_loss": 0.5831754207611084, "epoch": 6.35, "learning_rate": 5.819624834003702e-05, "loss": 0.4582, "step": 17570, "task_loss": 0.7443471550941467 }, { "compression_loss": 0.0, "distillation_loss": 0.5073849558830261, "epoch": 6.35, "learning_rate": 5.8136673341041975e-05, "loss": 0.4428, "step": 17580, "task_loss": 0.38186776638031006 }, { "compression_loss": 0.0, "distillation_loss": 0.41992390155792236, "epoch": 6.36, "learning_rate": 5.807697904157908e-05, "loss": 0.4408, "step": 17590, "task_loss": 0.7133923172950745 }, { "compression_loss": 0.0, "distillation_loss": 0.3772296905517578, "epoch": 6.36, "learning_rate": 5.801716574945222e-05, "loss": 0.4687, "step": 17600, "task_loss": 0.5094341039657593 }, { "compression_loss": 0.0, "distillation_loss": 0.47077786922454834, "epoch": 6.36, "learning_rate": 5.795723377307885e-05, "loss": 0.4438, "step": 17610, "task_loss": 0.4625624418258667 }, { "compression_loss": 0.0, "distillation_loss": 0.3527754545211792, "epoch": 6.37, "learning_rate": 5.789718342148839e-05, "loss": 0.3928, "step": 17620, "task_loss": 0.2926297187805176 }, { "compression_loss": 0.0, "distillation_loss": 0.5881116390228271, "epoch": 6.37, "learning_rate": 5.783701500432064e-05, "loss": 0.4462, "step": 17630, "task_loss": 0.8188558220863342 }, { "compression_loss": 0.0, "distillation_loss": 0.4231293797492981, "epoch": 6.38, "learning_rate": 5.777672883182419e-05, "loss": 0.451, "step": 17640, "task_loss": 0.427749902009964 }, { "compression_loss": 0.0, "distillation_loss": 0.4589058756828308, "epoch": 6.38, "learning_rate": 5.771632521485482e-05, "loss": 0.4511, "step": 17650, "task_loss": 0.6190907955169678 }, { "compression_loss": 0.0, "distillation_loss": 0.4041414260864258, "epoch": 6.38, "learning_rate": 5.76558044648739e-05, "loss": 0.442, "step": 17660, "task_loss": 0.3975062370300293 }, { "compression_loss": 0.0, "distillation_loss": 0.4810088276863098, "epoch": 6.39, "learning_rate": 5.7595166893946776e-05, "loss": 0.4074, "step": 17670, "task_loss": 0.8788394927978516 }, { "compression_loss": 0.0, "distillation_loss": 0.3534824252128601, "epoch": 6.39, "learning_rate": 5.7534412814741126e-05, "loss": 0.4732, "step": 17680, "task_loss": 0.41670382022857666 }, { "compression_loss": 0.0, "distillation_loss": 0.36095911264419556, "epoch": 6.39, "learning_rate": 5.747354254052542e-05, "loss": 0.4306, "step": 17690, "task_loss": 0.6991243958473206 }, { "compression_loss": 0.0, "distillation_loss": 0.3893577754497528, "epoch": 6.4, "learning_rate": 5.741255638516727e-05, "loss": 0.4119, "step": 17700, "task_loss": 0.6416240930557251 }, { "compression_loss": 0.0, "distillation_loss": 0.33806878328323364, "epoch": 6.4, "learning_rate": 5.7351454663131803e-05, "loss": 0.4067, "step": 17710, "task_loss": 0.6782964468002319 }, { "compression_loss": 0.0, "distillation_loss": 0.3822578191757202, "epoch": 6.4, "learning_rate": 5.729023768948003e-05, "loss": 0.4106, "step": 17720, "task_loss": 0.5011782050132751 }, { "compression_loss": 0.0, "distillation_loss": 0.5085767507553101, "epoch": 6.41, "learning_rate": 5.7228905779867264e-05, "loss": 0.3913, "step": 17730, "task_loss": 0.5804252624511719 }, { "compression_loss": 0.0, "distillation_loss": 0.5053203105926514, "epoch": 6.41, "learning_rate": 5.7167459250541455e-05, "loss": 0.457, "step": 17740, "task_loss": 0.5962353944778442 }, { "compression_loss": 0.0, "distillation_loss": 0.4938843846321106, "epoch": 6.41, "learning_rate": 5.710589841834156e-05, "loss": 0.449, "step": 17750, "task_loss": 0.9457273483276367 }, { "epoch": 6.41, "eval_exact_match": 82.39356669820246, "eval_f1": 89.4370835165859, "step": 17750 }, { "compression_loss": 0.0, "distillation_loss": 0.3040074110031128, "epoch": 6.42, "learning_rate": 5.704422360069595e-05, "loss": 0.4009, "step": 17760, "task_loss": 0.37006545066833496 }, { "compression_loss": 0.0, "distillation_loss": 0.289045125246048, "epoch": 6.42, "learning_rate": 5.69824351156207e-05, "loss": 0.4099, "step": 17770, "task_loss": 0.9054403305053711 }, { "compression_loss": 0.0, "distillation_loss": 0.3423129916191101, "epoch": 6.43, "learning_rate": 5.692053328171803e-05, "loss": 0.3902, "step": 17780, "task_loss": 0.7568475008010864 }, { "compression_loss": 0.0, "distillation_loss": 0.4408140480518341, "epoch": 6.43, "learning_rate": 5.685851841817462e-05, "loss": 0.4457, "step": 17790, "task_loss": 0.9160474538803101 }, { "compression_loss": 0.0, "distillation_loss": 0.3867258131504059, "epoch": 6.43, "learning_rate": 5.679639084475993e-05, "loss": 0.4964, "step": 17800, "task_loss": 0.3529086112976074 }, { "compression_loss": 0.0, "distillation_loss": 0.3333200216293335, "epoch": 6.44, "learning_rate": 5.6734150881824656e-05, "loss": 0.3877, "step": 17810, "task_loss": 0.3891972303390503 }, { "compression_loss": 0.0, "distillation_loss": 0.38562119007110596, "epoch": 6.44, "learning_rate": 5.667179885029895e-05, "loss": 0.4839, "step": 17820, "task_loss": 0.43628987669944763 }, { "compression_loss": 0.0, "distillation_loss": 0.28627991676330566, "epoch": 6.44, "learning_rate": 5.660933507169086e-05, "loss": 0.4551, "step": 17830, "task_loss": 0.36466485261917114 }, { "compression_loss": 0.0, "distillation_loss": 0.36773768067359924, "epoch": 6.45, "learning_rate": 5.654675986808465e-05, "loss": 0.4461, "step": 17840, "task_loss": 0.4903709292411804 }, { "compression_loss": 0.0, "distillation_loss": 0.4122188091278076, "epoch": 6.45, "learning_rate": 5.6484073562139083e-05, "loss": 0.4621, "step": 17850, "task_loss": 0.27144575119018555 }, { "compression_loss": 0.0, "distillation_loss": 0.3822740614414215, "epoch": 6.45, "learning_rate": 5.642127647708586e-05, "loss": 0.4331, "step": 17860, "task_loss": 0.3758048713207245 }, { "compression_loss": 0.0, "distillation_loss": 0.4019128978252411, "epoch": 6.46, "learning_rate": 5.635836893672784e-05, "loss": 0.464, "step": 17870, "task_loss": 0.2744655907154083 }, { "compression_loss": 0.0, "distillation_loss": 0.5253816246986389, "epoch": 6.46, "learning_rate": 5.6295351265437475e-05, "loss": 0.4224, "step": 17880, "task_loss": 0.9804195165634155 }, { "compression_loss": 0.0, "distillation_loss": 0.42899399995803833, "epoch": 6.47, "learning_rate": 5.6232223788155074e-05, "loss": 0.4357, "step": 17890, "task_loss": 0.5716034173965454 }, { "compression_loss": 0.0, "distillation_loss": 0.390700101852417, "epoch": 6.47, "learning_rate": 5.616898683038712e-05, "loss": 0.4725, "step": 17900, "task_loss": 0.6366195678710938 }, { "compression_loss": 0.0, "distillation_loss": 0.328586220741272, "epoch": 6.47, "learning_rate": 5.610564071820462e-05, "loss": 0.4537, "step": 17910, "task_loss": 0.6518372893333435 }, { "compression_loss": 0.0, "distillation_loss": 0.38455188274383545, "epoch": 6.48, "learning_rate": 5.6042185778241474e-05, "loss": 0.4551, "step": 17920, "task_loss": 0.21707725524902344 }, { "compression_loss": 0.0, "distillation_loss": 0.5620262622833252, "epoch": 6.48, "learning_rate": 5.5978622337692656e-05, "loss": 0.4955, "step": 17930, "task_loss": 1.0111944675445557 }, { "compression_loss": 0.0, "distillation_loss": 0.44680696725845337, "epoch": 6.48, "learning_rate": 5.5914950724312644e-05, "loss": 0.4513, "step": 17940, "task_loss": 0.4735015630722046 }, { "compression_loss": 0.0, "distillation_loss": 0.5101405382156372, "epoch": 6.49, "learning_rate": 5.5851171266413694e-05, "loss": 0.4333, "step": 17950, "task_loss": 0.7333424687385559 }, { "compression_loss": 0.0, "distillation_loss": 0.46911144256591797, "epoch": 6.49, "learning_rate": 5.578728429286414e-05, "loss": 0.4418, "step": 17960, "task_loss": 0.9456169605255127 }, { "compression_loss": 0.0, "distillation_loss": 0.40388286113739014, "epoch": 6.49, "learning_rate": 5.5723290133086686e-05, "loss": 0.4104, "step": 17970, "task_loss": 0.5725729465484619 }, { "compression_loss": 0.0, "distillation_loss": 0.3859924077987671, "epoch": 6.5, "learning_rate": 5.5659189117056755e-05, "loss": 0.4469, "step": 17980, "task_loss": 0.5803688764572144 }, { "compression_loss": 0.0, "distillation_loss": 0.443365216255188, "epoch": 6.5, "learning_rate": 5.5594981575300746e-05, "loss": 0.4438, "step": 17990, "task_loss": 0.5173107385635376 }, { "compression_loss": 0.0, "distillation_loss": 0.36923152208328247, "epoch": 6.51, "learning_rate": 5.5530667838894306e-05, "loss": 0.4254, "step": 18000, "task_loss": 0.6575000286102295 }, { "epoch": 6.51, "eval_exact_match": 82.64900662251655, "eval_f1": 89.58216558481634, "step": 18000 }, { "compression_loss": 0.0, "distillation_loss": 0.43531203269958496, "epoch": 6.51, "learning_rate": 5.546624823946071e-05, "loss": 0.4421, "step": 18010, "task_loss": 0.6014666557312012 }, { "compression_loss": 0.0, "distillation_loss": 0.41215750575065613, "epoch": 6.51, "learning_rate": 5.5401723109169074e-05, "loss": 0.4351, "step": 18020, "task_loss": 0.6380587816238403 }, { "compression_loss": 0.0, "distillation_loss": 0.508256196975708, "epoch": 6.52, "learning_rate": 5.5337092780732664e-05, "loss": 0.4143, "step": 18030, "task_loss": 0.6583020687103271 }, { "compression_loss": 0.0, "distillation_loss": 0.31025072932243347, "epoch": 6.52, "learning_rate": 5.5272357587407176e-05, "loss": 0.4536, "step": 18040, "task_loss": 0.39846885204315186 }, { "compression_loss": 0.0, "distillation_loss": 0.42146748304367065, "epoch": 6.52, "learning_rate": 5.520751786298905e-05, "loss": 0.4195, "step": 18050, "task_loss": 0.45628345012664795 }, { "compression_loss": 0.0, "distillation_loss": 0.35557764768600464, "epoch": 6.53, "learning_rate": 5.51425739418137e-05, "loss": 0.4741, "step": 18060, "task_loss": 0.46955522894859314 }, { "compression_loss": 0.0, "distillation_loss": 0.46768325567245483, "epoch": 6.53, "learning_rate": 5.507752615875383e-05, "loss": 0.4104, "step": 18070, "task_loss": 0.7435535192489624 }, { "compression_loss": 0.0, "distillation_loss": 0.43830177187919617, "epoch": 6.53, "learning_rate": 5.501237484921767e-05, "loss": 0.5065, "step": 18080, "task_loss": 0.5534304976463318 }, { "compression_loss": 0.0, "distillation_loss": 0.41444259881973267, "epoch": 6.54, "learning_rate": 5.494712034914728e-05, "loss": 0.478, "step": 18090, "task_loss": 0.4837718605995178 }, { "compression_loss": 0.0, "distillation_loss": 0.3985566198825836, "epoch": 6.54, "learning_rate": 5.488176299501683e-05, "loss": 0.4515, "step": 18100, "task_loss": 0.48695647716522217 }, { "compression_loss": 0.0, "distillation_loss": 0.38144785165786743, "epoch": 6.54, "learning_rate": 5.4816303123830796e-05, "loss": 0.4537, "step": 18110, "task_loss": 0.7126217484474182 }, { "compression_loss": 0.0, "distillation_loss": 0.46962636709213257, "epoch": 6.55, "learning_rate": 5.4750741073122284e-05, "loss": 0.4182, "step": 18120, "task_loss": 0.6314877867698669 }, { "compression_loss": 0.0, "distillation_loss": 0.532020628452301, "epoch": 6.55, "learning_rate": 5.4685077180951276e-05, "loss": 0.4502, "step": 18130, "task_loss": 0.4621773660182953 }, { "compression_loss": 0.0, "distillation_loss": 0.3801581561565399, "epoch": 6.56, "learning_rate": 5.461931178590289e-05, "loss": 0.4401, "step": 18140, "task_loss": 0.508671224117279 }, { "compression_loss": 0.0, "distillation_loss": 0.4196373522281647, "epoch": 6.56, "learning_rate": 5.4553445227085605e-05, "loss": 0.4309, "step": 18150, "task_loss": 0.39120665192604065 }, { "compression_loss": 0.0, "distillation_loss": 0.34238317608833313, "epoch": 6.56, "learning_rate": 5.4487477844129556e-05, "loss": 0.407, "step": 18160, "task_loss": 0.4414762854576111 }, { "compression_loss": 0.0, "distillation_loss": 0.37008988857269287, "epoch": 6.57, "learning_rate": 5.442802127595373e-05, "loss": 0.4509, "step": 18170, "task_loss": 0.2949051260948181 }, { "compression_loss": 0.0, "distillation_loss": 0.42825210094451904, "epoch": 6.57, "learning_rate": 5.4361863264675574e-05, "loss": 0.424, "step": 18180, "task_loss": 0.5720166563987732 }, { "compression_loss": 0.0, "distillation_loss": 0.44686269760131836, "epoch": 6.57, "learning_rate": 5.4295605417119656e-05, "loss": 0.4321, "step": 18190, "task_loss": 0.8985435366630554 }, { "compression_loss": 0.0, "distillation_loss": 0.5419995784759521, "epoch": 6.58, "learning_rate": 5.422924807493375e-05, "loss": 0.3999, "step": 18200, "task_loss": 0.8705387115478516 }, { "compression_loss": 0.0, "distillation_loss": 0.44984179735183716, "epoch": 6.58, "learning_rate": 5.41627915802786e-05, "loss": 0.4632, "step": 18210, "task_loss": 0.6009798645973206 }, { "compression_loss": 0.0, "distillation_loss": 0.4739626944065094, "epoch": 6.58, "learning_rate": 5.409623627582625e-05, "loss": 0.4673, "step": 18220, "task_loss": 0.7192704677581787 }, { "compression_loss": 0.0, "distillation_loss": 0.3950083255767822, "epoch": 6.59, "learning_rate": 5.402958250475823e-05, "loss": 0.4619, "step": 18230, "task_loss": 0.44106048345565796 }, { "compression_loss": 0.0, "distillation_loss": 0.23980626463890076, "epoch": 6.59, "learning_rate": 5.396283061076379e-05, "loss": 0.404, "step": 18240, "task_loss": 0.3170390725135803 }, { "compression_loss": 0.0, "distillation_loss": 0.4144637882709503, "epoch": 6.6, "learning_rate": 5.389598093803816e-05, "loss": 0.4758, "step": 18250, "task_loss": 0.45880258083343506 }, { "epoch": 6.6, "eval_exact_match": 82.47871333964049, "eval_f1": 89.40149266393212, "step": 18250 }, { "compression_loss": 0.0, "distillation_loss": 0.5653073191642761, "epoch": 6.6, "learning_rate": 5.3829033831280716e-05, "loss": 0.4511, "step": 18260, "task_loss": 0.917737603187561 }, { "compression_loss": 0.0, "distillation_loss": 0.36813783645629883, "epoch": 6.6, "learning_rate": 5.376198963569325e-05, "loss": 0.4015, "step": 18270, "task_loss": 0.44203007221221924 }, { "compression_loss": 0.0, "distillation_loss": 0.38101470470428467, "epoch": 6.61, "learning_rate": 5.36948486969782e-05, "loss": 0.4641, "step": 18280, "task_loss": 0.5209448337554932 }, { "compression_loss": 0.0, "distillation_loss": 0.3684188425540924, "epoch": 6.61, "learning_rate": 5.36276113613368e-05, "loss": 0.3796, "step": 18290, "task_loss": 0.3748064935207367 }, { "compression_loss": 0.0, "distillation_loss": 0.4009804129600525, "epoch": 6.61, "learning_rate": 5.356027797546736e-05, "loss": 0.4367, "step": 18300, "task_loss": 0.4126998484134674 }, { "compression_loss": 0.0, "distillation_loss": 0.4069993495941162, "epoch": 6.62, "learning_rate": 5.349284888656348e-05, "loss": 0.4818, "step": 18310, "task_loss": 0.4426262080669403 }, { "compression_loss": 0.0, "distillation_loss": 0.5031300187110901, "epoch": 6.62, "learning_rate": 5.342532444231222e-05, "loss": 0.4451, "step": 18320, "task_loss": 0.9891221523284912 }, { "compression_loss": 0.0, "distillation_loss": 0.38253137469291687, "epoch": 6.62, "learning_rate": 5.33577049908923e-05, "loss": 0.4383, "step": 18330, "task_loss": 0.5382572412490845 }, { "compression_loss": 0.0, "distillation_loss": 0.3650605380535126, "epoch": 6.63, "learning_rate": 5.328999088097236e-05, "loss": 0.4475, "step": 18340, "task_loss": 0.4127558171749115 }, { "compression_loss": 0.0, "distillation_loss": 0.3485070466995239, "epoch": 6.63, "learning_rate": 5.322218246170911e-05, "loss": 0.4303, "step": 18350, "task_loss": 0.5886161923408508 }, { "compression_loss": 0.0, "distillation_loss": 0.442766010761261, "epoch": 6.64, "learning_rate": 5.315428008274558e-05, "loss": 0.4622, "step": 18360, "task_loss": 0.5299726128578186 }, { "compression_loss": 0.0, "distillation_loss": 0.45492517948150635, "epoch": 6.64, "learning_rate": 5.308628409420925e-05, "loss": 0.4739, "step": 18370, "task_loss": 0.6444842219352722 }, { "compression_loss": 0.0, "distillation_loss": 0.49286532402038574, "epoch": 6.64, "learning_rate": 5.301819484671032e-05, "loss": 0.4572, "step": 18380, "task_loss": 0.6773360967636108 }, { "compression_loss": 0.0, "distillation_loss": 0.4356333017349243, "epoch": 6.65, "learning_rate": 5.295001269133984e-05, "loss": 0.3974, "step": 18390, "task_loss": 0.6576051115989685 }, { "compression_loss": 0.0, "distillation_loss": 0.3914899230003357, "epoch": 6.65, "learning_rate": 5.2881737979667926e-05, "loss": 0.4884, "step": 18400, "task_loss": 0.7598779797554016 }, { "compression_loss": 0.0, "distillation_loss": 0.4002273678779602, "epoch": 6.65, "learning_rate": 5.2813371063741945e-05, "loss": 0.4135, "step": 18410, "task_loss": 0.5003056526184082 }, { "compression_loss": 0.0, "distillation_loss": 0.5130934715270996, "epoch": 6.66, "learning_rate": 5.2744912296084714e-05, "loss": 0.5041, "step": 18420, "task_loss": 0.7868928909301758 }, { "compression_loss": 0.0, "distillation_loss": 0.42638254165649414, "epoch": 6.66, "learning_rate": 5.2676362029692664e-05, "loss": 0.4332, "step": 18430, "task_loss": 0.7337071895599365 }, { "compression_loss": 0.0, "distillation_loss": 0.5077611207962036, "epoch": 6.66, "learning_rate": 5.260772061803401e-05, "loss": 0.434, "step": 18440, "task_loss": 0.7167735695838928 }, { "compression_loss": 0.0, "distillation_loss": 0.3096952438354492, "epoch": 6.67, "learning_rate": 5.2538988415046944e-05, "loss": 0.3743, "step": 18450, "task_loss": 0.235990971326828 }, { "compression_loss": 0.0, "distillation_loss": 0.40252023935317993, "epoch": 6.67, "learning_rate": 5.247016577513784e-05, "loss": 0.4, "step": 18460, "task_loss": 0.6203321814537048 }, { "compression_loss": 0.0, "distillation_loss": 0.4693078398704529, "epoch": 6.68, "learning_rate": 5.2401253053179365e-05, "loss": 0.4146, "step": 18470, "task_loss": 0.6614182591438293 }, { "compression_loss": 0.0, "distillation_loss": 0.27100706100463867, "epoch": 6.68, "learning_rate": 5.2332250604508675e-05, "loss": 0.4092, "step": 18480, "task_loss": 0.3299637734889984 }, { "compression_loss": 0.0, "distillation_loss": 0.3161439895629883, "epoch": 6.68, "learning_rate": 5.226315878492561e-05, "loss": 0.4102, "step": 18490, "task_loss": 0.8195310235023499 }, { "compression_loss": 0.0, "distillation_loss": 0.48115524649620056, "epoch": 6.69, "learning_rate": 5.219397795069082e-05, "loss": 0.4006, "step": 18500, "task_loss": 0.6623936891555786 }, { "epoch": 6.69, "eval_exact_match": 82.81929990539263, "eval_f1": 89.71049915364028, "step": 18500 }, { "compression_loss": 0.0, "distillation_loss": 0.3616160750389099, "epoch": 6.69, "learning_rate": 5.212470845852394e-05, "loss": 0.4761, "step": 18510, "task_loss": 0.5478072762489319 }, { "compression_loss": 0.0, "distillation_loss": 0.3384724259376526, "epoch": 6.69, "learning_rate": 5.205535066560179e-05, "loss": 0.4039, "step": 18520, "task_loss": 0.8222288489341736 }, { "compression_loss": 0.0, "distillation_loss": 0.4261634051799774, "epoch": 6.7, "learning_rate": 5.198590492955645e-05, "loss": 0.4239, "step": 18530, "task_loss": 0.7383376359939575 }, { "compression_loss": 0.0, "distillation_loss": 0.4749411344528198, "epoch": 6.7, "learning_rate": 5.1916371608473494e-05, "loss": 0.4562, "step": 18540, "task_loss": 0.7253599762916565 }, { "compression_loss": 0.0, "distillation_loss": 0.4970089793205261, "epoch": 6.7, "learning_rate": 5.1846751060890114e-05, "loss": 0.3964, "step": 18550, "task_loss": 0.7168070077896118 }, { "compression_loss": 0.0, "distillation_loss": 0.4212992787361145, "epoch": 6.71, "learning_rate": 5.1777043645793254e-05, "loss": 0.4283, "step": 18560, "task_loss": 0.71849524974823 }, { "compression_loss": 0.0, "distillation_loss": 0.3591970205307007, "epoch": 6.71, "learning_rate": 5.17072497226178e-05, "loss": 0.4409, "step": 18570, "task_loss": 0.37877827882766724 }, { "compression_loss": 0.0, "distillation_loss": 0.4818565249443054, "epoch": 6.71, "learning_rate": 5.1637369651244666e-05, "loss": 0.3911, "step": 18580, "task_loss": 0.7044258117675781 }, { "compression_loss": 0.0, "distillation_loss": 0.3378729820251465, "epoch": 6.72, "learning_rate": 5.156740379199902e-05, "loss": 0.4253, "step": 18590, "task_loss": 0.3735387921333313 }, { "compression_loss": 0.0, "distillation_loss": 0.5539082884788513, "epoch": 6.72, "learning_rate": 5.149735250564836e-05, "loss": 0.4326, "step": 18600, "task_loss": 0.6768410205841064 }, { "compression_loss": 0.0, "distillation_loss": 0.4786291718482971, "epoch": 6.73, "learning_rate": 5.142721615340066e-05, "loss": 0.4563, "step": 18610, "task_loss": 0.5143426656723022 }, { "compression_loss": 0.0, "distillation_loss": 0.47057053446769714, "epoch": 6.73, "learning_rate": 5.135699509690255e-05, "loss": 0.461, "step": 18620, "task_loss": 0.5533486604690552 }, { "compression_loss": 0.0, "distillation_loss": 0.3846384584903717, "epoch": 6.73, "learning_rate": 5.128668969823739e-05, "loss": 0.4523, "step": 18630, "task_loss": 0.45162439346313477 }, { "compression_loss": 0.0, "distillation_loss": 0.372709721326828, "epoch": 6.74, "learning_rate": 5.121630031992348e-05, "loss": 0.4113, "step": 18640, "task_loss": 0.47227999567985535 }, { "compression_loss": 0.0, "distillation_loss": 0.3604407012462616, "epoch": 6.74, "learning_rate": 5.114582732491213e-05, "loss": 0.445, "step": 18650, "task_loss": 0.5071597099304199 }, { "compression_loss": 0.0, "distillation_loss": 0.45643842220306396, "epoch": 6.74, "learning_rate": 5.107527107658576e-05, "loss": 0.4598, "step": 18660, "task_loss": 0.6165838241577148 }, { "compression_loss": 0.0, "distillation_loss": 0.5041736960411072, "epoch": 6.75, "learning_rate": 5.1004631938756154e-05, "loss": 0.4796, "step": 18670, "task_loss": 0.7193949222564697 }, { "compression_loss": 0.0, "distillation_loss": 0.31500351428985596, "epoch": 6.75, "learning_rate": 5.093391027566244e-05, "loss": 0.4015, "step": 18680, "task_loss": 0.5422650575637817 }, { "compression_loss": 0.0, "distillation_loss": 0.47139471769332886, "epoch": 6.75, "learning_rate": 5.086310645196928e-05, "loss": 0.4027, "step": 18690, "task_loss": 0.46124646067619324 }, { "compression_loss": 0.0, "distillation_loss": 0.35555168986320496, "epoch": 6.76, "learning_rate": 5.079222083276504e-05, "loss": 0.4077, "step": 18700, "task_loss": 0.37314948439598083 }, { "compression_loss": 0.0, "distillation_loss": 0.35326719284057617, "epoch": 6.76, "learning_rate": 5.072125378355978e-05, "loss": 0.4241, "step": 18710, "task_loss": 0.5533696413040161 }, { "compression_loss": 0.0, "distillation_loss": 0.42909830808639526, "epoch": 6.77, "learning_rate": 5.0650205670283475e-05, "loss": 0.4068, "step": 18720, "task_loss": 0.39843881130218506 }, { "compression_loss": 0.0, "distillation_loss": 0.41295886039733887, "epoch": 6.77, "learning_rate": 5.057907685928408e-05, "loss": 0.4629, "step": 18730, "task_loss": 0.5897053480148315 }, { "compression_loss": 0.0, "distillation_loss": 0.43410900235176086, "epoch": 6.77, "learning_rate": 5.050786771732567e-05, "loss": 0.438, "step": 18740, "task_loss": 0.6660215854644775 }, { "compression_loss": 0.0, "distillation_loss": 0.3357173800468445, "epoch": 6.78, "learning_rate": 5.043657861158653e-05, "loss": 0.4601, "step": 18750, "task_loss": 0.6065893173217773 }, { "epoch": 6.78, "eval_exact_match": 82.45033112582782, "eval_f1": 89.46843856351111, "step": 18750 }, { "compression_loss": 0.0, "distillation_loss": 0.4543573558330536, "epoch": 6.78, "learning_rate": 5.036520990965726e-05, "loss": 0.4089, "step": 18760, "task_loss": 0.6887750625610352 }, { "compression_loss": 0.0, "distillation_loss": 0.39767420291900635, "epoch": 6.78, "learning_rate": 5.029376197953888e-05, "loss": 0.4149, "step": 18770, "task_loss": 0.6786099672317505 }, { "compression_loss": 0.0, "distillation_loss": 0.38811343908309937, "epoch": 6.79, "learning_rate": 5.022223518964095e-05, "loss": 0.4349, "step": 18780, "task_loss": 0.4405578374862671 }, { "compression_loss": 0.0, "distillation_loss": 0.47207069396972656, "epoch": 6.79, "learning_rate": 5.015062990877964e-05, "loss": 0.4619, "step": 18790, "task_loss": 0.5872414112091064 }, { "compression_loss": 0.0, "distillation_loss": 0.3899841904640198, "epoch": 6.79, "learning_rate": 5.007894650617588e-05, "loss": 0.4326, "step": 18800, "task_loss": 0.42872917652130127 }, { "compression_loss": 0.0, "distillation_loss": 0.46909916400909424, "epoch": 6.8, "learning_rate": 5.0007185351453374e-05, "loss": 0.4121, "step": 18810, "task_loss": 0.6106085181236267 }, { "compression_loss": 0.0, "distillation_loss": 0.45315465331077576, "epoch": 6.8, "learning_rate": 4.9935346814636785e-05, "loss": 0.3855, "step": 18820, "task_loss": 0.7110905647277832 }, { "compression_loss": 0.0, "distillation_loss": 0.4155175983905792, "epoch": 6.81, "learning_rate": 4.9863431266149745e-05, "loss": 0.441, "step": 18830, "task_loss": 0.6808761954307556 }, { "compression_loss": 0.0, "distillation_loss": 0.37727808952331543, "epoch": 6.81, "learning_rate": 4.979143907681301e-05, "loss": 0.4181, "step": 18840, "task_loss": 0.44022470712661743 }, { "compression_loss": 0.0, "distillation_loss": 0.6061004996299744, "epoch": 6.81, "learning_rate": 4.9719370617842544e-05, "loss": 0.51, "step": 18850, "task_loss": 0.6300066113471985 }, { "compression_loss": 0.0, "distillation_loss": 0.47508275508880615, "epoch": 6.82, "learning_rate": 4.964722626084752e-05, "loss": 0.4558, "step": 18860, "task_loss": 0.6476293206214905 }, { "compression_loss": 0.0, "distillation_loss": 0.48660221695899963, "epoch": 6.82, "learning_rate": 4.9575006377828535e-05, "loss": 0.3868, "step": 18870, "task_loss": 0.5755065679550171 }, { "compression_loss": 0.0, "distillation_loss": 0.527796745300293, "epoch": 6.82, "learning_rate": 4.9502711341175553e-05, "loss": 0.4417, "step": 18880, "task_loss": 0.6073551774024963 }, { "compression_loss": 0.0, "distillation_loss": 0.34590059518814087, "epoch": 6.83, "learning_rate": 4.9430341523666124e-05, "loss": 0.4138, "step": 18890, "task_loss": 0.6458975672721863 }, { "compression_loss": 0.0, "distillation_loss": 0.3756251335144043, "epoch": 6.83, "learning_rate": 4.935789729846335e-05, "loss": 0.4016, "step": 18900, "task_loss": 0.5915954113006592 }, { "compression_loss": 0.0, "distillation_loss": 0.4537370204925537, "epoch": 6.83, "learning_rate": 4.928537903911402e-05, "loss": 0.3831, "step": 18910, "task_loss": 0.7734125852584839 }, { "compression_loss": 0.0, "distillation_loss": 0.4709266722202301, "epoch": 6.84, "learning_rate": 4.921278711954666e-05, "loss": 0.4286, "step": 18920, "task_loss": 0.9293255805969238 }, { "compression_loss": 0.0, "distillation_loss": 0.4475264251232147, "epoch": 6.84, "learning_rate": 4.9140121914069626e-05, "loss": 0.4389, "step": 18930, "task_loss": 0.5489974021911621 }, { "compression_loss": 0.0, "distillation_loss": 0.33601874113082886, "epoch": 6.84, "learning_rate": 4.9067383797369146e-05, "loss": 0.3989, "step": 18940, "task_loss": 0.38476261496543884 }, { "compression_loss": 0.0, "distillation_loss": 0.3520219326019287, "epoch": 6.85, "learning_rate": 4.899457314450742e-05, "loss": 0.3766, "step": 18950, "task_loss": 0.6894028186798096 }, { "compression_loss": 0.0, "distillation_loss": 0.38159820437431335, "epoch": 6.85, "learning_rate": 4.8921690330920637e-05, "loss": 0.4241, "step": 18960, "task_loss": 0.7299265265464783 }, { "compression_loss": 0.0, "distillation_loss": 0.47799068689346313, "epoch": 6.86, "learning_rate": 4.884873573241711e-05, "loss": 0.4292, "step": 18970, "task_loss": 0.6850833892822266 }, { "compression_loss": 0.0, "distillation_loss": 0.6151865720748901, "epoch": 6.86, "learning_rate": 4.8775709725175277e-05, "loss": 0.418, "step": 18980, "task_loss": 0.6462631225585938 }, { "compression_loss": 0.0, "distillation_loss": 0.47672033309936523, "epoch": 6.86, "learning_rate": 4.870261268574178e-05, "loss": 0.444, "step": 18990, "task_loss": 0.8364094495773315 }, { "compression_loss": 0.0, "distillation_loss": 0.3276512920856476, "epoch": 6.87, "learning_rate": 4.8629444991029544e-05, "loss": 0.4532, "step": 19000, "task_loss": 0.3989708423614502 }, { "epoch": 6.87, "eval_exact_match": 82.45979186376537, "eval_f1": 89.5439109204433, "step": 19000 }, { "compression_loss": 0.0, "distillation_loss": 0.44405293464660645, "epoch": 6.87, "learning_rate": 4.8556207018315793e-05, "loss": 0.3897, "step": 19010, "task_loss": 0.5321621894836426 }, { "compression_loss": 0.0, "distillation_loss": 0.35056599974632263, "epoch": 6.87, "learning_rate": 4.848289914524017e-05, "loss": 0.444, "step": 19020, "task_loss": 0.3490481674671173 }, { "compression_loss": 0.0, "distillation_loss": 0.4751790761947632, "epoch": 6.88, "learning_rate": 4.840952174980269e-05, "loss": 0.3982, "step": 19030, "task_loss": 0.663070797920227 }, { "compression_loss": 0.0, "distillation_loss": 0.45868778228759766, "epoch": 6.88, "learning_rate": 4.8336075210361884e-05, "loss": 0.4686, "step": 19040, "task_loss": 0.5684219598770142 }, { "compression_loss": 0.0, "distillation_loss": 0.5517913699150085, "epoch": 6.88, "learning_rate": 4.826255990563282e-05, "loss": 0.4379, "step": 19050, "task_loss": 0.8500944972038269 }, { "compression_loss": 0.0, "distillation_loss": 0.36011481285095215, "epoch": 6.89, "learning_rate": 4.8188976214685125e-05, "loss": 0.4354, "step": 19060, "task_loss": 0.45963573455810547 }, { "compression_loss": 0.0, "distillation_loss": 0.4780333638191223, "epoch": 6.89, "learning_rate": 4.81226927362005e-05, "loss": 0.4268, "step": 19070, "task_loss": 0.5274924039840698 }, { "compression_loss": 0.0, "distillation_loss": 0.3727762699127197, "epoch": 6.9, "learning_rate": 4.804898015703557e-05, "loss": 0.418, "step": 19080, "task_loss": 0.5746783018112183 }, { "compression_loss": 0.0, "distillation_loss": 0.24984151124954224, "epoch": 6.9, "learning_rate": 4.797520029294102e-05, "loss": 0.378, "step": 19090, "task_loss": 0.3299369812011719 }, { "compression_loss": 0.0, "distillation_loss": 0.3473876416683197, "epoch": 6.9, "learning_rate": 4.7901353524350616e-05, "loss": 0.4693, "step": 19100, "task_loss": 0.7796521186828613 }, { "compression_loss": 0.0, "distillation_loss": 0.4735308885574341, "epoch": 6.91, "learning_rate": 4.7827440232043156e-05, "loss": 0.4624, "step": 19110, "task_loss": 0.862983226776123 }, { "compression_loss": 0.0, "distillation_loss": 0.4470568895339966, "epoch": 6.91, "learning_rate": 4.77534607971404e-05, "loss": 0.469, "step": 19120, "task_loss": 0.3181147575378418 }, { "compression_loss": 0.0, "distillation_loss": 0.42912188172340393, "epoch": 6.91, "learning_rate": 4.767941560110523e-05, "loss": 0.4015, "step": 19130, "task_loss": 0.623262882232666 }, { "compression_loss": 0.0, "distillation_loss": 0.4887217581272125, "epoch": 6.92, "learning_rate": 4.7605305025739525e-05, "loss": 0.4325, "step": 19140, "task_loss": 0.5894296765327454 }, { "compression_loss": 0.0, "distillation_loss": 0.34331122040748596, "epoch": 6.92, "learning_rate": 4.753112945318238e-05, "loss": 0.3854, "step": 19150, "task_loss": 0.4348488450050354 }, { "compression_loss": 0.0, "distillation_loss": 0.4002523720264435, "epoch": 6.92, "learning_rate": 4.745688926590796e-05, "loss": 0.4437, "step": 19160, "task_loss": 0.5534040927886963 }, { "compression_loss": 0.0, "distillation_loss": 0.612992525100708, "epoch": 6.93, "learning_rate": 4.738258484672365e-05, "loss": 0.4264, "step": 19170, "task_loss": 0.8351370096206665 }, { "compression_loss": 0.0, "distillation_loss": 0.40844202041625977, "epoch": 6.93, "learning_rate": 4.730821657876802e-05, "loss": 0.4385, "step": 19180, "task_loss": 0.5529130697250366 }, { "compression_loss": 0.0, "distillation_loss": 0.41695636510849, "epoch": 6.94, "learning_rate": 4.7233784845508855e-05, "loss": 0.456, "step": 19190, "task_loss": 0.2549833059310913 }, { "compression_loss": 0.0, "distillation_loss": 0.5138141512870789, "epoch": 6.94, "learning_rate": 4.715929003074122e-05, "loss": 0.4156, "step": 19200, "task_loss": 0.43483638763427734 }, { "compression_loss": 0.0, "distillation_loss": 0.4193929433822632, "epoch": 6.94, "learning_rate": 4.7084732518585406e-05, "loss": 0.4429, "step": 19210, "task_loss": 0.6967915296554565 }, { "compression_loss": 0.0, "distillation_loss": 0.4237646460533142, "epoch": 6.95, "learning_rate": 4.701011269348504e-05, "loss": 0.4296, "step": 19220, "task_loss": 1.0524966716766357 }, { "compression_loss": 0.0, "distillation_loss": 0.3706055283546448, "epoch": 6.95, "learning_rate": 4.6935430940205026e-05, "loss": 0.4508, "step": 19230, "task_loss": 0.848009467124939 }, { "compression_loss": 0.0, "distillation_loss": 0.5015447735786438, "epoch": 6.95, "learning_rate": 4.686068764382958e-05, "loss": 0.4599, "step": 19240, "task_loss": 0.8581211566925049 }, { "compression_loss": 0.0, "distillation_loss": 0.4016169309616089, "epoch": 6.96, "learning_rate": 4.67858831897603e-05, "loss": 0.4012, "step": 19250, "task_loss": 0.6866574883460999 }, { "epoch": 6.96, "eval_exact_match": 82.77199621570483, "eval_f1": 89.62177861298487, "step": 19250 }, { "compression_loss": 0.0, "distillation_loss": 0.37276118993759155, "epoch": 6.96, "learning_rate": 4.671101796371409e-05, "loss": 0.4105, "step": 19260, "task_loss": 0.38845476508140564 }, { "compression_loss": 0.0, "distillation_loss": 0.37246400117874146, "epoch": 6.96, "learning_rate": 4.663609235172122e-05, "loss": 0.4296, "step": 19270, "task_loss": 0.49262505769729614 }, { "compression_loss": 0.0, "distillation_loss": 0.426718533039093, "epoch": 6.97, "learning_rate": 4.6561106740123356e-05, "loss": 0.4424, "step": 19280, "task_loss": 0.4604375958442688 }, { "compression_loss": 0.0, "distillation_loss": 0.47683387994766235, "epoch": 6.97, "learning_rate": 4.648606151557153e-05, "loss": 0.367, "step": 19290, "task_loss": 0.4319005012512207 }, { "compression_loss": 0.0, "distillation_loss": 0.5281187891960144, "epoch": 6.98, "learning_rate": 4.641095706502413e-05, "loss": 0.4282, "step": 19300, "task_loss": 0.8619099259376526 }, { "compression_loss": 0.0, "distillation_loss": 0.3414451777935028, "epoch": 6.98, "learning_rate": 4.633579377574499e-05, "loss": 0.4111, "step": 19310, "task_loss": 0.3021755814552307 }, { "compression_loss": 0.0, "distillation_loss": 0.3465237021446228, "epoch": 6.98, "learning_rate": 4.6260572035301296e-05, "loss": 0.3609, "step": 19320, "task_loss": 0.49031707644462585 }, { "compression_loss": 0.0, "distillation_loss": 0.3119080066680908, "epoch": 6.99, "learning_rate": 4.6185292231561614e-05, "loss": 0.4461, "step": 19330, "task_loss": 0.6686820387840271 }, { "compression_loss": 0.0, "distillation_loss": 0.40698978304862976, "epoch": 6.99, "learning_rate": 4.610995475269395e-05, "loss": 0.38, "step": 19340, "task_loss": 0.751046895980835 }, { "compression_loss": 0.0, "distillation_loss": 0.4363614320755005, "epoch": 6.99, "learning_rate": 4.603455998716366e-05, "loss": 0.4459, "step": 19350, "task_loss": 0.6054350137710571 }, { "compression_loss": 0.0, "distillation_loss": 0.3699437975883484, "epoch": 7.0, "learning_rate": 4.595910832373155e-05, "loss": 0.4081, "step": 19360, "task_loss": 0.47203072905540466 }, { "compression_loss": 0.0, "distillation_loss": 0.476010262966156, "epoch": 7.0, "learning_rate": 4.58836001514517e-05, "loss": 0.4631, "step": 19370, "task_loss": 0.6500152349472046 }, { "compression_loss": 0.0, "distillation_loss": 0.25550246238708496, "epoch": 7.0, "learning_rate": 4.58080358596697e-05, "loss": 0.3536, "step": 19380, "task_loss": 0.41859322786331177 }, { "compression_loss": 0.0, "distillation_loss": 0.37610524892807007, "epoch": 7.01, "learning_rate": 4.57324158380204e-05, "loss": 0.3568, "step": 19390, "task_loss": 0.3900749087333679 }, { "compression_loss": 0.0, "distillation_loss": 0.2958787977695465, "epoch": 7.01, "learning_rate": 4.56567404764261e-05, "loss": 0.3335, "step": 19400, "task_loss": 0.531352698802948 }, { "compression_loss": 0.0, "distillation_loss": 0.3521242141723633, "epoch": 7.01, "learning_rate": 4.558101016509439e-05, "loss": 0.3586, "step": 19410, "task_loss": 0.43806248903274536 }, { "compression_loss": 0.0, "distillation_loss": 0.32949671149253845, "epoch": 7.02, "learning_rate": 4.550522529451622e-05, "loss": 0.3663, "step": 19420, "task_loss": 0.31022578477859497 }, { "compression_loss": 0.0, "distillation_loss": 0.3833150565624237, "epoch": 7.02, "learning_rate": 4.542938625546389e-05, "loss": 0.3462, "step": 19430, "task_loss": 0.7389860153198242 }, { "compression_loss": 0.0, "distillation_loss": 0.4690796136856079, "epoch": 7.03, "learning_rate": 4.5353493438988946e-05, "loss": 0.3397, "step": 19440, "task_loss": 0.8385846018791199 }, { "compression_loss": 0.0, "distillation_loss": 0.2842397093772888, "epoch": 7.03, "learning_rate": 4.527754723642031e-05, "loss": 0.3836, "step": 19450, "task_loss": 0.49422913789749146 }, { "compression_loss": 0.0, "distillation_loss": 0.24541881680488586, "epoch": 7.03, "learning_rate": 4.520154803936211e-05, "loss": 0.3828, "step": 19460, "task_loss": 0.4191051721572876 }, { "compression_loss": 0.0, "distillation_loss": 0.3875623345375061, "epoch": 7.04, "learning_rate": 4.512549623969179e-05, "loss": 0.379, "step": 19470, "task_loss": 0.6146583557128906 }, { "compression_loss": 0.0, "distillation_loss": 0.31408095359802246, "epoch": 7.04, "learning_rate": 4.5049392229557984e-05, "loss": 0.3425, "step": 19480, "task_loss": 0.7459331750869751 }, { "compression_loss": 0.0, "distillation_loss": 0.3380616009235382, "epoch": 7.04, "learning_rate": 4.497323640137856e-05, "loss": 0.358, "step": 19490, "task_loss": 0.5370668172836304 }, { "compression_loss": 0.0, "distillation_loss": 0.3334946036338806, "epoch": 7.05, "learning_rate": 4.489702914783859e-05, "loss": 0.3863, "step": 19500, "task_loss": 0.6622374057769775 }, { "epoch": 7.05, "eval_exact_match": 82.37464522232735, "eval_f1": 89.41990224068103, "step": 19500 }, { "compression_loss": 0.0, "distillation_loss": 0.342751681804657, "epoch": 7.05, "learning_rate": 4.482077086188828e-05, "loss": 0.3715, "step": 19510, "task_loss": 0.6515509486198425 }, { "compression_loss": 0.0, "distillation_loss": 0.3435029983520508, "epoch": 7.05, "learning_rate": 4.4744461936741006e-05, "loss": 0.3807, "step": 19520, "task_loss": 0.5341594219207764 }, { "compression_loss": 0.0, "distillation_loss": 0.3486357033252716, "epoch": 7.06, "learning_rate": 4.466810276587125e-05, "loss": 0.3861, "step": 19530, "task_loss": 0.5828717350959778 }, { "compression_loss": 0.0, "distillation_loss": 0.27860820293426514, "epoch": 7.06, "learning_rate": 4.459169374301257e-05, "loss": 0.3819, "step": 19540, "task_loss": 0.3259779214859009 }, { "compression_loss": 0.0, "distillation_loss": 0.2861538529396057, "epoch": 7.07, "learning_rate": 4.4515235262155585e-05, "loss": 0.3759, "step": 19550, "task_loss": 0.5208934545516968 }, { "compression_loss": 0.0, "distillation_loss": 0.2555454969406128, "epoch": 7.07, "learning_rate": 4.443872771754593e-05, "loss": 0.3568, "step": 19560, "task_loss": 0.6315025091171265 }, { "compression_loss": 0.0, "distillation_loss": 0.4431646466255188, "epoch": 7.07, "learning_rate": 4.436217150368223e-05, "loss": 0.3419, "step": 19570, "task_loss": 0.617892861366272 }, { "compression_loss": 0.0, "distillation_loss": 0.2903032600879669, "epoch": 7.08, "learning_rate": 4.4285567015314056e-05, "loss": 0.3874, "step": 19580, "task_loss": 0.39440637826919556 }, { "compression_loss": 0.0, "distillation_loss": 0.3276922106742859, "epoch": 7.08, "learning_rate": 4.4208914647439934e-05, "loss": 0.3447, "step": 19590, "task_loss": 0.7188665270805359 }, { "compression_loss": 0.0, "distillation_loss": 0.3929433822631836, "epoch": 7.08, "learning_rate": 4.413221479530524e-05, "loss": 0.3543, "step": 19600, "task_loss": 0.48741626739501953 }, { "compression_loss": 0.0, "distillation_loss": 0.4890502393245697, "epoch": 7.09, "learning_rate": 4.40554678544002e-05, "loss": 0.3848, "step": 19610, "task_loss": 0.8010128736495972 }, { "compression_loss": 0.0, "distillation_loss": 0.28317737579345703, "epoch": 7.09, "learning_rate": 4.397867422045783e-05, "loss": 0.3642, "step": 19620, "task_loss": 0.4237869679927826 }, { "compression_loss": 0.0, "distillation_loss": 0.32048285007476807, "epoch": 7.09, "learning_rate": 4.390183428945197e-05, "loss": 0.3484, "step": 19630, "task_loss": 0.42599737644195557 }, { "compression_loss": 0.0, "distillation_loss": 0.30844658613204956, "epoch": 7.1, "learning_rate": 4.38249484575951e-05, "loss": 0.3503, "step": 19640, "task_loss": 0.29508674144744873 }, { "compression_loss": 0.0, "distillation_loss": 0.3091406226158142, "epoch": 7.1, "learning_rate": 4.374801712133647e-05, "loss": 0.353, "step": 19650, "task_loss": 0.5975150465965271 }, { "compression_loss": 0.0, "distillation_loss": 0.39048248529434204, "epoch": 7.11, "learning_rate": 4.367104067735987e-05, "loss": 0.3897, "step": 19660, "task_loss": 0.770504355430603 }, { "compression_loss": 0.0, "distillation_loss": 0.38784366846084595, "epoch": 7.11, "learning_rate": 4.359401952258176e-05, "loss": 0.3453, "step": 19670, "task_loss": 0.5350626707077026 }, { "compression_loss": 0.0, "distillation_loss": 0.3483542203903198, "epoch": 7.11, "learning_rate": 4.351695405414913e-05, "loss": 0.3466, "step": 19680, "task_loss": 0.7391943335533142 }, { "compression_loss": 0.0, "distillation_loss": 0.3177662491798401, "epoch": 7.12, "learning_rate": 4.3439844669437396e-05, "loss": 0.3558, "step": 19690, "task_loss": 0.3970339298248291 }, { "compression_loss": 0.0, "distillation_loss": 0.3653816878795624, "epoch": 7.12, "learning_rate": 4.336269176604852e-05, "loss": 0.3538, "step": 19700, "task_loss": 0.48417383432388306 }, { "compression_loss": 0.0, "distillation_loss": 0.5231342315673828, "epoch": 7.12, "learning_rate": 4.32854957418088e-05, "loss": 0.3773, "step": 19710, "task_loss": 0.5919955372810364 }, { "compression_loss": 0.0, "distillation_loss": 0.2906436026096344, "epoch": 7.13, "learning_rate": 4.3208256994766895e-05, "loss": 0.3596, "step": 19720, "task_loss": 0.7230117321014404 }, { "compression_loss": 0.0, "distillation_loss": 0.31748613715171814, "epoch": 7.13, "learning_rate": 4.313097592319176e-05, "loss": 0.3458, "step": 19730, "task_loss": 0.3181889057159424 }, { "compression_loss": 0.0, "distillation_loss": 0.33267971873283386, "epoch": 7.13, "learning_rate": 4.305365292557059e-05, "loss": 0.3904, "step": 19740, "task_loss": 0.5057215094566345 }, { "compression_loss": 0.0, "distillation_loss": 0.3165643811225891, "epoch": 7.14, "learning_rate": 4.297628840060676e-05, "loss": 0.3824, "step": 19750, "task_loss": 0.43284088373184204 }, { "epoch": 7.14, "eval_exact_match": 82.87606433301798, "eval_f1": 89.81716000383479, "step": 19750 }, { "compression_loss": 0.0, "distillation_loss": 0.2485005110502243, "epoch": 7.14, "learning_rate": 4.2898882747217766e-05, "loss": 0.3264, "step": 19760, "task_loss": 0.29056209325790405 }, { "compression_loss": 0.0, "distillation_loss": 0.3244897127151489, "epoch": 7.14, "learning_rate": 4.2821436364533194e-05, "loss": 0.3762, "step": 19770, "task_loss": 0.5531866550445557 }, { "compression_loss": 0.0, "distillation_loss": 0.3738449513912201, "epoch": 7.15, "learning_rate": 4.274394965189265e-05, "loss": 0.342, "step": 19780, "task_loss": 0.4225583076477051 }, { "compression_loss": 0.0, "distillation_loss": 0.302204966545105, "epoch": 7.15, "learning_rate": 4.266642300884366e-05, "loss": 0.3548, "step": 19790, "task_loss": 0.776357114315033 }, { "compression_loss": 0.0, "distillation_loss": 0.36438536643981934, "epoch": 7.16, "learning_rate": 4.258885683513967e-05, "loss": 0.3736, "step": 19800, "task_loss": 0.48018282651901245 }, { "compression_loss": 0.0, "distillation_loss": 0.2947632670402527, "epoch": 7.16, "learning_rate": 4.2511251530737956e-05, "loss": 0.4031, "step": 19810, "task_loss": 0.49369704723358154 }, { "compression_loss": 0.0, "distillation_loss": 0.3188723921775818, "epoch": 7.16, "learning_rate": 4.243360749579756e-05, "loss": 0.377, "step": 19820, "task_loss": 0.4446810483932495 }, { "compression_loss": 0.0, "distillation_loss": 0.2981301248073578, "epoch": 7.17, "learning_rate": 4.2355925130677255e-05, "loss": 0.3348, "step": 19830, "task_loss": 0.25408676266670227 }, { "compression_loss": 0.0, "distillation_loss": 0.35322049260139465, "epoch": 7.17, "learning_rate": 4.22782048359334e-05, "loss": 0.3721, "step": 19840, "task_loss": 0.41666746139526367 }, { "compression_loss": 0.0, "distillation_loss": 0.2933061718940735, "epoch": 7.17, "learning_rate": 4.2200447012318006e-05, "loss": 0.4241, "step": 19850, "task_loss": 0.7377125024795532 }, { "compression_loss": 0.0, "distillation_loss": 0.2902827262878418, "epoch": 7.18, "learning_rate": 4.212265206077655e-05, "loss": 0.3631, "step": 19860, "task_loss": 0.4684586524963379 }, { "compression_loss": 0.0, "distillation_loss": 0.3492077589035034, "epoch": 7.18, "learning_rate": 4.204482038244594e-05, "loss": 0.3792, "step": 19870, "task_loss": 0.5240708589553833 }, { "compression_loss": 0.0, "distillation_loss": 0.3437349200248718, "epoch": 7.18, "learning_rate": 4.1966952378652515e-05, "loss": 0.3532, "step": 19880, "task_loss": 0.39789509773254395 }, { "compression_loss": 0.0, "distillation_loss": 0.2762902081012726, "epoch": 7.19, "learning_rate": 4.1889048450909854e-05, "loss": 0.3529, "step": 19890, "task_loss": 0.5627302527427673 }, { "compression_loss": 0.0, "distillation_loss": 0.3159010410308838, "epoch": 7.19, "learning_rate": 4.181110900091682e-05, "loss": 0.3713, "step": 19900, "task_loss": 0.6435883045196533 }, { "compression_loss": 0.0, "distillation_loss": 0.3086598515510559, "epoch": 7.2, "learning_rate": 4.173313443055545e-05, "loss": 0.3744, "step": 19910, "task_loss": 0.38989460468292236 }, { "compression_loss": 0.0, "distillation_loss": 0.44235122203826904, "epoch": 7.2, "learning_rate": 4.165512514188881e-05, "loss": 0.3728, "step": 19920, "task_loss": 0.6314876079559326 }, { "compression_loss": 0.0, "distillation_loss": 0.2719871997833252, "epoch": 7.2, "learning_rate": 4.157708153715904e-05, "loss": 0.3254, "step": 19930, "task_loss": 0.3361320495605469 }, { "compression_loss": 0.0, "distillation_loss": 0.3495417833328247, "epoch": 7.21, "learning_rate": 4.1499004018785207e-05, "loss": 0.3469, "step": 19940, "task_loss": 0.40324363112449646 }, { "compression_loss": 0.0, "distillation_loss": 0.31385642290115356, "epoch": 7.21, "learning_rate": 4.1420892989361255e-05, "loss": 0.352, "step": 19950, "task_loss": 0.46830520033836365 }, { "compression_loss": 0.0, "distillation_loss": 0.386014461517334, "epoch": 7.21, "learning_rate": 4.134274885165392e-05, "loss": 0.3731, "step": 19960, "task_loss": 0.8087267875671387 }, { "compression_loss": 0.0, "distillation_loss": 0.37941837310791016, "epoch": 7.22, "learning_rate": 4.1264572008600645e-05, "loss": 0.3761, "step": 19970, "task_loss": 0.23961874842643738 }, { "compression_loss": 0.0, "distillation_loss": 0.29875701665878296, "epoch": 7.22, "learning_rate": 4.118636286330753e-05, "loss": 0.3597, "step": 19980, "task_loss": 0.43934816122055054 }, { "compression_loss": 0.0, "distillation_loss": 0.3207720220088959, "epoch": 7.22, "learning_rate": 4.110812181904722e-05, "loss": 0.3418, "step": 19990, "task_loss": 0.5765659809112549 }, { "compression_loss": 0.0, "distillation_loss": 0.2805137634277344, "epoch": 7.23, "learning_rate": 4.102984927925685e-05, "loss": 0.3795, "step": 20000, "task_loss": 0.48947030305862427 }, { "epoch": 7.23, "eval_exact_match": 82.61116367076632, "eval_f1": 89.78752648269042, "step": 20000 }, { "compression_loss": 0.0, "distillation_loss": 0.35584986209869385, "epoch": 7.23, "learning_rate": 4.095154564753595e-05, "loss": 0.3677, "step": 20010, "task_loss": 0.5476221442222595 }, { "compression_loss": 0.0, "distillation_loss": 0.4366397261619568, "epoch": 7.24, "learning_rate": 4.0873211327644396e-05, "loss": 0.3743, "step": 20020, "task_loss": 1.0048655271530151 }, { "compression_loss": 0.0, "distillation_loss": 0.36649996042251587, "epoch": 7.24, "learning_rate": 4.079484672350027e-05, "loss": 0.3642, "step": 20030, "task_loss": 0.49900856614112854 }, { "compression_loss": 0.0, "distillation_loss": 0.26360058784484863, "epoch": 7.24, "learning_rate": 4.0716452239177816e-05, "loss": 0.3499, "step": 20040, "task_loss": 0.32680177688598633 }, { "compression_loss": 0.0, "distillation_loss": 0.49425458908081055, "epoch": 7.25, "learning_rate": 4.0638028278905354e-05, "loss": 0.3466, "step": 20050, "task_loss": 0.7145361304283142 }, { "compression_loss": 0.0, "distillation_loss": 0.31015101075172424, "epoch": 7.25, "learning_rate": 4.055957524706322e-05, "loss": 0.3908, "step": 20060, "task_loss": 0.5189313888549805 }, { "compression_loss": 0.0, "distillation_loss": 0.3288698196411133, "epoch": 7.25, "learning_rate": 4.048109354818161e-05, "loss": 0.378, "step": 20070, "task_loss": 0.42466723918914795 }, { "compression_loss": 0.0, "distillation_loss": 0.3703783452510834, "epoch": 7.26, "learning_rate": 4.040258358693856e-05, "loss": 0.3968, "step": 20080, "task_loss": 0.9877618551254272 }, { "compression_loss": 0.0, "distillation_loss": 0.41583582758903503, "epoch": 7.26, "learning_rate": 4.032404576815781e-05, "loss": 0.3541, "step": 20090, "task_loss": 0.7476043701171875 }, { "compression_loss": 0.0, "distillation_loss": 0.310701847076416, "epoch": 7.26, "learning_rate": 4.024548049680678e-05, "loss": 0.3608, "step": 20100, "task_loss": 0.7368062138557434 }, { "compression_loss": 0.0, "distillation_loss": 0.42681705951690674, "epoch": 7.27, "learning_rate": 4.016688817799444e-05, "loss": 0.3524, "step": 20110, "task_loss": 0.6136888861656189 }, { "compression_loss": 0.0, "distillation_loss": 0.3238581120967865, "epoch": 7.27, "learning_rate": 4.0088269216969186e-05, "loss": 0.3522, "step": 20120, "task_loss": 0.47326260805130005 }, { "compression_loss": 0.0, "distillation_loss": 0.3412288725376129, "epoch": 7.28, "learning_rate": 4.0009624019116845e-05, "loss": 0.3413, "step": 20130, "task_loss": 0.6561700105667114 }, { "compression_loss": 0.0, "distillation_loss": 0.24474942684173584, "epoch": 7.28, "learning_rate": 3.993095298995848e-05, "loss": 0.3571, "step": 20140, "task_loss": 0.2822563648223877 }, { "compression_loss": 0.0, "distillation_loss": 0.33786123991012573, "epoch": 7.28, "learning_rate": 3.985225653514839e-05, "loss": 0.3548, "step": 20150, "task_loss": 0.5039522647857666 }, { "compression_loss": 0.0, "distillation_loss": 0.26680850982666016, "epoch": 7.29, "learning_rate": 3.977353506047195e-05, "loss": 0.3808, "step": 20160, "task_loss": 0.4330366849899292 }, { "compression_loss": 0.0, "distillation_loss": 0.3075251579284668, "epoch": 7.29, "learning_rate": 3.9694788971843556e-05, "loss": 0.3602, "step": 20170, "task_loss": 0.4514367878437042 }, { "compression_loss": 0.0, "distillation_loss": 0.26794716715812683, "epoch": 7.29, "learning_rate": 3.961601867530452e-05, "loss": 0.3607, "step": 20180, "task_loss": 0.46283090114593506 }, { "compression_loss": 0.0, "distillation_loss": 0.2926703095436096, "epoch": 7.3, "learning_rate": 3.953722457702098e-05, "loss": 0.3783, "step": 20190, "task_loss": 0.5470341444015503 }, { "compression_loss": 0.0, "distillation_loss": 0.3019060492515564, "epoch": 7.3, "learning_rate": 3.9458407083281805e-05, "loss": 0.3466, "step": 20200, "task_loss": 0.4646940529346466 }, { "compression_loss": 0.0, "distillation_loss": 0.3612298369407654, "epoch": 7.3, "learning_rate": 3.93795666004965e-05, "loss": 0.3733, "step": 20210, "task_loss": 0.6525784730911255 }, { "compression_loss": 0.0, "distillation_loss": 0.41849541664123535, "epoch": 7.31, "learning_rate": 3.930070353519308e-05, "loss": 0.358, "step": 20220, "task_loss": 0.6312863826751709 }, { "compression_loss": 0.0, "distillation_loss": 0.33530235290527344, "epoch": 7.31, "learning_rate": 3.9221818294016066e-05, "loss": 0.3541, "step": 20230, "task_loss": 0.5233701467514038 }, { "compression_loss": 0.0, "distillation_loss": 0.28744643926620483, "epoch": 7.31, "learning_rate": 3.914291128372426e-05, "loss": 0.3271, "step": 20240, "task_loss": 0.5557354092597961 }, { "compression_loss": 0.0, "distillation_loss": 0.4122638404369354, "epoch": 7.32, "learning_rate": 3.9063982911188756e-05, "loss": 0.3687, "step": 20250, "task_loss": 0.5771534442901611 }, { "epoch": 7.32, "eval_exact_match": 82.98013245033113, "eval_f1": 89.94478162383577, "step": 20250 }, { "compression_loss": 0.0, "distillation_loss": 0.40526461601257324, "epoch": 7.32, "learning_rate": 3.8985033583390775e-05, "loss": 0.388, "step": 20260, "task_loss": 0.8766403198242188 }, { "compression_loss": 0.0, "distillation_loss": 0.5623233318328857, "epoch": 7.33, "learning_rate": 3.8906063707419606e-05, "loss": 0.3682, "step": 20270, "task_loss": 0.39787179231643677 }, { "compression_loss": 0.0, "distillation_loss": 0.2818659543991089, "epoch": 7.33, "learning_rate": 3.882707369047048e-05, "loss": 0.3711, "step": 20280, "task_loss": 0.590981125831604 }, { "compression_loss": 0.0, "distillation_loss": 0.468313992023468, "epoch": 7.33, "learning_rate": 3.874806393984249e-05, "loss": 0.3643, "step": 20290, "task_loss": 0.6314665079116821 }, { "compression_loss": 0.0, "distillation_loss": 0.2927176356315613, "epoch": 7.34, "learning_rate": 3.866903486293648e-05, "loss": 0.3748, "step": 20300, "task_loss": 0.6189347505569458 }, { "compression_loss": 0.0, "distillation_loss": 0.32553645968437195, "epoch": 7.34, "learning_rate": 3.8589986867252935e-05, "loss": 0.3557, "step": 20310, "task_loss": 0.8069854974746704 }, { "compression_loss": 0.0, "distillation_loss": 0.287088006734848, "epoch": 7.34, "learning_rate": 3.851092036038991e-05, "loss": 0.3733, "step": 20320, "task_loss": 0.851733922958374 }, { "compression_loss": 0.0, "distillation_loss": 0.41035404801368713, "epoch": 7.35, "learning_rate": 3.843183575004091e-05, "loss": 0.3983, "step": 20330, "task_loss": 0.53264319896698 }, { "compression_loss": 0.0, "distillation_loss": 0.39696526527404785, "epoch": 7.35, "learning_rate": 3.835273344399275e-05, "loss": 0.3934, "step": 20340, "task_loss": 0.460975706577301 }, { "compression_loss": 0.0, "distillation_loss": 0.3248482048511505, "epoch": 7.35, "learning_rate": 3.827361385012354e-05, "loss": 0.3619, "step": 20350, "task_loss": 0.5883316993713379 }, { "compression_loss": 0.0, "distillation_loss": 0.488412082195282, "epoch": 7.36, "learning_rate": 3.819447737640053e-05, "loss": 0.4008, "step": 20360, "task_loss": 0.699428141117096 }, { "compression_loss": 0.0, "distillation_loss": 0.3935892581939697, "epoch": 7.36, "learning_rate": 3.8115324430877966e-05, "loss": 0.3865, "step": 20370, "task_loss": 0.6628360748291016 }, { "compression_loss": 0.0, "distillation_loss": 0.29902222752571106, "epoch": 7.37, "learning_rate": 3.8036155421695045e-05, "loss": 0.3458, "step": 20380, "task_loss": 0.6145927309989929 }, { "compression_loss": 0.0, "distillation_loss": 0.30101636052131653, "epoch": 7.37, "learning_rate": 3.7956970757073814e-05, "loss": 0.3705, "step": 20390, "task_loss": 0.4143185019493103 }, { "compression_loss": 0.0, "distillation_loss": 0.22915220260620117, "epoch": 7.37, "learning_rate": 3.787777084531704e-05, "loss": 0.3822, "step": 20400, "task_loss": 0.35247260332107544 }, { "compression_loss": 0.0, "distillation_loss": 0.3068666458129883, "epoch": 7.38, "learning_rate": 3.779855609480608e-05, "loss": 0.3304, "step": 20410, "task_loss": 0.7493861317634583 }, { "compression_loss": 0.0, "distillation_loss": 0.3240140974521637, "epoch": 7.38, "learning_rate": 3.771932691399883e-05, "loss": 0.3831, "step": 20420, "task_loss": 0.4175964593887329 }, { "compression_loss": 0.0, "distillation_loss": 0.30583804845809937, "epoch": 7.38, "learning_rate": 3.76400837114276e-05, "loss": 0.352, "step": 20430, "task_loss": 0.42632582783699036 }, { "compression_loss": 0.0, "distillation_loss": 0.3739269971847534, "epoch": 7.39, "learning_rate": 3.7560826895696975e-05, "loss": 0.3465, "step": 20440, "task_loss": 0.38233160972595215 }, { "compression_loss": 0.0, "distillation_loss": 0.32636404037475586, "epoch": 7.39, "learning_rate": 3.748155687548175e-05, "loss": 0.3609, "step": 20450, "task_loss": 0.560936689376831 }, { "compression_loss": 0.0, "distillation_loss": 0.2921903729438782, "epoch": 7.39, "learning_rate": 3.740227405952482e-05, "loss": 0.3442, "step": 20460, "task_loss": 0.7394105792045593 }, { "compression_loss": 0.0, "distillation_loss": 0.28189751505851746, "epoch": 7.4, "learning_rate": 3.732297885663502e-05, "loss": 0.3615, "step": 20470, "task_loss": 0.3475574851036072 }, { "compression_loss": 0.0, "distillation_loss": 0.32151737809181213, "epoch": 7.4, "learning_rate": 3.7243671675685105e-05, "loss": 0.3768, "step": 20480, "task_loss": 0.35703375935554504 }, { "compression_loss": 0.0, "distillation_loss": 0.4484822750091553, "epoch": 7.41, "learning_rate": 3.716435292560956e-05, "loss": 0.3516, "step": 20490, "task_loss": 0.8057897090911865 }, { "compression_loss": 0.0, "distillation_loss": 0.2865402400493622, "epoch": 7.41, "learning_rate": 3.708502301540252e-05, "loss": 0.3487, "step": 20500, "task_loss": 0.7167019248008728 }, { "epoch": 7.41, "eval_exact_match": 82.96121097445601, "eval_f1": 89.79021405051186, "step": 20500 }, { "compression_loss": 0.0, "distillation_loss": 0.29780441522598267, "epoch": 7.41, "learning_rate": 3.70056823541157e-05, "loss": 0.4029, "step": 20510, "task_loss": 0.36201319098472595 }, { "compression_loss": 0.0, "distillation_loss": 0.47126126289367676, "epoch": 7.42, "learning_rate": 3.692633135085621e-05, "loss": 0.3639, "step": 20520, "task_loss": 0.5977838039398193 }, { "compression_loss": 0.0, "distillation_loss": 0.2689071297645569, "epoch": 7.42, "learning_rate": 3.684697041478451e-05, "loss": 0.358, "step": 20530, "task_loss": 0.38315659761428833 }, { "compression_loss": 0.0, "distillation_loss": 0.28154638409614563, "epoch": 7.42, "learning_rate": 3.6767599955112285e-05, "loss": 0.3681, "step": 20540, "task_loss": 0.4523980915546417 }, { "compression_loss": 0.0, "distillation_loss": 0.33390653133392334, "epoch": 7.43, "learning_rate": 3.66882203811003e-05, "loss": 0.3368, "step": 20550, "task_loss": 0.3338510990142822 }, { "compression_loss": 0.0, "distillation_loss": 0.36619308590888977, "epoch": 7.43, "learning_rate": 3.6608832102056345e-05, "loss": 0.3908, "step": 20560, "task_loss": 0.49176403880119324 }, { "compression_loss": 0.0, "distillation_loss": 0.4774571657180786, "epoch": 7.43, "learning_rate": 3.6529435527333065e-05, "loss": 0.3801, "step": 20570, "task_loss": 0.9242376089096069 }, { "compression_loss": 0.0, "distillation_loss": 0.3975795805454254, "epoch": 7.44, "learning_rate": 3.645003106632591e-05, "loss": 0.3505, "step": 20580, "task_loss": 0.6916407346725464 }, { "compression_loss": 0.0, "distillation_loss": 0.3709920048713684, "epoch": 7.44, "learning_rate": 3.637061912847099e-05, "loss": 0.3534, "step": 20590, "task_loss": 0.495505690574646 }, { "compression_loss": 0.0, "distillation_loss": 0.2470513880252838, "epoch": 7.44, "learning_rate": 3.629120012324294e-05, "loss": 0.3301, "step": 20600, "task_loss": 0.14731258153915405 }, { "compression_loss": 0.0, "distillation_loss": 0.2766827344894409, "epoch": 7.45, "learning_rate": 3.621177446015287e-05, "loss": 0.3582, "step": 20610, "task_loss": 0.2590910494327545 }, { "compression_loss": 0.0, "distillation_loss": 0.34083378314971924, "epoch": 7.45, "learning_rate": 3.61323425487462e-05, "loss": 0.3602, "step": 20620, "task_loss": 0.4858570396900177 }, { "compression_loss": 0.0, "distillation_loss": 0.27848386764526367, "epoch": 7.46, "learning_rate": 3.605290479860057e-05, "loss": 0.38, "step": 20630, "task_loss": 0.314744770526886 }, { "compression_loss": 0.0, "distillation_loss": 0.2510620951652527, "epoch": 7.46, "learning_rate": 3.597346161932376e-05, "loss": 0.3617, "step": 20640, "task_loss": 0.33013665676116943 }, { "compression_loss": 0.0, "distillation_loss": 0.31609711050987244, "epoch": 7.46, "learning_rate": 3.5894013420551494e-05, "loss": 0.3399, "step": 20650, "task_loss": 0.42119982838630676 }, { "compression_loss": 0.0, "distillation_loss": 0.40757670998573303, "epoch": 7.47, "learning_rate": 3.58145606119454e-05, "loss": 0.3584, "step": 20660, "task_loss": 0.5284794569015503 }, { "compression_loss": 0.0, "distillation_loss": 0.25075843930244446, "epoch": 7.47, "learning_rate": 3.573510360319088e-05, "loss": 0.3632, "step": 20670, "task_loss": 0.2154819518327713 }, { "compression_loss": 0.0, "distillation_loss": 0.3524804413318634, "epoch": 7.47, "learning_rate": 3.565564280399499e-05, "loss": 0.3769, "step": 20680, "task_loss": 0.5062075257301331 }, { "compression_loss": 0.0, "distillation_loss": 0.2267530858516693, "epoch": 7.48, "learning_rate": 3.557617862408434e-05, "loss": 0.3603, "step": 20690, "task_loss": 0.3778666853904724 }, { "compression_loss": 0.0, "distillation_loss": 0.2886289954185486, "epoch": 7.48, "learning_rate": 3.5496711473202944e-05, "loss": 0.3234, "step": 20700, "task_loss": 0.4648386240005493 }, { "compression_loss": 0.0, "distillation_loss": 0.30359941720962524, "epoch": 7.48, "learning_rate": 3.541724176111018e-05, "loss": 0.3388, "step": 20710, "task_loss": 0.5805556178092957 }, { "compression_loss": 0.0, "distillation_loss": 0.390267550945282, "epoch": 7.49, "learning_rate": 3.533776989757858e-05, "loss": 0.3511, "step": 20720, "task_loss": 0.9671446084976196 }, { "compression_loss": 0.0, "distillation_loss": 0.39545518159866333, "epoch": 7.49, "learning_rate": 3.52582962923918e-05, "loss": 0.3803, "step": 20730, "task_loss": 0.4439846873283386 }, { "compression_loss": 0.0, "distillation_loss": 0.25991690158843994, "epoch": 7.5, "learning_rate": 3.517882135534249e-05, "loss": 0.3708, "step": 20740, "task_loss": 0.40725821256637573 }, { "compression_loss": 0.0, "distillation_loss": 0.3476290702819824, "epoch": 7.5, "learning_rate": 3.509934549623014e-05, "loss": 0.3794, "step": 20750, "task_loss": 0.40585631132125854 }, { "epoch": 7.5, "eval_exact_match": 83.14096499526963, "eval_f1": 90.06066489311088, "step": 20750 }, { "compression_loss": 0.0, "distillation_loss": 0.26795804500579834, "epoch": 7.5, "learning_rate": 3.501986912485901e-05, "loss": 0.3313, "step": 20760, "task_loss": 0.318131685256958 }, { "compression_loss": 0.0, "distillation_loss": 0.27528253197669983, "epoch": 7.51, "learning_rate": 3.494039265103597e-05, "loss": 0.3785, "step": 20770, "task_loss": 0.6479158401489258 }, { "compression_loss": 0.0, "distillation_loss": 0.4581003189086914, "epoch": 7.51, "learning_rate": 3.486091648456848e-05, "loss": 0.3914, "step": 20780, "task_loss": 0.5169153213500977 }, { "compression_loss": 0.0, "distillation_loss": 0.28237971663475037, "epoch": 7.51, "learning_rate": 3.478144103526236e-05, "loss": 0.3394, "step": 20790, "task_loss": 0.4905843734741211 }, { "compression_loss": 0.0, "distillation_loss": 0.35639041662216187, "epoch": 7.52, "learning_rate": 3.470196671291976e-05, "loss": 0.3705, "step": 20800, "task_loss": 0.5656434297561646 }, { "compression_loss": 0.0, "distillation_loss": 0.3621225953102112, "epoch": 7.52, "learning_rate": 3.4622493927337014e-05, "loss": 0.3687, "step": 20810, "task_loss": 0.799970269203186 }, { "compression_loss": 0.0, "distillation_loss": 0.2872578501701355, "epoch": 7.52, "learning_rate": 3.4543023088302515e-05, "loss": 0.3459, "step": 20820, "task_loss": 0.2272077053785324 }, { "compression_loss": 0.0, "distillation_loss": 0.37445396184921265, "epoch": 7.53, "learning_rate": 3.4463554605594636e-05, "loss": 0.3734, "step": 20830, "task_loss": 0.6421491503715515 }, { "compression_loss": 0.0, "distillation_loss": 0.3490419387817383, "epoch": 7.53, "learning_rate": 3.438408888897961e-05, "loss": 0.3361, "step": 20840, "task_loss": 0.5615388751029968 }, { "compression_loss": 0.0, "distillation_loss": 0.3245730400085449, "epoch": 7.54, "learning_rate": 3.4304626348209364e-05, "loss": 0.3551, "step": 20850, "task_loss": 0.3907738924026489 }, { "compression_loss": 0.0, "distillation_loss": 0.3850998282432556, "epoch": 7.54, "learning_rate": 3.4225167393019515e-05, "loss": 0.397, "step": 20860, "task_loss": 0.6859105825424194 }, { "compression_loss": 0.0, "distillation_loss": 0.37591278553009033, "epoch": 7.54, "learning_rate": 3.4145712433127126e-05, "loss": 0.3667, "step": 20870, "task_loss": 0.4931633472442627 }, { "compression_loss": 0.0, "distillation_loss": 0.35777273774147034, "epoch": 7.55, "learning_rate": 3.4066261878228706e-05, "loss": 0.3744, "step": 20880, "task_loss": 0.4974115490913391 }, { "compression_loss": 0.0, "distillation_loss": 0.36851346492767334, "epoch": 7.55, "learning_rate": 3.398681613799803e-05, "loss": 0.3769, "step": 20890, "task_loss": 0.4662131071090698 }, { "compression_loss": 0.0, "distillation_loss": 0.33928361535072327, "epoch": 7.55, "learning_rate": 3.390737562208405e-05, "loss": 0.4119, "step": 20900, "task_loss": 0.5498688220977783 }, { "compression_loss": 0.0, "distillation_loss": 0.4270703196525574, "epoch": 7.56, "learning_rate": 3.3827940740108764e-05, "loss": 0.3709, "step": 20910, "task_loss": 0.481827050447464 }, { "compression_loss": 0.0, "distillation_loss": 0.43292000889778137, "epoch": 7.56, "learning_rate": 3.374851190166516e-05, "loss": 0.3787, "step": 20920, "task_loss": 0.9506334066390991 }, { "compression_loss": 0.0, "distillation_loss": 0.2657639980316162, "epoch": 7.56, "learning_rate": 3.366908951631501e-05, "loss": 0.3494, "step": 20930, "task_loss": 0.4249715209007263 }, { "compression_loss": 0.0, "distillation_loss": 0.394223690032959, "epoch": 7.57, "learning_rate": 3.3589673993586866e-05, "loss": 0.3941, "step": 20940, "task_loss": 0.5982447266578674 }, { "compression_loss": 0.0, "distillation_loss": 0.2714296579360962, "epoch": 7.57, "learning_rate": 3.351026574297384e-05, "loss": 0.3427, "step": 20950, "task_loss": 0.6363648176193237 }, { "compression_loss": 0.0, "distillation_loss": 0.3336118459701538, "epoch": 7.57, "learning_rate": 3.343086517393162e-05, "loss": 0.394, "step": 20960, "task_loss": 0.7720279693603516 }, { "compression_loss": 0.0, "distillation_loss": 0.30867213010787964, "epoch": 7.58, "learning_rate": 3.335147269587619e-05, "loss": 0.3789, "step": 20970, "task_loss": 0.40268540382385254 }, { "compression_loss": 0.0, "distillation_loss": 0.4145679771900177, "epoch": 7.58, "learning_rate": 3.327208871818189e-05, "loss": 0.3476, "step": 20980, "task_loss": 0.7717650532722473 }, { "compression_loss": 0.0, "distillation_loss": 0.31620293855667114, "epoch": 7.59, "learning_rate": 3.3192713650179204e-05, "loss": 0.3984, "step": 20990, "task_loss": 0.5271072387695312 }, { "compression_loss": 0.0, "distillation_loss": 0.3817560076713562, "epoch": 7.59, "learning_rate": 3.311334790115267e-05, "loss": 0.3455, "step": 21000, "task_loss": 0.6588694453239441 }, { "epoch": 7.59, "eval_exact_match": 83.01797540208136, "eval_f1": 90.0024701105002, "step": 21000 }, { "compression_loss": 0.0, "distillation_loss": 0.304983913898468, "epoch": 7.59, "learning_rate": 3.303399188033876e-05, "loss": 0.3438, "step": 21010, "task_loss": 0.49317359924316406 }, { "compression_loss": 0.0, "distillation_loss": 0.3328720033168793, "epoch": 7.6, "learning_rate": 3.295464599692384e-05, "loss": 0.3604, "step": 21020, "task_loss": 0.6254655718803406 }, { "compression_loss": 0.0, "distillation_loss": 0.31305211782455444, "epoch": 7.6, "learning_rate": 3.287531066004193e-05, "loss": 0.3649, "step": 21030, "task_loss": 0.5049108266830444 }, { "compression_loss": 0.0, "distillation_loss": 0.38831958174705505, "epoch": 7.6, "learning_rate": 3.279598627877273e-05, "loss": 0.368, "step": 21040, "task_loss": 0.3947518467903137 }, { "compression_loss": 0.0, "distillation_loss": 0.29545751214027405, "epoch": 7.61, "learning_rate": 3.2716673262139395e-05, "loss": 0.3442, "step": 21050, "task_loss": 0.9915268421173096 }, { "compression_loss": 0.0, "distillation_loss": 0.33492806553840637, "epoch": 7.61, "learning_rate": 3.2637372019106546e-05, "loss": 0.3608, "step": 21060, "task_loss": 0.49239182472229004 }, { "compression_loss": 0.0, "distillation_loss": 0.31485992670059204, "epoch": 7.61, "learning_rate": 3.255808295857803e-05, "loss": 0.3431, "step": 21070, "task_loss": 0.811964750289917 }, { "compression_loss": 0.0, "distillation_loss": 0.32304200530052185, "epoch": 7.62, "learning_rate": 3.247880648939492e-05, "loss": 0.3626, "step": 21080, "task_loss": 0.3488319218158722 }, { "compression_loss": 0.0, "distillation_loss": 0.4019070267677307, "epoch": 7.62, "learning_rate": 3.239954302033335e-05, "loss": 0.3459, "step": 21090, "task_loss": 0.6527318954467773 }, { "compression_loss": 0.0, "distillation_loss": 0.35567885637283325, "epoch": 7.63, "learning_rate": 3.232029296010241e-05, "loss": 0.3526, "step": 21100, "task_loss": 0.4953433871269226 }, { "compression_loss": 0.0, "distillation_loss": 0.248754620552063, "epoch": 7.63, "learning_rate": 3.224105671734206e-05, "loss": 0.3247, "step": 21110, "task_loss": 0.5917302370071411 }, { "compression_loss": 0.0, "distillation_loss": 0.3231223523616791, "epoch": 7.63, "learning_rate": 3.2161834700621026e-05, "loss": 0.3262, "step": 21120, "task_loss": 0.6599835157394409 }, { "compression_loss": 0.0, "distillation_loss": 0.3281194567680359, "epoch": 7.64, "learning_rate": 3.2082627318434634e-05, "loss": 0.3392, "step": 21130, "task_loss": 0.7458435297012329 }, { "compression_loss": 0.0, "distillation_loss": 0.37381210923194885, "epoch": 7.64, "learning_rate": 3.2003434979202836e-05, "loss": 0.3412, "step": 21140, "task_loss": 0.6678844690322876 }, { "compression_loss": 0.0, "distillation_loss": 0.22763706743717194, "epoch": 7.64, "learning_rate": 3.19242580912679e-05, "loss": 0.3519, "step": 21150, "task_loss": 0.42863303422927856 }, { "compression_loss": 0.0, "distillation_loss": 0.2760619819164276, "epoch": 7.65, "learning_rate": 3.1845097062892545e-05, "loss": 0.3365, "step": 21160, "task_loss": 0.6848987340927124 }, { "compression_loss": 0.0, "distillation_loss": 0.38911378383636475, "epoch": 7.65, "learning_rate": 3.1765952302257604e-05, "loss": 0.3308, "step": 21170, "task_loss": 0.2971108555793762 }, { "compression_loss": 0.0, "distillation_loss": 0.39150434732437134, "epoch": 7.65, "learning_rate": 3.1686824217460105e-05, "loss": 0.3895, "step": 21180, "task_loss": 0.7148730158805847 }, { "compression_loss": 0.0, "distillation_loss": 0.48236069083213806, "epoch": 7.66, "learning_rate": 3.160771321651105e-05, "loss": 0.3615, "step": 21190, "task_loss": 0.5340760350227356 }, { "compression_loss": 0.0, "distillation_loss": 0.2843457758426666, "epoch": 7.66, "learning_rate": 3.152861970733336e-05, "loss": 0.3503, "step": 21200, "task_loss": 0.5471694469451904 }, { "compression_loss": 0.0, "distillation_loss": 0.37405431270599365, "epoch": 7.67, "learning_rate": 3.144954409775978e-05, "loss": 0.353, "step": 21210, "task_loss": 0.5435091853141785 }, { "compression_loss": 0.0, "distillation_loss": 0.3254290223121643, "epoch": 7.67, "learning_rate": 3.1370486795530724e-05, "loss": 0.3726, "step": 21220, "task_loss": 0.44784238934516907 }, { "compression_loss": 0.0, "distillation_loss": 0.3736199736595154, "epoch": 7.67, "learning_rate": 3.129144820829223e-05, "loss": 0.3367, "step": 21230, "task_loss": 0.684989869594574 }, { "compression_loss": 0.0, "distillation_loss": 0.2720557749271393, "epoch": 7.68, "learning_rate": 3.1212428743593856e-05, "loss": 0.3924, "step": 21240, "task_loss": 0.43417856097221375 }, { "compression_loss": 0.0, "distillation_loss": 0.3352609872817993, "epoch": 7.68, "learning_rate": 3.113342880888649e-05, "loss": 0.3355, "step": 21250, "task_loss": 0.5457616448402405 }, { "epoch": 7.68, "eval_exact_match": 82.97067171239357, "eval_f1": 89.75270243191726, "step": 21250 }, { "compression_loss": 0.0, "distillation_loss": 0.2917085886001587, "epoch": 7.68, "learning_rate": 3.10544488115204e-05, "loss": 0.3251, "step": 21260, "task_loss": 0.4778927266597748 }, { "compression_loss": 0.0, "distillation_loss": 0.3367916941642761, "epoch": 7.69, "learning_rate": 3.097548915874299e-05, "loss": 0.3507, "step": 21270, "task_loss": 0.5044569373130798 }, { "compression_loss": 0.0, "distillation_loss": 0.3709861636161804, "epoch": 7.69, "learning_rate": 3.08965502576968e-05, "loss": 0.4027, "step": 21280, "task_loss": 0.5584628582000732 }, { "compression_loss": 0.0, "distillation_loss": 0.35419756174087524, "epoch": 7.69, "learning_rate": 3.081763251541732e-05, "loss": 0.3693, "step": 21290, "task_loss": 0.6167153120040894 }, { "compression_loss": 0.0, "distillation_loss": 0.35388609766960144, "epoch": 7.7, "learning_rate": 3.0738736338830997e-05, "loss": 0.3349, "step": 21300, "task_loss": 0.420735239982605 }, { "compression_loss": 0.0, "distillation_loss": 0.28986817598342896, "epoch": 7.7, "learning_rate": 3.0659862134753025e-05, "loss": 0.3477, "step": 21310, "task_loss": 0.3834241032600403 }, { "compression_loss": 0.0, "distillation_loss": 0.4581122100353241, "epoch": 7.71, "learning_rate": 3.0581010309885335e-05, "loss": 0.3659, "step": 21320, "task_loss": 0.6090573072433472 }, { "compression_loss": 0.0, "distillation_loss": 0.2977628707885742, "epoch": 7.71, "learning_rate": 3.0502181270814433e-05, "loss": 0.3684, "step": 21330, "task_loss": 0.28831470012664795 }, { "compression_loss": 0.0, "distillation_loss": 0.32104063034057617, "epoch": 7.71, "learning_rate": 3.042337542400939e-05, "loss": 0.3712, "step": 21340, "task_loss": 0.4596444368362427 }, { "compression_loss": 0.0, "distillation_loss": 0.39884674549102783, "epoch": 7.72, "learning_rate": 3.0344593175819606e-05, "loss": 0.3462, "step": 21350, "task_loss": 0.8089415431022644 }, { "compression_loss": 0.0, "distillation_loss": 0.33032548427581787, "epoch": 7.72, "learning_rate": 3.026583493247288e-05, "loss": 0.3524, "step": 21360, "task_loss": 0.6769004464149475 }, { "compression_loss": 0.0, "distillation_loss": 0.37094488739967346, "epoch": 7.72, "learning_rate": 3.018710110007318e-05, "loss": 0.3836, "step": 21370, "task_loss": 0.5415042638778687 }, { "compression_loss": 0.0, "distillation_loss": 0.2806081175804138, "epoch": 7.73, "learning_rate": 3.010839208459863e-05, "loss": 0.3942, "step": 21380, "task_loss": 0.598015308380127 }, { "compression_loss": 0.0, "distillation_loss": 0.363537073135376, "epoch": 7.73, "learning_rate": 3.0029708291899378e-05, "loss": 0.3359, "step": 21390, "task_loss": 0.7415146231651306 }, { "compression_loss": 0.0, "distillation_loss": 0.32732537388801575, "epoch": 7.73, "learning_rate": 2.9951050127695518e-05, "loss": 0.3601, "step": 21400, "task_loss": 0.7806991338729858 }, { "compression_loss": 0.0, "distillation_loss": 0.44270604848861694, "epoch": 7.74, "learning_rate": 2.9872417997574987e-05, "loss": 0.3318, "step": 21410, "task_loss": 0.5888664722442627 }, { "compression_loss": 0.0, "distillation_loss": 0.2828146815299988, "epoch": 7.74, "learning_rate": 2.979381230699151e-05, "loss": 0.3797, "step": 21420, "task_loss": 0.6815419793128967 }, { "compression_loss": 0.0, "distillation_loss": 0.3201327919960022, "epoch": 7.74, "learning_rate": 2.9715233461262427e-05, "loss": 0.3256, "step": 21430, "task_loss": 0.7906991243362427 }, { "compression_loss": 0.0, "distillation_loss": 0.2204771190881729, "epoch": 7.75, "learning_rate": 2.9636681865566735e-05, "loss": 0.378, "step": 21440, "task_loss": 0.3519682288169861 }, { "compression_loss": 0.0, "distillation_loss": 0.3597320020198822, "epoch": 7.75, "learning_rate": 2.9558157924942824e-05, "loss": 0.3507, "step": 21450, "task_loss": 0.8138501048088074 }, { "compression_loss": 0.0, "distillation_loss": 0.21947142481803894, "epoch": 7.76, "learning_rate": 2.947966204428658e-05, "loss": 0.3732, "step": 21460, "task_loss": 0.6017472743988037 }, { "compression_loss": 0.0, "distillation_loss": 0.3029401898384094, "epoch": 7.76, "learning_rate": 2.940119462834914e-05, "loss": 0.3162, "step": 21470, "task_loss": 0.4510578513145447 }, { "compression_loss": 0.0, "distillation_loss": 0.48782584071159363, "epoch": 7.76, "learning_rate": 2.9322756081734894e-05, "loss": 0.3629, "step": 21480, "task_loss": 0.8380551338195801 }, { "compression_loss": 0.0, "distillation_loss": 0.34943652153015137, "epoch": 7.77, "learning_rate": 2.9244346808899347e-05, "loss": 0.3598, "step": 21490, "task_loss": 0.8652399182319641 }, { "compression_loss": 0.0, "distillation_loss": 0.343707799911499, "epoch": 7.77, "learning_rate": 2.91659672141471e-05, "loss": 0.3269, "step": 21500, "task_loss": 0.6488476991653442 }, { "epoch": 7.77, "eval_exact_match": 83.22611163670766, "eval_f1": 90.06873548509186, "step": 21500 }, { "compression_loss": 0.0, "distillation_loss": 0.28079891204833984, "epoch": 7.77, "learning_rate": 2.9087617701629685e-05, "loss": 0.3729, "step": 21510, "task_loss": 0.5381488800048828 }, { "compression_loss": 0.0, "distillation_loss": 0.30641067028045654, "epoch": 7.78, "learning_rate": 2.9009298675343544e-05, "loss": 0.3535, "step": 21520, "task_loss": 0.3132174015045166 }, { "compression_loss": 0.0, "distillation_loss": 0.4152531921863556, "epoch": 7.78, "learning_rate": 2.8931010539127894e-05, "loss": 0.3741, "step": 21530, "task_loss": 0.7610316276550293 }, { "compression_loss": 0.0, "distillation_loss": 0.36251795291900635, "epoch": 7.78, "learning_rate": 2.8852753696662726e-05, "loss": 0.3404, "step": 21540, "task_loss": 0.6292003393173218 }, { "compression_loss": 0.0, "distillation_loss": 0.40879419445991516, "epoch": 7.79, "learning_rate": 2.877452855146659e-05, "loss": 0.3618, "step": 21550, "task_loss": 0.7074788808822632 }, { "compression_loss": 0.0, "distillation_loss": 0.33598774671554565, "epoch": 7.79, "learning_rate": 2.8696335506894678e-05, "loss": 0.3758, "step": 21560, "task_loss": 0.9449920654296875 }, { "compression_loss": 0.0, "distillation_loss": 0.3502233624458313, "epoch": 7.8, "learning_rate": 2.8618174966136597e-05, "loss": 0.3865, "step": 21570, "task_loss": 0.4362756311893463 }, { "compression_loss": 0.0, "distillation_loss": 0.44666188955307007, "epoch": 7.8, "learning_rate": 2.8540047332214402e-05, "loss": 0.3597, "step": 21580, "task_loss": 0.5449387431144714 }, { "compression_loss": 0.0, "distillation_loss": 0.3455656170845032, "epoch": 7.8, "learning_rate": 2.8461953007980413e-05, "loss": 0.3728, "step": 21590, "task_loss": 0.37384119629859924 }, { "compression_loss": 0.0, "distillation_loss": 0.37466901540756226, "epoch": 7.81, "learning_rate": 2.8383892396115255e-05, "loss": 0.3396, "step": 21600, "task_loss": 0.4856455326080322 }, { "compression_loss": 0.0, "distillation_loss": 0.30749914050102234, "epoch": 7.81, "learning_rate": 2.830586589912568e-05, "loss": 0.3426, "step": 21610, "task_loss": 0.5648226737976074 }, { "compression_loss": 0.0, "distillation_loss": 0.35594284534454346, "epoch": 7.81, "learning_rate": 2.8227873919342545e-05, "loss": 0.3668, "step": 21620, "task_loss": 0.42687565088272095 }, { "compression_loss": 0.0, "distillation_loss": 0.3522205650806427, "epoch": 7.82, "learning_rate": 2.814991685891871e-05, "loss": 0.3696, "step": 21630, "task_loss": 0.823002278804779 }, { "compression_loss": 0.0, "distillation_loss": 0.3458482027053833, "epoch": 7.82, "learning_rate": 2.807199511982703e-05, "loss": 0.3495, "step": 21640, "task_loss": 0.43529629707336426 }, { "compression_loss": 0.0, "distillation_loss": 0.2593701481819153, "epoch": 7.82, "learning_rate": 2.7994109103858147e-05, "loss": 0.3302, "step": 21650, "task_loss": 0.564799964427948 }, { "compression_loss": 0.0, "distillation_loss": 0.3371639549732208, "epoch": 7.83, "learning_rate": 2.7916259212618592e-05, "loss": 0.3596, "step": 21660, "task_loss": 0.8256035447120667 }, { "compression_loss": 0.0, "distillation_loss": 0.4070214033126831, "epoch": 7.83, "learning_rate": 2.7838445847528554e-05, "loss": 0.3238, "step": 21670, "task_loss": 0.38024699687957764 }, { "compression_loss": 0.0, "distillation_loss": 0.3013300895690918, "epoch": 7.84, "learning_rate": 2.776066940981993e-05, "loss": 0.3573, "step": 21680, "task_loss": 0.42121320962905884 }, { "compression_loss": 0.0, "distillation_loss": 0.33513176441192627, "epoch": 7.84, "learning_rate": 2.7682930300534165e-05, "loss": 0.3414, "step": 21690, "task_loss": 0.5897566080093384 }, { "compression_loss": 0.0, "distillation_loss": 0.3103760778903961, "epoch": 7.84, "learning_rate": 2.7605228920520273e-05, "loss": 0.3171, "step": 21700, "task_loss": 0.6985391974449158 }, { "compression_loss": 0.0, "distillation_loss": 0.32781437039375305, "epoch": 7.85, "learning_rate": 2.7527565670432668e-05, "loss": 0.3543, "step": 21710, "task_loss": 0.6251723766326904 }, { "compression_loss": 0.0, "distillation_loss": 0.2737557291984558, "epoch": 7.85, "learning_rate": 2.7449940950729213e-05, "loss": 0.3419, "step": 21720, "task_loss": 0.6766575574874878 }, { "compression_loss": 0.0, "distillation_loss": 0.27434462308883667, "epoch": 7.85, "learning_rate": 2.737235516166903e-05, "loss": 0.3292, "step": 21730, "task_loss": 0.4648294150829315 }, { "compression_loss": 0.0, "distillation_loss": 0.29078248143196106, "epoch": 7.86, "learning_rate": 2.729480870331058e-05, "loss": 0.3265, "step": 21740, "task_loss": 0.6191574335098267 }, { "compression_loss": 0.0, "distillation_loss": 0.26681390404701233, "epoch": 7.86, "learning_rate": 2.721730197550944e-05, "loss": 0.3432, "step": 21750, "task_loss": 0.4409167766571045 }, { "epoch": 7.86, "eval_exact_match": 82.74361400189214, "eval_f1": 89.81621091651192, "step": 21750 }, { "compression_loss": 0.0, "distillation_loss": 0.45901256799697876, "epoch": 7.86, "learning_rate": 2.7139835377916394e-05, "loss": 0.3586, "step": 21760, "task_loss": 0.7206564545631409 }, { "compression_loss": 0.0, "distillation_loss": 0.3710944354534149, "epoch": 7.87, "learning_rate": 2.7062409309975242e-05, "loss": 0.359, "step": 21770, "task_loss": 0.7943992614746094 }, { "compression_loss": 0.0, "distillation_loss": 0.34832853078842163, "epoch": 7.87, "learning_rate": 2.6985024170920843e-05, "loss": 0.3692, "step": 21780, "task_loss": 0.3599025309085846 }, { "compression_loss": 0.0, "distillation_loss": 0.3316296935081482, "epoch": 7.87, "learning_rate": 2.690768035977699e-05, "loss": 0.3727, "step": 21790, "task_loss": 0.6260291337966919 }, { "compression_loss": 0.0, "distillation_loss": 0.25265419483184814, "epoch": 7.88, "learning_rate": 2.6830378275354378e-05, "loss": 0.3767, "step": 21800, "task_loss": 0.29135915637016296 }, { "compression_loss": 0.0, "distillation_loss": 0.4612353444099426, "epoch": 7.88, "learning_rate": 2.6753118316248545e-05, "loss": 0.3797, "step": 21810, "task_loss": 0.5710431337356567 }, { "compression_loss": 0.0, "distillation_loss": 0.3590574264526367, "epoch": 7.89, "learning_rate": 2.6675900880837846e-05, "loss": 0.3856, "step": 21820, "task_loss": 0.6375476121902466 }, { "compression_loss": 0.0, "distillation_loss": 0.2918437719345093, "epoch": 7.89, "learning_rate": 2.65987263672813e-05, "loss": 0.3624, "step": 21830, "task_loss": 0.3540777564048767 }, { "compression_loss": 0.0, "distillation_loss": 0.38157540559768677, "epoch": 7.89, "learning_rate": 2.652159517351669e-05, "loss": 0.351, "step": 21840, "task_loss": 0.3811739385128021 }, { "compression_loss": 0.0, "distillation_loss": 0.39688819646835327, "epoch": 7.9, "learning_rate": 2.644450769725837e-05, "loss": 0.365, "step": 21850, "task_loss": 0.533487856388092 }, { "compression_loss": 0.0, "distillation_loss": 0.32374733686447144, "epoch": 7.9, "learning_rate": 2.6367464335995296e-05, "loss": 0.3343, "step": 21860, "task_loss": 0.4965505301952362 }, { "compression_loss": 0.0, "distillation_loss": 0.36838701367378235, "epoch": 7.9, "learning_rate": 2.6290465486988934e-05, "loss": 0.3604, "step": 21870, "task_loss": 0.7612681984901428 }, { "compression_loss": 0.0, "distillation_loss": 0.2574426531791687, "epoch": 7.91, "learning_rate": 2.6213511547271256e-05, "loss": 0.3493, "step": 21880, "task_loss": 0.33988291025161743 }, { "compression_loss": 0.0, "distillation_loss": 0.324415385723114, "epoch": 7.91, "learning_rate": 2.613660291364264e-05, "loss": 0.3632, "step": 21890, "task_loss": 0.697488009929657 }, { "compression_loss": 0.0, "distillation_loss": 0.2612054944038391, "epoch": 7.91, "learning_rate": 2.6059739982669875e-05, "loss": 0.3544, "step": 21900, "task_loss": 0.5947055816650391 }, { "compression_loss": 0.0, "distillation_loss": 0.3524448871612549, "epoch": 7.92, "learning_rate": 2.5982923150684062e-05, "loss": 0.3431, "step": 21910, "task_loss": 0.31323203444480896 }, { "compression_loss": 0.0, "distillation_loss": 0.3077560067176819, "epoch": 7.92, "learning_rate": 2.5906152813778642e-05, "loss": 0.3363, "step": 21920, "task_loss": 0.5763083696365356 }, { "compression_loss": 0.0, "distillation_loss": 0.32099348306655884, "epoch": 7.93, "learning_rate": 2.582942936780726e-05, "loss": 0.3612, "step": 21930, "task_loss": 0.7717790603637695 }, { "compression_loss": 0.0, "distillation_loss": 0.43049126863479614, "epoch": 7.93, "learning_rate": 2.575275320838183e-05, "loss": 0.3875, "step": 21940, "task_loss": 0.670395016670227 }, { "compression_loss": 0.0, "distillation_loss": 0.3461580276489258, "epoch": 7.93, "learning_rate": 2.5676124730870394e-05, "loss": 0.3417, "step": 21950, "task_loss": 0.7263317108154297 }, { "compression_loss": 0.0, "distillation_loss": 0.2610408365726471, "epoch": 7.94, "learning_rate": 2.5599544330395164e-05, "loss": 0.3782, "step": 21960, "task_loss": 0.5060991048812866 }, { "compression_loss": 0.0, "distillation_loss": 0.33723878860473633, "epoch": 7.94, "learning_rate": 2.5523012401830417e-05, "loss": 0.3645, "step": 21970, "task_loss": 0.5092849135398865 }, { "compression_loss": 0.0, "distillation_loss": 0.3296668827533722, "epoch": 7.94, "learning_rate": 2.5446529339800535e-05, "loss": 0.3522, "step": 21980, "task_loss": 0.6965002417564392 }, { "compression_loss": 0.0, "distillation_loss": 0.2845882177352905, "epoch": 7.95, "learning_rate": 2.5370095538677883e-05, "loss": 0.3182, "step": 21990, "task_loss": 0.4682374596595764 }, { "compression_loss": 0.0, "distillation_loss": 0.31272655725479126, "epoch": 7.95, "learning_rate": 2.529371139258086e-05, "loss": 0.3591, "step": 22000, "task_loss": 0.5068236589431763 }, { "epoch": 7.95, "eval_exact_match": 82.64900662251655, "eval_f1": 89.68277737846533, "step": 22000 }, { "compression_loss": 0.0, "distillation_loss": 0.3210468292236328, "epoch": 7.95, "learning_rate": 2.5217377295371787e-05, "loss": 0.3422, "step": 22010, "task_loss": 0.3419007658958435 }, { "compression_loss": 0.0, "distillation_loss": 0.5161848068237305, "epoch": 7.96, "learning_rate": 2.5141093640654972e-05, "loss": 0.382, "step": 22020, "task_loss": 0.9878392815589905 }, { "compression_loss": 0.0, "distillation_loss": 0.3749774694442749, "epoch": 7.96, "learning_rate": 2.506486082177455e-05, "loss": 0.3769, "step": 22030, "task_loss": 0.583120584487915 }, { "compression_loss": 0.0, "distillation_loss": 0.4009551405906677, "epoch": 7.97, "learning_rate": 2.4988679231812602e-05, "loss": 0.33, "step": 22040, "task_loss": 0.9236621260643005 }, { "compression_loss": 0.0, "distillation_loss": 0.2605592906475067, "epoch": 7.97, "learning_rate": 2.4912549263587004e-05, "loss": 0.4191, "step": 22050, "task_loss": 0.47352492809295654 }, { "compression_loss": 0.0, "distillation_loss": 0.2575927674770355, "epoch": 7.97, "learning_rate": 2.4836471309649488e-05, "loss": 0.3525, "step": 22060, "task_loss": 0.4551050066947937 }, { "compression_loss": 0.0, "distillation_loss": 0.32211726903915405, "epoch": 7.98, "learning_rate": 2.4760445762283548e-05, "loss": 0.3314, "step": 22070, "task_loss": 0.7064181566238403 }, { "compression_loss": 0.0, "distillation_loss": 0.26055270433425903, "epoch": 7.98, "learning_rate": 2.468447301350249e-05, "loss": 0.3222, "step": 22080, "task_loss": 0.8715969324111938 }, { "compression_loss": 0.0, "distillation_loss": 0.3303118944168091, "epoch": 7.98, "learning_rate": 2.4608553455047338e-05, "loss": 0.3335, "step": 22090, "task_loss": 0.3880299925804138 }, { "compression_loss": 0.0, "distillation_loss": 0.39064037799835205, "epoch": 7.99, "learning_rate": 2.4532687478384877e-05, "loss": 0.3472, "step": 22100, "task_loss": 0.5962386131286621 }, { "compression_loss": 0.0, "distillation_loss": 0.41516995429992676, "epoch": 7.99, "learning_rate": 2.4456875474705573e-05, "loss": 0.3663, "step": 22110, "task_loss": 0.6396934986114502 }, { "compression_loss": 0.0, "distillation_loss": 0.5140536427497864, "epoch": 7.99, "learning_rate": 2.4381117834921653e-05, "loss": 0.341, "step": 22120, "task_loss": 0.5780425071716309 }, { "compression_loss": 0.0, "distillation_loss": 0.37106651067733765, "epoch": 8.0, "learning_rate": 2.4305414949664922e-05, "loss": 0.3482, "step": 22130, "task_loss": 0.5088555812835693 }, { "compression_loss": 0.0, "distillation_loss": 0.2919508218765259, "epoch": 8.0, "learning_rate": 2.422976720928497e-05, "loss": 0.3278, "step": 22140, "task_loss": 0.4410659670829773 }, { "compression_loss": 0.0, "distillation_loss": 0.2985595464706421, "epoch": 8.01, "learning_rate": 2.415417500384695e-05, "loss": 0.2867, "step": 22150, "task_loss": 0.3100461959838867 }, { "compression_loss": 0.0, "distillation_loss": 0.22516822814941406, "epoch": 8.01, "learning_rate": 2.4078638723129704e-05, "loss": 0.3064, "step": 22160, "task_loss": 0.2526964545249939 }, { "compression_loss": 0.0, "distillation_loss": 0.3517003357410431, "epoch": 8.01, "learning_rate": 2.40031587566237e-05, "loss": 0.3218, "step": 22170, "task_loss": 0.2854512333869934 }, { "compression_loss": 0.0, "distillation_loss": 0.2611880302429199, "epoch": 8.02, "learning_rate": 2.3927735493529002e-05, "loss": 0.288, "step": 22180, "task_loss": 0.40842729806900024 }, { "compression_loss": 0.0, "distillation_loss": 0.2626832127571106, "epoch": 8.02, "learning_rate": 2.385236932275336e-05, "loss": 0.3052, "step": 22190, "task_loss": 0.6702091693878174 }, { "compression_loss": 0.0, "distillation_loss": 0.25344762206077576, "epoch": 8.02, "learning_rate": 2.377706063291005e-05, "loss": 0.2927, "step": 22200, "task_loss": 0.5836098790168762 }, { "compression_loss": 0.0, "distillation_loss": 0.23019495606422424, "epoch": 8.03, "learning_rate": 2.3701809812316033e-05, "loss": 0.2879, "step": 22210, "task_loss": 0.5896817445755005 }, { "compression_loss": 0.0, "distillation_loss": 0.2813813388347626, "epoch": 8.03, "learning_rate": 2.3626617248989822e-05, "loss": 0.2885, "step": 22220, "task_loss": 0.4454318881034851 }, { "compression_loss": 0.0, "distillation_loss": 0.3009265959262848, "epoch": 8.03, "learning_rate": 2.3551483330649577e-05, "loss": 0.3059, "step": 22230, "task_loss": 0.67862468957901 }, { "compression_loss": 0.0, "distillation_loss": 0.3150326907634735, "epoch": 8.04, "learning_rate": 2.3476408444711027e-05, "loss": 0.2978, "step": 22240, "task_loss": 0.5857815146446228 }, { "compression_loss": 0.0, "distillation_loss": 0.2585512399673462, "epoch": 8.04, "learning_rate": 2.3401392978285548e-05, "loss": 0.3233, "step": 22250, "task_loss": 0.25251004099845886 }, { "epoch": 8.04, "eval_exact_match": 83.0558183538316, "eval_f1": 89.90000054499299, "step": 22250 }, { "compression_loss": 0.0, "distillation_loss": 0.3542407155036926, "epoch": 8.04, "learning_rate": 2.3326437318178093e-05, "loss": 0.3108, "step": 22260, "task_loss": 1.0562341213226318 }, { "compression_loss": 0.0, "distillation_loss": 0.2739490270614624, "epoch": 8.05, "learning_rate": 2.3251541850885265e-05, "loss": 0.3013, "step": 22270, "task_loss": 0.9316976070404053 }, { "compression_loss": 0.0, "distillation_loss": 0.26413020491600037, "epoch": 8.05, "learning_rate": 2.3176706962593264e-05, "loss": 0.3378, "step": 22280, "task_loss": 0.5294308662414551 }, { "compression_loss": 0.0, "distillation_loss": 0.3085627853870392, "epoch": 8.06, "learning_rate": 2.310193303917596e-05, "loss": 0.3001, "step": 22290, "task_loss": 0.4382203221321106 }, { "compression_loss": 0.0, "distillation_loss": 0.27427783608436584, "epoch": 8.06, "learning_rate": 2.302722046619281e-05, "loss": 0.3169, "step": 22300, "task_loss": 0.39493346214294434 }, { "compression_loss": 0.0, "distillation_loss": 0.39170312881469727, "epoch": 8.06, "learning_rate": 2.2952569628887004e-05, "loss": 0.3369, "step": 22310, "task_loss": 0.48804953694343567 }, { "compression_loss": 0.0, "distillation_loss": 0.36464059352874756, "epoch": 8.07, "learning_rate": 2.2877980912183336e-05, "loss": 0.2951, "step": 22320, "task_loss": 0.524968147277832 }, { "compression_loss": 0.0, "distillation_loss": 0.24969807267189026, "epoch": 8.07, "learning_rate": 2.2803454700686325e-05, "loss": 0.2994, "step": 22330, "task_loss": 0.8763778805732727 }, { "compression_loss": 0.0, "distillation_loss": 0.26518428325653076, "epoch": 8.07, "learning_rate": 2.2728991378678165e-05, "loss": 0.2839, "step": 22340, "task_loss": 0.5413922071456909 }, { "compression_loss": 0.0, "distillation_loss": 0.16767820715904236, "epoch": 8.08, "learning_rate": 2.2654591330116794e-05, "loss": 0.3052, "step": 22350, "task_loss": 0.3252837061882019 }, { "compression_loss": 0.0, "distillation_loss": 0.35951513051986694, "epoch": 8.08, "learning_rate": 2.258025493863388e-05, "loss": 0.3146, "step": 22360, "task_loss": 0.5174765586853027 }, { "compression_loss": 0.0, "distillation_loss": 0.2331804484128952, "epoch": 8.08, "learning_rate": 2.250598258753285e-05, "loss": 0.333, "step": 22370, "task_loss": 0.354092001914978 }, { "compression_loss": 0.0, "distillation_loss": 0.2697516083717346, "epoch": 8.09, "learning_rate": 2.243177465978691e-05, "loss": 0.3288, "step": 22380, "task_loss": 0.4610014855861664 }, { "compression_loss": 0.0, "distillation_loss": 0.28994688391685486, "epoch": 8.09, "learning_rate": 2.2357631538037123e-05, "loss": 0.3005, "step": 22390, "task_loss": 0.5197626352310181 }, { "compression_loss": 0.0, "distillation_loss": 0.2243901789188385, "epoch": 8.1, "learning_rate": 2.2283553604590325e-05, "loss": 0.2993, "step": 22400, "task_loss": 0.5959091186523438 }, { "compression_loss": 0.0, "distillation_loss": 0.3568100929260254, "epoch": 8.1, "learning_rate": 2.220954124141727e-05, "loss": 0.3233, "step": 22410, "task_loss": 0.6142816543579102 }, { "compression_loss": 0.0, "distillation_loss": 0.3616940975189209, "epoch": 8.1, "learning_rate": 2.2135594830150573e-05, "loss": 0.3116, "step": 22420, "task_loss": 0.5814266204833984 }, { "compression_loss": 0.0, "distillation_loss": 0.32821333408355713, "epoch": 8.11, "learning_rate": 2.2061714752082815e-05, "loss": 0.3199, "step": 22430, "task_loss": 0.38982293009757996 }, { "compression_loss": 0.0, "distillation_loss": 0.28196531534194946, "epoch": 8.11, "learning_rate": 2.1987901388164503e-05, "loss": 0.3165, "step": 22440, "task_loss": 0.5850852727890015 }, { "compression_loss": 0.0, "distillation_loss": 0.2542758584022522, "epoch": 8.11, "learning_rate": 2.1914155119002177e-05, "loss": 0.2977, "step": 22450, "task_loss": 0.6517289876937866 }, { "compression_loss": 0.0, "distillation_loss": 0.3016597628593445, "epoch": 8.12, "learning_rate": 2.1840476324856386e-05, "loss": 0.2977, "step": 22460, "task_loss": 0.5211827754974365 }, { "compression_loss": 0.0, "distillation_loss": 0.44591793417930603, "epoch": 8.12, "learning_rate": 2.1766865385639794e-05, "loss": 0.345, "step": 22470, "task_loss": 0.6531864404678345 }, { "compression_loss": 0.0, "distillation_loss": 0.22515526413917542, "epoch": 8.12, "learning_rate": 2.169332268091512e-05, "loss": 0.2851, "step": 22480, "task_loss": 0.39858129620552063 }, { "compression_loss": 0.0, "distillation_loss": 0.3161879777908325, "epoch": 8.13, "learning_rate": 2.161984858989331e-05, "loss": 0.312, "step": 22490, "task_loss": 0.5769674181938171 }, { "compression_loss": 0.0, "distillation_loss": 0.2485162615776062, "epoch": 8.13, "learning_rate": 2.154644349143147e-05, "loss": 0.2835, "step": 22500, "task_loss": 0.4577278196811676 }, { "epoch": 8.13, "eval_exact_match": 82.90444654683066, "eval_f1": 89.85075823845933, "step": 22500 }, { "compression_loss": 0.0, "distillation_loss": 0.3061697483062744, "epoch": 8.14, "learning_rate": 2.1473107764030978e-05, "loss": 0.325, "step": 22510, "task_loss": 0.3557536005973816 }, { "compression_loss": 0.0, "distillation_loss": 0.27145296335220337, "epoch": 8.14, "learning_rate": 2.1399841785835495e-05, "loss": 0.2845, "step": 22520, "task_loss": 0.2070748656988144 }, { "compression_loss": 0.0, "distillation_loss": 0.28222960233688354, "epoch": 8.14, "learning_rate": 2.1326645934629052e-05, "loss": 0.3249, "step": 22530, "task_loss": 0.7930005788803101 }, { "compression_loss": 0.0, "distillation_loss": 0.377989798784256, "epoch": 8.15, "learning_rate": 2.1253520587834062e-05, "loss": 0.296, "step": 22540, "task_loss": 0.9438679218292236 }, { "compression_loss": 0.0, "distillation_loss": 0.27759093046188354, "epoch": 8.15, "learning_rate": 2.1180466122509413e-05, "loss": 0.3032, "step": 22550, "task_loss": 0.33349353075027466 }, { "compression_loss": 0.0, "distillation_loss": 0.3169790804386139, "epoch": 8.15, "learning_rate": 2.1107482915348477e-05, "loss": 0.3342, "step": 22560, "task_loss": 0.6454753875732422 }, { "compression_loss": 0.0, "distillation_loss": 0.32010066509246826, "epoch": 8.16, "learning_rate": 2.1034571342677242e-05, "loss": 0.3413, "step": 22570, "task_loss": 0.7830992937088013 }, { "compression_loss": 0.0, "distillation_loss": 0.31526443362236023, "epoch": 8.16, "learning_rate": 2.0961731780452256e-05, "loss": 0.3276, "step": 22580, "task_loss": 0.5468152761459351 }, { "compression_loss": 0.0, "distillation_loss": 0.3077006936073303, "epoch": 8.16, "learning_rate": 2.0888964604258828e-05, "loss": 0.2857, "step": 22590, "task_loss": 0.5536618828773499 }, { "compression_loss": 0.0, "distillation_loss": 0.24466858804225922, "epoch": 8.17, "learning_rate": 2.0816270189308964e-05, "loss": 0.2898, "step": 22600, "task_loss": 0.6005507707595825 }, { "compression_loss": 0.0, "distillation_loss": 0.27373987436294556, "epoch": 8.17, "learning_rate": 2.0743648910439537e-05, "loss": 0.2916, "step": 22610, "task_loss": 0.4629909098148346 }, { "compression_loss": 0.0, "distillation_loss": 0.30114513635635376, "epoch": 8.17, "learning_rate": 2.0671101142110257e-05, "loss": 0.3162, "step": 22620, "task_loss": 0.5511022806167603 }, { "compression_loss": 0.0, "distillation_loss": 0.3324176073074341, "epoch": 8.18, "learning_rate": 2.0598627258401834e-05, "loss": 0.3083, "step": 22630, "task_loss": 0.8414646983146667 }, { "compression_loss": 0.0, "distillation_loss": 0.33706897497177124, "epoch": 8.18, "learning_rate": 2.0526227633013956e-05, "loss": 0.31, "step": 22640, "task_loss": 0.5560680031776428 }, { "compression_loss": 0.0, "distillation_loss": 0.3432811498641968, "epoch": 8.19, "learning_rate": 2.045390263926347e-05, "loss": 0.3108, "step": 22650, "task_loss": 0.7345128655433655 }, { "compression_loss": 0.0, "distillation_loss": 0.3108120560646057, "epoch": 8.19, "learning_rate": 2.0381652650082324e-05, "loss": 0.2908, "step": 22660, "task_loss": 0.5983444452285767 }, { "compression_loss": 0.0, "distillation_loss": 0.2624809741973877, "epoch": 8.19, "learning_rate": 2.0309478038015807e-05, "loss": 0.3283, "step": 22670, "task_loss": 0.30220234394073486 }, { "compression_loss": 0.0, "distillation_loss": 0.28360921144485474, "epoch": 8.2, "learning_rate": 2.023737917522045e-05, "loss": 0.351, "step": 22680, "task_loss": 0.4737190008163452 }, { "compression_loss": 0.0, "distillation_loss": 0.35011863708496094, "epoch": 8.2, "learning_rate": 2.016535643346224e-05, "loss": 0.3214, "step": 22690, "task_loss": 0.5056818723678589 }, { "compression_loss": 0.0, "distillation_loss": 0.3075874447822571, "epoch": 8.2, "learning_rate": 2.0093410184114667e-05, "loss": 0.3411, "step": 22700, "task_loss": 0.36893951892852783 }, { "compression_loss": 0.0, "distillation_loss": 0.286554217338562, "epoch": 8.21, "learning_rate": 2.0021540798156785e-05, "loss": 0.303, "step": 22710, "task_loss": 0.4626897871494293 }, { "compression_loss": 0.0, "distillation_loss": 0.20945802330970764, "epoch": 8.21, "learning_rate": 1.9949748646171282e-05, "loss": 0.3026, "step": 22720, "task_loss": 0.4665585458278656 }, { "compression_loss": 0.0, "distillation_loss": 0.2661295235157013, "epoch": 8.21, "learning_rate": 1.9878034098342688e-05, "loss": 0.2739, "step": 22730, "task_loss": 0.44505608081817627 }, { "compression_loss": 0.0, "distillation_loss": 0.3084584176540375, "epoch": 8.22, "learning_rate": 1.980639752445529e-05, "loss": 0.3236, "step": 22740, "task_loss": 0.47151315212249756 }, { "compression_loss": 0.0, "distillation_loss": 0.37254661321640015, "epoch": 8.22, "learning_rate": 1.973483929389136e-05, "loss": 0.3031, "step": 22750, "task_loss": 0.5002536177635193 }, { "epoch": 8.22, "eval_exact_match": 83.36802270577105, "eval_f1": 90.14923737850407, "step": 22750 }, { "compression_loss": 0.0, "distillation_loss": 0.24313656985759735, "epoch": 8.23, "learning_rate": 1.9663359775629198e-05, "loss": 0.3456, "step": 22760, "task_loss": 0.3750152289867401 }, { "compression_loss": 0.0, "distillation_loss": 0.2501145899295807, "epoch": 8.23, "learning_rate": 1.959195933824125e-05, "loss": 0.3191, "step": 22770, "task_loss": 0.7105112075805664 }, { "compression_loss": 0.0, "distillation_loss": 0.3618265986442566, "epoch": 8.23, "learning_rate": 1.9520638349892158e-05, "loss": 0.3374, "step": 22780, "task_loss": 0.48824140429496765 }, { "compression_loss": 0.0, "distillation_loss": 0.3943551182746887, "epoch": 8.24, "learning_rate": 1.9449397178336928e-05, "loss": 0.2946, "step": 22790, "task_loss": 0.662896990776062 }, { "compression_loss": 0.0, "distillation_loss": 0.23531006276607513, "epoch": 8.24, "learning_rate": 1.9378236190919002e-05, "loss": 0.3176, "step": 22800, "task_loss": 0.5393174886703491 }, { "compression_loss": 0.0, "distillation_loss": 0.28416210412979126, "epoch": 8.24, "learning_rate": 1.9307155754568368e-05, "loss": 0.2945, "step": 22810, "task_loss": 0.33785438537597656 }, { "compression_loss": 0.0, "distillation_loss": 0.4986143410205841, "epoch": 8.25, "learning_rate": 1.9236156235799624e-05, "loss": 0.3286, "step": 22820, "task_loss": 0.6909259557723999 }, { "compression_loss": 0.0, "distillation_loss": 0.36012768745422363, "epoch": 8.25, "learning_rate": 1.9165238000710218e-05, "loss": 0.3426, "step": 22830, "task_loss": 0.4857126474380493 }, { "compression_loss": 0.0, "distillation_loss": 0.3498384952545166, "epoch": 8.25, "learning_rate": 1.9094401414978365e-05, "loss": 0.3115, "step": 22840, "task_loss": 0.6187173128128052 }, { "compression_loss": 0.0, "distillation_loss": 0.32221004366874695, "epoch": 8.26, "learning_rate": 1.9023646843861352e-05, "loss": 0.313, "step": 22850, "task_loss": 0.7463681101799011 }, { "compression_loss": 0.0, "distillation_loss": 0.3636491298675537, "epoch": 8.26, "learning_rate": 1.8952974652193525e-05, "loss": 0.2902, "step": 22860, "task_loss": 0.7195571660995483 }, { "compression_loss": 0.0, "distillation_loss": 0.3010321855545044, "epoch": 8.27, "learning_rate": 1.888238520438449e-05, "loss": 0.3284, "step": 22870, "task_loss": 0.5021539330482483 }, { "compression_loss": 0.0, "distillation_loss": 0.47702494263648987, "epoch": 8.27, "learning_rate": 1.8811878864417145e-05, "loss": 0.3776, "step": 22880, "task_loss": 0.7389390468597412 }, { "compression_loss": 0.0, "distillation_loss": 0.3341755270957947, "epoch": 8.27, "learning_rate": 1.8741455995845906e-05, "loss": 0.2775, "step": 22890, "task_loss": 0.7437736988067627 }, { "compression_loss": 0.0, "distillation_loss": 0.2533724904060364, "epoch": 8.28, "learning_rate": 1.8671116961794767e-05, "loss": 0.2914, "step": 22900, "task_loss": 0.6004878282546997 }, { "compression_loss": 0.0, "distillation_loss": 0.3350604176521301, "epoch": 8.28, "learning_rate": 1.860086212495545e-05, "loss": 0.2999, "step": 22910, "task_loss": 0.5949532389640808 }, { "compression_loss": 0.0, "distillation_loss": 0.25673776865005493, "epoch": 8.28, "learning_rate": 1.8530691847585468e-05, "loss": 0.308, "step": 22920, "task_loss": 0.4904293119907379 }, { "compression_loss": 0.0, "distillation_loss": 0.2922332286834717, "epoch": 8.29, "learning_rate": 1.8460606491506437e-05, "loss": 0.3331, "step": 22930, "task_loss": 0.5136717557907104 }, { "compression_loss": 0.0, "distillation_loss": 0.33091992139816284, "epoch": 8.29, "learning_rate": 1.8390606418101974e-05, "loss": 0.3354, "step": 22940, "task_loss": 0.9739881753921509 }, { "compression_loss": 0.0, "distillation_loss": 0.36726266145706177, "epoch": 8.29, "learning_rate": 1.8320691988316008e-05, "loss": 0.3084, "step": 22950, "task_loss": 0.8299188017845154 }, { "compression_loss": 0.0, "distillation_loss": 0.24538114666938782, "epoch": 8.3, "learning_rate": 1.8250863562650848e-05, "loss": 0.3162, "step": 22960, "task_loss": 0.416057288646698 }, { "compression_loss": 0.0, "distillation_loss": 0.3002382516860962, "epoch": 8.3, "learning_rate": 1.8181121501165358e-05, "loss": 0.3206, "step": 22970, "task_loss": 0.4835646152496338 }, { "compression_loss": 0.0, "distillation_loss": 0.2719339430332184, "epoch": 8.31, "learning_rate": 1.811146616347302e-05, "loss": 0.3197, "step": 22980, "task_loss": 0.5774111747741699 }, { "compression_loss": 0.0, "distillation_loss": 0.22802424430847168, "epoch": 8.31, "learning_rate": 1.8041897908740205e-05, "loss": 0.2812, "step": 22990, "task_loss": 0.3885321021080017 }, { "compression_loss": 0.0, "distillation_loss": 0.30409711599349976, "epoch": 8.31, "learning_rate": 1.7972417095684214e-05, "loss": 0.3038, "step": 23000, "task_loss": 0.4505375623703003 }, { "epoch": 8.31, "eval_exact_match": 83.25449385052035, "eval_f1": 90.06136052123999, "step": 23000 }, { "compression_loss": 0.0, "distillation_loss": 0.4253186583518982, "epoch": 8.32, "learning_rate": 1.790302408257151e-05, "loss": 0.3461, "step": 23010, "task_loss": 0.9150567650794983 }, { "compression_loss": 0.0, "distillation_loss": 0.3353206515312195, "epoch": 8.32, "learning_rate": 1.7833719227215755e-05, "loss": 0.3282, "step": 23020, "task_loss": 0.7890245914459229 }, { "compression_loss": 0.0, "distillation_loss": 0.3734143376350403, "epoch": 8.32, "learning_rate": 1.7764502886976142e-05, "loss": 0.3267, "step": 23030, "task_loss": 0.5730995535850525 }, { "compression_loss": 0.0, "distillation_loss": 0.2806342542171478, "epoch": 8.33, "learning_rate": 1.769537541875536e-05, "loss": 0.3222, "step": 23040, "task_loss": 0.44223088026046753 }, { "compression_loss": 0.0, "distillation_loss": 0.30025961995124817, "epoch": 8.33, "learning_rate": 1.7626337178997885e-05, "loss": 0.3062, "step": 23050, "task_loss": 0.6115245819091797 }, { "compression_loss": 0.0, "distillation_loss": 0.33962395787239075, "epoch": 8.33, "learning_rate": 1.755738852368811e-05, "loss": 0.2956, "step": 23060, "task_loss": 0.7975297570228577 }, { "compression_loss": 0.0, "distillation_loss": 0.22749751806259155, "epoch": 8.34, "learning_rate": 1.748852980834849e-05, "loss": 0.3171, "step": 23070, "task_loss": 0.3744935393333435 }, { "compression_loss": 0.0, "distillation_loss": 0.2707931101322174, "epoch": 8.34, "learning_rate": 1.7419761388037698e-05, "loss": 0.295, "step": 23080, "task_loss": 0.7108138799667358 }, { "compression_loss": 0.0, "distillation_loss": 0.25612032413482666, "epoch": 8.34, "learning_rate": 1.7351083617348838e-05, "loss": 0.3024, "step": 23090, "task_loss": 0.3751325309276581 }, { "compression_loss": 0.0, "distillation_loss": 0.23818457126617432, "epoch": 8.35, "learning_rate": 1.7282496850407593e-05, "loss": 0.3057, "step": 23100, "task_loss": 0.8201920986175537 }, { "compression_loss": 0.0, "distillation_loss": 0.27771884202957153, "epoch": 8.35, "learning_rate": 1.721400144087041e-05, "loss": 0.2883, "step": 23110, "task_loss": 0.4348314106464386 }, { "compression_loss": 0.0, "distillation_loss": 0.3101360499858856, "epoch": 8.36, "learning_rate": 1.714559774192262e-05, "loss": 0.3363, "step": 23120, "task_loss": 0.5495152473449707 }, { "compression_loss": 0.0, "distillation_loss": 0.29964226484298706, "epoch": 8.36, "learning_rate": 1.707728610627674e-05, "loss": 0.3349, "step": 23130, "task_loss": 0.6978568434715271 }, { "compression_loss": 0.0, "distillation_loss": 0.28877317905426025, "epoch": 8.36, "learning_rate": 1.7009066886170497e-05, "loss": 0.3116, "step": 23140, "task_loss": 0.5868189334869385 }, { "compression_loss": 0.0, "distillation_loss": 0.2869138717651367, "epoch": 8.37, "learning_rate": 1.6940940433365148e-05, "loss": 0.2882, "step": 23150, "task_loss": 0.4035617709159851 }, { "compression_loss": 0.0, "distillation_loss": 0.30516546964645386, "epoch": 8.37, "learning_rate": 1.6872907099143585e-05, "loss": 0.3137, "step": 23160, "task_loss": 0.6182640194892883 }, { "compression_loss": 0.0, "distillation_loss": 0.3285907208919525, "epoch": 8.37, "learning_rate": 1.6804967234308577e-05, "loss": 0.3, "step": 23170, "task_loss": 0.7113856673240662 }, { "compression_loss": 0.0, "distillation_loss": 0.19711598753929138, "epoch": 8.38, "learning_rate": 1.673712118918088e-05, "loss": 0.3228, "step": 23180, "task_loss": 0.34729012846946716 }, { "compression_loss": 0.0, "distillation_loss": 0.2672497630119324, "epoch": 8.38, "learning_rate": 1.6669369313597535e-05, "loss": 0.2929, "step": 23190, "task_loss": 0.33527523279190063 }, { "compression_loss": 0.0, "distillation_loss": 0.30760812759399414, "epoch": 8.38, "learning_rate": 1.6601711956909998e-05, "loss": 0.3359, "step": 23200, "task_loss": 0.5162175893783569 }, { "compression_loss": 0.0, "distillation_loss": 0.20776550471782684, "epoch": 8.39, "learning_rate": 1.653414946798235e-05, "loss": 0.3143, "step": 23210, "task_loss": 0.2421736717224121 }, { "compression_loss": 0.0, "distillation_loss": 0.33642876148223877, "epoch": 8.39, "learning_rate": 1.646668219518948e-05, "loss": 0.3164, "step": 23220, "task_loss": 0.6240791082382202 }, { "compression_loss": 0.0, "distillation_loss": 0.273520827293396, "epoch": 8.4, "learning_rate": 1.639931048641538e-05, "loss": 0.2976, "step": 23230, "task_loss": 0.45852556824684143 }, { "compression_loss": 0.0, "distillation_loss": 0.2595791220664978, "epoch": 8.4, "learning_rate": 1.6332034689051194e-05, "loss": 0.3234, "step": 23240, "task_loss": 0.24186216294765472 }, { "compression_loss": 0.0, "distillation_loss": 0.32000231742858887, "epoch": 8.4, "learning_rate": 1.6264855149993574e-05, "loss": 0.3122, "step": 23250, "task_loss": 0.7240666747093201 }, { "epoch": 8.4, "eval_exact_match": 83.13150425733207, "eval_f1": 89.97983813033478, "step": 23250 }, { "compression_loss": 0.0, "distillation_loss": 0.38170820474624634, "epoch": 8.41, "learning_rate": 1.6197772215642807e-05, "loss": 0.319, "step": 23260, "task_loss": 0.6090672612190247 }, { "compression_loss": 0.0, "distillation_loss": 0.261313259601593, "epoch": 8.41, "learning_rate": 1.6130786231901073e-05, "loss": 0.3146, "step": 23270, "task_loss": 0.8908061981201172 }, { "compression_loss": 0.0, "distillation_loss": 0.3164830803871155, "epoch": 8.41, "learning_rate": 1.606389754417061e-05, "loss": 0.3362, "step": 23280, "task_loss": 0.6108741760253906 }, { "compression_loss": 0.0, "distillation_loss": 0.22788286209106445, "epoch": 8.42, "learning_rate": 1.5997106497351993e-05, "loss": 0.3181, "step": 23290, "task_loss": 0.3512367904186249 }, { "compression_loss": 0.0, "distillation_loss": 0.29590117931365967, "epoch": 8.42, "learning_rate": 1.593041343584232e-05, "loss": 0.31, "step": 23300, "task_loss": 0.6037249565124512 }, { "compression_loss": 0.0, "distillation_loss": 0.39731332659721375, "epoch": 8.42, "learning_rate": 1.5863818703533445e-05, "loss": 0.3173, "step": 23310, "task_loss": 0.5619301199913025 }, { "compression_loss": 0.0, "distillation_loss": 0.37759941816329956, "epoch": 8.43, "learning_rate": 1.5797322643810196e-05, "loss": 0.3294, "step": 23320, "task_loss": 0.7838083505630493 }, { "compression_loss": 0.0, "distillation_loss": 0.211870938539505, "epoch": 8.43, "learning_rate": 1.5730925599548637e-05, "loss": 0.3146, "step": 23330, "task_loss": 0.15924398601055145 }, { "compression_loss": 0.0, "distillation_loss": 0.2876982092857361, "epoch": 8.44, "learning_rate": 1.5664627913114222e-05, "loss": 0.3385, "step": 23340, "task_loss": 0.3038162887096405 }, { "compression_loss": 0.0, "distillation_loss": 0.31377512216567993, "epoch": 8.44, "learning_rate": 1.5598429926360136e-05, "loss": 0.321, "step": 23350, "task_loss": 0.36857476830482483 }, { "compression_loss": 0.0, "distillation_loss": 0.20846720039844513, "epoch": 8.44, "learning_rate": 1.5532331980625454e-05, "loss": 0.2739, "step": 23360, "task_loss": 0.42006412148475647 }, { "compression_loss": 0.0, "distillation_loss": 0.257539302110672, "epoch": 8.45, "learning_rate": 1.5466334416733425e-05, "loss": 0.297, "step": 23370, "task_loss": 0.5687024593353271 }, { "compression_loss": 0.0, "distillation_loss": 0.3588574528694153, "epoch": 8.45, "learning_rate": 1.5400437574989648e-05, "loss": 0.3067, "step": 23380, "task_loss": 0.44749951362609863 }, { "compression_loss": 0.0, "distillation_loss": 0.32865849137306213, "epoch": 8.45, "learning_rate": 1.5334641795180442e-05, "loss": 0.3075, "step": 23390, "task_loss": 0.6262214779853821 }, { "compression_loss": 0.0, "distillation_loss": 0.2602709233760834, "epoch": 8.46, "learning_rate": 1.5268947416570933e-05, "loss": 0.3546, "step": 23400, "task_loss": 0.5079340934753418 }, { "compression_loss": 0.0, "distillation_loss": 0.15402564406394958, "epoch": 8.46, "learning_rate": 1.5203354777903448e-05, "loss": 0.291, "step": 23410, "task_loss": 0.20646686851978302 }, { "compression_loss": 0.0, "distillation_loss": 0.2539033889770508, "epoch": 8.46, "learning_rate": 1.5137864217395681e-05, "loss": 0.3228, "step": 23420, "task_loss": 0.4042036831378937 }, { "compression_loss": 0.0, "distillation_loss": 0.27788808941841125, "epoch": 8.47, "learning_rate": 1.5072476072739005e-05, "loss": 0.2676, "step": 23430, "task_loss": 0.4695894420146942 }, { "compression_loss": 0.0, "distillation_loss": 0.3503713011741638, "epoch": 8.47, "learning_rate": 1.5007190681096652e-05, "loss": 0.3337, "step": 23440, "task_loss": 0.9511378407478333 }, { "compression_loss": 0.0, "distillation_loss": 0.3843029737472534, "epoch": 8.47, "learning_rate": 1.4942008379102068e-05, "loss": 0.3123, "step": 23450, "task_loss": 0.4682992994785309 }, { "compression_loss": 0.0, "distillation_loss": 0.34484705328941345, "epoch": 8.48, "learning_rate": 1.4876929502857115e-05, "loss": 0.3202, "step": 23460, "task_loss": 0.6297692060470581 }, { "compression_loss": 0.0, "distillation_loss": 0.3099765181541443, "epoch": 8.48, "learning_rate": 1.4811954387930384e-05, "loss": 0.3222, "step": 23470, "task_loss": 0.5310537219047546 }, { "compression_loss": 0.0, "distillation_loss": 0.30124685168266296, "epoch": 8.49, "learning_rate": 1.474708336935538e-05, "loss": 0.3038, "step": 23480, "task_loss": 0.5357911586761475 }, { "compression_loss": 0.0, "distillation_loss": 0.23196589946746826, "epoch": 8.49, "learning_rate": 1.4682316781628928e-05, "loss": 0.3089, "step": 23490, "task_loss": 0.565646231174469 }, { "compression_loss": 0.0, "distillation_loss": 0.24483175575733185, "epoch": 8.49, "learning_rate": 1.4617654958709308e-05, "loss": 0.3079, "step": 23500, "task_loss": 0.6693049669265747 }, { "epoch": 8.49, "eval_exact_match": 83.12204351939451, "eval_f1": 89.96558516117871, "step": 23500 }, { "compression_loss": 0.0, "distillation_loss": 0.24072255194187164, "epoch": 8.5, "learning_rate": 1.4553098234014623e-05, "loss": 0.3041, "step": 23510, "task_loss": 0.2527986466884613 }, { "compression_loss": 0.0, "distillation_loss": 0.24449646472930908, "epoch": 8.5, "learning_rate": 1.4488646940421064e-05, "loss": 0.3331, "step": 23520, "task_loss": 0.4520113468170166 }, { "compression_loss": 0.0, "distillation_loss": 0.3243837356567383, "epoch": 8.5, "learning_rate": 1.4424301410261182e-05, "loss": 0.306, "step": 23530, "task_loss": 0.5300151705741882 }, { "compression_loss": 0.0, "distillation_loss": 0.32603827118873596, "epoch": 8.51, "learning_rate": 1.4360061975322142e-05, "loss": 0.2902, "step": 23540, "task_loss": 0.27277934551239014 }, { "compression_loss": 0.0, "distillation_loss": 0.3202453851699829, "epoch": 8.51, "learning_rate": 1.4295928966844085e-05, "loss": 0.3045, "step": 23550, "task_loss": 0.5317361354827881 }, { "compression_loss": 0.0, "distillation_loss": 0.24685320258140564, "epoch": 8.51, "learning_rate": 1.423190271551837e-05, "loss": 0.3274, "step": 23560, "task_loss": 0.22007986903190613 }, { "compression_loss": 0.0, "distillation_loss": 0.35863107442855835, "epoch": 8.52, "learning_rate": 1.4167983551485887e-05, "loss": 0.339, "step": 23570, "task_loss": 0.37125977873802185 }, { "compression_loss": 0.0, "distillation_loss": 0.3166862428188324, "epoch": 8.52, "learning_rate": 1.4104171804335311e-05, "loss": 0.3295, "step": 23580, "task_loss": 0.4220460057258606 }, { "compression_loss": 0.0, "distillation_loss": 0.330414354801178, "epoch": 8.53, "learning_rate": 1.404046780310151e-05, "loss": 0.3021, "step": 23590, "task_loss": 0.3584241271018982 }, { "compression_loss": 0.0, "distillation_loss": 0.2974003553390503, "epoch": 8.53, "learning_rate": 1.39768718762637e-05, "loss": 0.3255, "step": 23600, "task_loss": 0.7118889093399048 }, { "compression_loss": 0.0, "distillation_loss": 0.22618696093559265, "epoch": 8.53, "learning_rate": 1.391338435174388e-05, "loss": 0.3214, "step": 23610, "task_loss": 0.2164674550294876 }, { "compression_loss": 0.0, "distillation_loss": 0.2954291105270386, "epoch": 8.54, "learning_rate": 1.3850005556905072e-05, "loss": 0.3052, "step": 23620, "task_loss": 0.5197908282279968 }, { "compression_loss": 0.0, "distillation_loss": 0.2527273893356323, "epoch": 8.54, "learning_rate": 1.3786735818549667e-05, "loss": 0.3171, "step": 23630, "task_loss": 0.652328372001648 }, { "compression_loss": 0.0, "distillation_loss": 0.39712145924568176, "epoch": 8.54, "learning_rate": 1.372357546291769e-05, "loss": 0.3434, "step": 23640, "task_loss": 1.187708854675293 }, { "compression_loss": 0.0, "distillation_loss": 0.47445833683013916, "epoch": 8.55, "learning_rate": 1.3660524815685188e-05, "loss": 0.3131, "step": 23650, "task_loss": 0.677756130695343 }, { "compression_loss": 0.0, "distillation_loss": 0.2589554190635681, "epoch": 8.55, "learning_rate": 1.359758420196249e-05, "loss": 0.3015, "step": 23660, "task_loss": 0.3724703788757324 }, { "compression_loss": 0.0, "distillation_loss": 0.30475327372550964, "epoch": 8.55, "learning_rate": 1.3534753946292586e-05, "loss": 0.3147, "step": 23670, "task_loss": 0.464124470949173 }, { "compression_loss": 0.0, "distillation_loss": 0.26423659920692444, "epoch": 8.56, "learning_rate": 1.347203437264936e-05, "loss": 0.319, "step": 23680, "task_loss": 0.6123193502426147 }, { "compression_loss": 0.0, "distillation_loss": 0.2449067085981369, "epoch": 8.56, "learning_rate": 1.3409425804436078e-05, "loss": 0.2941, "step": 23690, "task_loss": 0.26298606395721436 }, { "compression_loss": 0.0, "distillation_loss": 0.3464253842830658, "epoch": 8.57, "learning_rate": 1.3346928564483535e-05, "loss": 0.3161, "step": 23700, "task_loss": 0.5386533737182617 }, { "compression_loss": 0.0, "distillation_loss": 0.2706737816333771, "epoch": 8.57, "learning_rate": 1.3284542975048519e-05, "loss": 0.3168, "step": 23710, "task_loss": 0.3305785655975342 }, { "compression_loss": 0.0, "distillation_loss": 0.38617292046546936, "epoch": 8.57, "learning_rate": 1.3222269357812115e-05, "loss": 0.3332, "step": 23720, "task_loss": 0.7377265691757202 }, { "compression_loss": 0.0, "distillation_loss": 0.2689724564552307, "epoch": 8.58, "learning_rate": 1.3160108033878046e-05, "loss": 0.3306, "step": 23730, "task_loss": 0.4797666668891907 }, { "compression_loss": 0.0, "distillation_loss": 0.314256489276886, "epoch": 8.58, "learning_rate": 1.3098059323770975e-05, "loss": 0.3069, "step": 23740, "task_loss": 0.6989715695381165 }, { "compression_loss": 0.0, "distillation_loss": 0.28853997588157654, "epoch": 8.58, "learning_rate": 1.3036123547434927e-05, "loss": 0.3023, "step": 23750, "task_loss": 0.3831864297389984 }, { "epoch": 8.58, "eval_exact_match": 83.23557237464522, "eval_f1": 90.0526820682477, "step": 23750 }, { "compression_loss": 0.0, "distillation_loss": 0.2678840756416321, "epoch": 8.59, "learning_rate": 1.2974301024231595e-05, "loss": 0.2999, "step": 23760, "task_loss": 0.6748077869415283 }, { "compression_loss": 0.0, "distillation_loss": 0.4937059283256531, "epoch": 8.59, "learning_rate": 1.2912592072938709e-05, "loss": 0.307, "step": 23770, "task_loss": 0.5111285448074341 }, { "compression_loss": 0.0, "distillation_loss": 0.26543956995010376, "epoch": 8.59, "learning_rate": 1.2850997011748333e-05, "loss": 0.3042, "step": 23780, "task_loss": 0.5223565101623535 }, { "compression_loss": 0.0, "distillation_loss": 0.2755920886993408, "epoch": 8.6, "learning_rate": 1.2789516158265369e-05, "loss": 0.2859, "step": 23790, "task_loss": 0.7235616445541382 }, { "compression_loss": 0.0, "distillation_loss": 0.2730027437210083, "epoch": 8.6, "learning_rate": 1.272814982950573e-05, "loss": 0.2873, "step": 23800, "task_loss": 0.4539136588573456 }, { "compression_loss": 0.0, "distillation_loss": 0.3317798972129822, "epoch": 8.6, "learning_rate": 1.2666898341894853e-05, "loss": 0.3073, "step": 23810, "task_loss": 0.7397780418395996 }, { "compression_loss": 0.0, "distillation_loss": 0.3097071945667267, "epoch": 8.61, "learning_rate": 1.2605762011266012e-05, "loss": 0.3264, "step": 23820, "task_loss": 0.5371835231781006 }, { "compression_loss": 0.0, "distillation_loss": 0.35412904620170593, "epoch": 8.61, "learning_rate": 1.2544741152858692e-05, "loss": 0.33, "step": 23830, "task_loss": 0.9191930294036865 }, { "compression_loss": 0.0, "distillation_loss": 0.38428613543510437, "epoch": 8.62, "learning_rate": 1.248383608131694e-05, "loss": 0.3249, "step": 23840, "task_loss": 0.49637895822525024 }, { "compression_loss": 0.0, "distillation_loss": 0.3155321478843689, "epoch": 8.62, "learning_rate": 1.2423047110687794e-05, "loss": 0.2899, "step": 23850, "task_loss": 0.28766974806785583 }, { "compression_loss": 0.0, "distillation_loss": 0.3072446882724762, "epoch": 8.62, "learning_rate": 1.2362374554419625e-05, "loss": 0.2689, "step": 23860, "task_loss": 0.4855462908744812 }, { "compression_loss": 0.0, "distillation_loss": 0.27834153175354004, "epoch": 8.63, "learning_rate": 1.2301818725360548e-05, "loss": 0.2806, "step": 23870, "task_loss": 0.6370406746864319 }, { "compression_loss": 0.0, "distillation_loss": 0.3155345916748047, "epoch": 8.63, "learning_rate": 1.2241379935756749e-05, "loss": 0.3611, "step": 23880, "task_loss": 0.5455687046051025 }, { "compression_loss": 0.0, "distillation_loss": 0.25810787081718445, "epoch": 8.63, "learning_rate": 1.2181058497250998e-05, "loss": 0.3393, "step": 23890, "task_loss": 0.3866387605667114 }, { "compression_loss": 0.0, "distillation_loss": 0.2348720282316208, "epoch": 8.64, "learning_rate": 1.2120854720880877e-05, "loss": 0.2784, "step": 23900, "task_loss": 0.5155615210533142 }, { "compression_loss": 0.0, "distillation_loss": 0.2619689106941223, "epoch": 8.64, "learning_rate": 1.206076891707731e-05, "loss": 0.2989, "step": 23910, "task_loss": 0.4420399069786072 }, { "compression_loss": 0.0, "distillation_loss": 0.27206704020500183, "epoch": 8.64, "learning_rate": 1.2000801395662918e-05, "loss": 0.342, "step": 23920, "task_loss": 0.17631283402442932 }, { "compression_loss": 0.0, "distillation_loss": 0.2513728141784668, "epoch": 8.65, "learning_rate": 1.1940952465850405e-05, "loss": 0.291, "step": 23930, "task_loss": 0.3972375988960266 }, { "compression_loss": 0.0, "distillation_loss": 0.3371734023094177, "epoch": 8.65, "learning_rate": 1.1881222436240966e-05, "loss": 0.3306, "step": 23940, "task_loss": 0.607552170753479 }, { "compression_loss": 0.0, "distillation_loss": 0.43905913829803467, "epoch": 8.66, "learning_rate": 1.1821611614822724e-05, "loss": 0.3306, "step": 23950, "task_loss": 0.5458712577819824 }, { "compression_loss": 0.0, "distillation_loss": 0.23583155870437622, "epoch": 8.66, "learning_rate": 1.176212030896912e-05, "loss": 0.3205, "step": 23960, "task_loss": 0.6100504398345947 }, { "compression_loss": 0.0, "distillation_loss": 0.46240583062171936, "epoch": 8.66, "learning_rate": 1.1702748825437348e-05, "loss": 0.3571, "step": 23970, "task_loss": 0.3039376735687256 }, { "compression_loss": 0.0, "distillation_loss": 0.3308100998401642, "epoch": 8.67, "learning_rate": 1.16434974703667e-05, "loss": 0.3045, "step": 23980, "task_loss": 0.6687793731689453 }, { "compression_loss": 0.0, "distillation_loss": 0.20676454901695251, "epoch": 8.67, "learning_rate": 1.1584366549277138e-05, "loss": 0.3027, "step": 23990, "task_loss": 0.1708330363035202 }, { "compression_loss": 0.0, "distillation_loss": 0.3004925847053528, "epoch": 8.67, "learning_rate": 1.1525356367067541e-05, "loss": 0.3253, "step": 24000, "task_loss": 0.639447033405304 }, { "epoch": 8.67, "eval_exact_match": 83.1693472090823, "eval_f1": 90.00836238528001, "step": 24000 }, { "compression_loss": 0.0, "distillation_loss": 0.3217807412147522, "epoch": 8.68, "learning_rate": 1.1466467228014262e-05, "loss": 0.2779, "step": 24010, "task_loss": 0.3356829285621643 }, { "compression_loss": 0.0, "distillation_loss": 0.3157300055027008, "epoch": 8.68, "learning_rate": 1.14076994357695e-05, "loss": 0.2987, "step": 24020, "task_loss": 0.46271902322769165 }, { "compression_loss": 0.0, "distillation_loss": 0.33147913217544556, "epoch": 8.68, "learning_rate": 1.134905329335976e-05, "loss": 0.3338, "step": 24030, "task_loss": 0.4586483836174011 }, { "compression_loss": 0.0, "distillation_loss": 0.2759827971458435, "epoch": 8.69, "learning_rate": 1.1290529103184282e-05, "loss": 0.2986, "step": 24040, "task_loss": 0.5049731731414795 }, { "compression_loss": 0.0, "distillation_loss": 0.27000918984413147, "epoch": 8.69, "learning_rate": 1.1232127167013436e-05, "loss": 0.3024, "step": 24050, "task_loss": 0.619734525680542 }, { "compression_loss": 0.0, "distillation_loss": 0.28138256072998047, "epoch": 8.7, "learning_rate": 1.1173847785987288e-05, "loss": 0.2856, "step": 24060, "task_loss": 0.572986900806427 }, { "compression_loss": 0.0, "distillation_loss": 0.31417810916900635, "epoch": 8.7, "learning_rate": 1.1115691260613889e-05, "loss": 0.3172, "step": 24070, "task_loss": 0.5405772924423218 }, { "compression_loss": 0.0, "distillation_loss": 0.24283945560455322, "epoch": 8.7, "learning_rate": 1.1057657890767854e-05, "loss": 0.287, "step": 24080, "task_loss": 0.6355692744255066 }, { "compression_loss": 0.0, "distillation_loss": 0.2398536056280136, "epoch": 8.71, "learning_rate": 1.0999747975688754e-05, "loss": 0.2763, "step": 24090, "task_loss": 0.46709010004997253 }, { "compression_loss": 0.0, "distillation_loss": 0.30363500118255615, "epoch": 8.71, "learning_rate": 1.094196181397959e-05, "loss": 0.327, "step": 24100, "task_loss": 0.44533640146255493 }, { "compression_loss": 0.0, "distillation_loss": 0.2811916470527649, "epoch": 8.71, "learning_rate": 1.0884299703605228e-05, "loss": 0.292, "step": 24110, "task_loss": 0.5359020233154297 }, { "compression_loss": 0.0, "distillation_loss": 0.3132500648498535, "epoch": 8.72, "learning_rate": 1.0826761941890906e-05, "loss": 0.276, "step": 24120, "task_loss": 0.5441325902938843 }, { "compression_loss": 0.0, "distillation_loss": 0.3040134906768799, "epoch": 8.72, "learning_rate": 1.0769348825520669e-05, "loss": 0.329, "step": 24130, "task_loss": 0.2830539345741272 }, { "compression_loss": 0.0, "distillation_loss": 0.3059019446372986, "epoch": 8.72, "learning_rate": 1.0712060650535867e-05, "loss": 0.2936, "step": 24140, "task_loss": 0.47051605582237244 }, { "compression_loss": 0.0, "distillation_loss": 0.3694162666797638, "epoch": 8.73, "learning_rate": 1.0654897712333557e-05, "loss": 0.3272, "step": 24150, "task_loss": 0.48830804228782654 }, { "compression_loss": 0.0, "distillation_loss": 0.28213226795196533, "epoch": 8.73, "learning_rate": 1.059786030566512e-05, "loss": 0.2719, "step": 24160, "task_loss": 0.2837570905685425 }, { "compression_loss": 0.0, "distillation_loss": 0.2804946303367615, "epoch": 8.74, "learning_rate": 1.0540948724634564e-05, "loss": 0.3319, "step": 24170, "task_loss": 0.25393009185791016 }, { "compression_loss": 0.0, "distillation_loss": 0.36412930488586426, "epoch": 8.74, "learning_rate": 1.0484163262697156e-05, "loss": 0.3162, "step": 24180, "task_loss": 0.8942012190818787 }, { "compression_loss": 0.0, "distillation_loss": 0.25897452235221863, "epoch": 8.74, "learning_rate": 1.042750421265783e-05, "loss": 0.2955, "step": 24190, "task_loss": 0.5445088148117065 }, { "compression_loss": 0.0, "distillation_loss": 0.2680971324443817, "epoch": 8.75, "learning_rate": 1.0370971866669712e-05, "loss": 0.2892, "step": 24200, "task_loss": 0.4629053473472595 }, { "compression_loss": 0.0, "distillation_loss": 0.2193024605512619, "epoch": 8.75, "learning_rate": 1.0314566516232566e-05, "loss": 0.2714, "step": 24210, "task_loss": 0.27407723665237427 }, { "compression_loss": 0.0, "distillation_loss": 0.22114166617393494, "epoch": 8.75, "learning_rate": 1.025828845219136e-05, "loss": 0.3139, "step": 24220, "task_loss": 0.6401352882385254 }, { "compression_loss": 0.0, "distillation_loss": 0.30140265822410583, "epoch": 8.76, "learning_rate": 1.0202137964734709e-05, "loss": 0.296, "step": 24230, "task_loss": 0.4010269343852997 }, { "compression_loss": 0.0, "distillation_loss": 0.29186466336250305, "epoch": 8.76, "learning_rate": 1.0146115343393427e-05, "loss": 0.3189, "step": 24240, "task_loss": 0.40033257007598877 }, { "compression_loss": 0.0, "distillation_loss": 0.2807198762893677, "epoch": 8.76, "learning_rate": 1.0090220877038949e-05, "loss": 0.3235, "step": 24250, "task_loss": 0.6275461316108704 }, { "epoch": 8.76, "eval_exact_match": 83.37748344370861, "eval_f1": 90.13053426752283, "step": 24250 }, { "compression_loss": 0.0, "distillation_loss": 0.24098724126815796, "epoch": 8.77, "learning_rate": 1.0034454853881984e-05, "loss": 0.2883, "step": 24260, "task_loss": 0.41819271445274353 }, { "compression_loss": 0.0, "distillation_loss": 0.2524189352989197, "epoch": 8.77, "learning_rate": 9.978817561470873e-06, "loss": 0.2971, "step": 24270, "task_loss": 0.790593147277832 }, { "compression_loss": 0.0, "distillation_loss": 0.2279432862997055, "epoch": 8.77, "learning_rate": 9.923309286690207e-06, "loss": 0.2953, "step": 24280, "task_loss": 0.2802739441394806 }, { "compression_loss": 0.0, "distillation_loss": 0.32530757784843445, "epoch": 8.78, "learning_rate": 9.86793031575932e-06, "loss": 0.3165, "step": 24290, "task_loss": 0.5283281207084656 }, { "compression_loss": 0.0, "distillation_loss": 0.30216020345687866, "epoch": 8.78, "learning_rate": 9.812680934230822e-06, "loss": 0.3341, "step": 24300, "task_loss": 0.9180983304977417 }, { "compression_loss": 0.0, "distillation_loss": 0.24517136812210083, "epoch": 8.79, "learning_rate": 9.757561426989076e-06, "loss": 0.3064, "step": 24310, "task_loss": 0.44735443592071533 }, { "compression_loss": 0.0, "distillation_loss": 0.27164602279663086, "epoch": 8.79, "learning_rate": 9.702572078248811e-06, "loss": 0.2865, "step": 24320, "task_loss": 0.6187409162521362 }, { "compression_loss": 0.0, "distillation_loss": 0.30350732803344727, "epoch": 8.79, "learning_rate": 9.647713171553596e-06, "loss": 0.3185, "step": 24330, "task_loss": 0.5968219637870789 }, { "compression_loss": 0.0, "distillation_loss": 0.25562575459480286, "epoch": 8.8, "learning_rate": 9.59298498977442e-06, "loss": 0.284, "step": 24340, "task_loss": 0.49374791979789734 }, { "compression_loss": 0.0, "distillation_loss": 0.25753918290138245, "epoch": 8.8, "learning_rate": 9.53838781510815e-06, "loss": 0.3393, "step": 24350, "task_loss": 0.4400380253791809 }, { "compression_loss": 0.0, "distillation_loss": 0.3407808542251587, "epoch": 8.8, "learning_rate": 9.483921929076223e-06, "loss": 0.2922, "step": 24360, "task_loss": 0.7328530550003052 }, { "compression_loss": 0.0, "distillation_loss": 0.32406896352767944, "epoch": 8.81, "learning_rate": 9.429587612523036e-06, "loss": 0.3012, "step": 24370, "task_loss": 0.496076762676239 }, { "compression_loss": 0.0, "distillation_loss": 0.3123983144760132, "epoch": 8.81, "learning_rate": 9.3753851456146e-06, "loss": 0.3108, "step": 24380, "task_loss": 0.6676946878433228 }, { "compression_loss": 0.0, "distillation_loss": 0.31041258573532104, "epoch": 8.81, "learning_rate": 9.321314807837065e-06, "loss": 0.2882, "step": 24390, "task_loss": 0.697533369064331 }, { "compression_loss": 0.0, "distillation_loss": 0.3565478026866913, "epoch": 8.82, "learning_rate": 9.267376877995281e-06, "loss": 0.3524, "step": 24400, "task_loss": 0.5232845544815063 }, { "compression_loss": 0.0, "distillation_loss": 0.29742079973220825, "epoch": 8.82, "learning_rate": 9.213571634211337e-06, "loss": 0.3312, "step": 24410, "task_loss": 0.7807422876358032 }, { "compression_loss": 0.0, "distillation_loss": 0.33375900983810425, "epoch": 8.83, "learning_rate": 9.159899353923163e-06, "loss": 0.3086, "step": 24420, "task_loss": 0.9461276531219482 }, { "compression_loss": 0.0, "distillation_loss": 0.300223708152771, "epoch": 8.83, "learning_rate": 9.106360313883099e-06, "loss": 0.3211, "step": 24430, "task_loss": 0.4010380506515503 }, { "compression_loss": 0.0, "distillation_loss": 0.27285024523735046, "epoch": 8.83, "learning_rate": 9.052954790156437e-06, "loss": 0.3262, "step": 24440, "task_loss": 0.3871135711669922 }, { "compression_loss": 0.0, "distillation_loss": 0.2731539011001587, "epoch": 8.84, "learning_rate": 8.999683058119992e-06, "loss": 0.3058, "step": 24450, "task_loss": 0.4444922208786011 }, { "compression_loss": 0.0, "distillation_loss": 0.3398137390613556, "epoch": 8.84, "learning_rate": 8.946545392460764e-06, "loss": 0.3225, "step": 24460, "task_loss": 0.4049307703971863 }, { "compression_loss": 0.0, "distillation_loss": 0.33151674270629883, "epoch": 8.84, "learning_rate": 8.893542067174394e-06, "loss": 0.2843, "step": 24470, "task_loss": 0.3135819733142853 }, { "compression_loss": 0.0, "distillation_loss": 0.26082944869995117, "epoch": 8.85, "learning_rate": 8.84067335556387e-06, "loss": 0.3005, "step": 24480, "task_loss": 0.6256157755851746 }, { "compression_loss": 0.0, "distillation_loss": 0.3421831429004669, "epoch": 8.85, "learning_rate": 8.787939530238035e-06, "loss": 0.319, "step": 24490, "task_loss": 0.47051504254341125 }, { "compression_loss": 0.0, "distillation_loss": 0.44341349601745605, "epoch": 8.85, "learning_rate": 8.735340863110246e-06, "loss": 0.3367, "step": 24500, "task_loss": 0.28469258546829224 }, { "epoch": 8.85, "eval_exact_match": 83.4247871333964, "eval_f1": 90.13270154934425, "step": 24500 }, { "compression_loss": 0.0, "distillation_loss": 0.2990521788597107, "epoch": 8.86, "learning_rate": 8.68287762539689e-06, "loss": 0.3001, "step": 24510, "task_loss": 0.48367151618003845 }, { "compression_loss": 0.0, "distillation_loss": 0.3240818977355957, "epoch": 8.86, "learning_rate": 8.630550087616077e-06, "loss": 0.2951, "step": 24520, "task_loss": 0.6060233116149902 }, { "compression_loss": 0.0, "distillation_loss": 0.346133828163147, "epoch": 8.87, "learning_rate": 8.578358519586184e-06, "loss": 0.306, "step": 24530, "task_loss": 0.7388773560523987 }, { "compression_loss": 0.0, "distillation_loss": 0.198516845703125, "epoch": 8.87, "learning_rate": 8.526303190424489e-06, "loss": 0.3196, "step": 24540, "task_loss": 0.34029442071914673 }, { "compression_loss": 0.0, "distillation_loss": 0.21414974331855774, "epoch": 8.87, "learning_rate": 8.474384368545771e-06, "loss": 0.3066, "step": 24550, "task_loss": 0.6532096862792969 }, { "compression_loss": 0.0, "distillation_loss": 0.3364071547985077, "epoch": 8.88, "learning_rate": 8.422602321660946e-06, "loss": 0.2868, "step": 24560, "task_loss": 0.7407635450363159 }, { "compression_loss": 0.0, "distillation_loss": 0.24205178022384644, "epoch": 8.88, "learning_rate": 8.37095731677564e-06, "loss": 0.2937, "step": 24570, "task_loss": 0.36204320192337036 }, { "compression_loss": 0.0, "distillation_loss": 0.32036536931991577, "epoch": 8.88, "learning_rate": 8.319449620188875e-06, "loss": 0.3009, "step": 24580, "task_loss": 0.5532072186470032 }, { "compression_loss": 0.0, "distillation_loss": 0.3431103229522705, "epoch": 8.89, "learning_rate": 8.268079497491656e-06, "loss": 0.2938, "step": 24590, "task_loss": 0.47085040807724 }, { "compression_loss": 0.0, "distillation_loss": 0.26091110706329346, "epoch": 8.89, "learning_rate": 8.216847213565618e-06, "loss": 0.2954, "step": 24600, "task_loss": 0.6554282903671265 }, { "compression_loss": 0.0, "distillation_loss": 0.284633994102478, "epoch": 8.89, "learning_rate": 8.165753032581614e-06, "loss": 0.3042, "step": 24610, "task_loss": 0.5411564111709595 }, { "compression_loss": 0.0, "distillation_loss": 0.23999324440956116, "epoch": 8.9, "learning_rate": 8.114797217998467e-06, "loss": 0.3104, "step": 24620, "task_loss": 0.3989071846008301 }, { "compression_loss": 0.0, "distillation_loss": 0.38341277837753296, "epoch": 8.9, "learning_rate": 8.063980032561454e-06, "loss": 0.3384, "step": 24630, "task_loss": 0.959318995475769 }, { "compression_loss": 0.0, "distillation_loss": 0.21845731139183044, "epoch": 8.9, "learning_rate": 8.013301738301086e-06, "loss": 0.2954, "step": 24640, "task_loss": 0.33747684955596924 }, { "compression_loss": 0.0, "distillation_loss": 0.3176514506340027, "epoch": 8.91, "learning_rate": 7.96276259653169e-06, "loss": 0.3091, "step": 24650, "task_loss": 0.33057931065559387 }, { "compression_loss": 0.0, "distillation_loss": 0.2908194065093994, "epoch": 8.91, "learning_rate": 7.912362867850083e-06, "loss": 0.2927, "step": 24660, "task_loss": 0.41401976346969604 }, { "compression_loss": 0.0, "distillation_loss": 0.3124035596847534, "epoch": 8.92, "learning_rate": 7.8621028121342e-06, "loss": 0.3021, "step": 24670, "task_loss": 0.8065996170043945 }, { "compression_loss": 0.0, "distillation_loss": 0.21487663686275482, "epoch": 8.92, "learning_rate": 7.811982688541792e-06, "loss": 0.3237, "step": 24680, "task_loss": 0.5148287415504456 }, { "compression_loss": 0.0, "distillation_loss": 0.3609391748905182, "epoch": 8.92, "learning_rate": 7.76200275550907e-06, "loss": 0.3003, "step": 24690, "task_loss": 0.5247024297714233 }, { "compression_loss": 0.0, "distillation_loss": 0.26552635431289673, "epoch": 8.93, "learning_rate": 7.71216327074939e-06, "loss": 0.301, "step": 24700, "task_loss": 0.49575549364089966 }, { "compression_loss": 0.0, "distillation_loss": 0.2935408055782318, "epoch": 8.93, "learning_rate": 7.662464491251865e-06, "loss": 0.2868, "step": 24710, "task_loss": 0.6105607748031616 }, { "compression_loss": 0.0, "distillation_loss": 0.25080007314682007, "epoch": 8.93, "learning_rate": 7.612906673280149e-06, "loss": 0.3025, "step": 24720, "task_loss": 0.2942613363265991 }, { "compression_loss": 0.0, "distillation_loss": 0.2175687700510025, "epoch": 8.94, "learning_rate": 7.563490072370988e-06, "loss": 0.2828, "step": 24730, "task_loss": 0.4805278778076172 }, { "compression_loss": 0.0, "distillation_loss": 0.3156837224960327, "epoch": 8.94, "learning_rate": 7.5142149433330064e-06, "loss": 0.275, "step": 24740, "task_loss": 0.5296868085861206 }, { "compression_loss": 0.0, "distillation_loss": 0.24178892374038696, "epoch": 8.94, "learning_rate": 7.46508154024534e-06, "loss": 0.2847, "step": 24750, "task_loss": 0.38275861740112305 }, { "epoch": 8.94, "eval_exact_match": 83.40586565752129, "eval_f1": 90.19943208724408, "step": 24750 }, { "compression_loss": 0.0, "distillation_loss": 0.23588743805885315, "epoch": 8.95, "learning_rate": 7.41609011645634e-06, "loss": 0.3342, "step": 24760, "task_loss": 0.388775110244751 }, { "compression_loss": 0.0, "distillation_loss": 0.22134511172771454, "epoch": 8.95, "learning_rate": 7.367240924582245e-06, "loss": 0.2849, "step": 24770, "task_loss": 0.3853006362915039 }, { "compression_loss": 0.0, "distillation_loss": 0.2299889624118805, "epoch": 8.96, "learning_rate": 7.3185342165059285e-06, "loss": 0.2984, "step": 24780, "task_loss": 0.22366122901439667 }, { "compression_loss": 0.0, "distillation_loss": 0.5092062950134277, "epoch": 8.96, "learning_rate": 7.269970243375551e-06, "loss": 0.2925, "step": 24790, "task_loss": 0.64637291431427 }, { "compression_loss": 0.0, "distillation_loss": 0.3082968294620514, "epoch": 8.96, "learning_rate": 7.221549255603302e-06, "loss": 0.3162, "step": 24800, "task_loss": 0.43850719928741455 }, { "compression_loss": 0.0, "distillation_loss": 0.30956393480300903, "epoch": 8.97, "learning_rate": 7.1732715028640445e-06, "loss": 0.3237, "step": 24810, "task_loss": 0.5990317463874817 }, { "compression_loss": 0.0, "distillation_loss": 0.24433210492134094, "epoch": 8.97, "learning_rate": 7.125137234094145e-06, "loss": 0.3109, "step": 24820, "task_loss": 0.46226024627685547 }, { "compression_loss": 0.0, "distillation_loss": 0.3623145818710327, "epoch": 8.97, "learning_rate": 7.077146697490052e-06, "loss": 0.3295, "step": 24830, "task_loss": 0.4708067774772644 }, { "compression_loss": 0.0, "distillation_loss": 0.2837136387825012, "epoch": 8.98, "learning_rate": 7.02930014050712e-06, "loss": 0.3078, "step": 24840, "task_loss": 0.6401948928833008 }, { "compression_loss": 0.0, "distillation_loss": 0.2711445093154907, "epoch": 8.98, "learning_rate": 6.98159780985828e-06, "loss": 0.3127, "step": 24850, "task_loss": 0.38128766417503357 }, { "compression_loss": 0.0, "distillation_loss": 0.32651424407958984, "epoch": 8.98, "learning_rate": 6.934039951512807e-06, "loss": 0.3118, "step": 24860, "task_loss": 0.5000793933868408 }, { "compression_loss": 0.0, "distillation_loss": 0.3723224401473999, "epoch": 8.99, "learning_rate": 6.886626810694988e-06, "loss": 0.3275, "step": 24870, "task_loss": 0.5765509009361267 }, { "compression_loss": 0.0, "distillation_loss": 0.570521891117096, "epoch": 8.99, "learning_rate": 6.839358631882928e-06, "loss": 0.3048, "step": 24880, "task_loss": 0.7745019197463989 }, { "compression_loss": 0.0, "distillation_loss": 0.35303252935409546, "epoch": 9.0, "learning_rate": 6.792235658807258e-06, "loss": 0.2959, "step": 24890, "task_loss": 0.6391245126724243 }, { "compression_loss": 0.0, "distillation_loss": 0.3144342005252838, "epoch": 9.0, "learning_rate": 6.745258134449877e-06, "loss": 0.3054, "step": 24900, "task_loss": 0.3757985830307007 }, { "compression_loss": 0.0, "distillation_loss": 0.33729472756385803, "epoch": 9.0, "learning_rate": 6.698426301042671e-06, "loss": 0.3118, "step": 24910, "task_loss": 0.5654165744781494 }, { "compression_loss": 0.0, "distillation_loss": 0.2673114538192749, "epoch": 9.01, "learning_rate": 6.651740400066356e-06, "loss": 0.2841, "step": 24920, "task_loss": 0.7930485606193542 }, { "compression_loss": 0.0, "distillation_loss": 0.3094874918460846, "epoch": 9.01, "learning_rate": 6.605200672249114e-06, "loss": 0.3033, "step": 24930, "task_loss": 0.5905535817146301 }, { "compression_loss": 0.0, "distillation_loss": 0.2626067101955414, "epoch": 9.01, "learning_rate": 6.558807357565434e-06, "loss": 0.2958, "step": 24940, "task_loss": 0.5565435886383057 }, { "compression_loss": 0.0, "distillation_loss": 0.20787197351455688, "epoch": 9.02, "learning_rate": 6.512560695234854e-06, "loss": 0.2749, "step": 24950, "task_loss": 0.6173804402351379 }, { "compression_loss": 0.0, "distillation_loss": 0.4326663315296173, "epoch": 9.02, "learning_rate": 6.4664609237207205e-06, "loss": 0.2847, "step": 24960, "task_loss": 0.4918290674686432 }, { "compression_loss": 0.0, "distillation_loss": 0.3277257978916168, "epoch": 9.02, "learning_rate": 6.42050828072894e-06, "loss": 0.2905, "step": 24970, "task_loss": 0.5493934154510498 }, { "compression_loss": 0.0, "distillation_loss": 0.26245617866516113, "epoch": 9.03, "learning_rate": 6.3747030032068e-06, "loss": 0.2992, "step": 24980, "task_loss": 0.6874579787254333 }, { "compression_loss": 0.0, "distillation_loss": 0.30544644594192505, "epoch": 9.03, "learning_rate": 6.32904532734172e-06, "loss": 0.2894, "step": 24990, "task_loss": 0.7642111778259277 }, { "compression_loss": 0.0, "distillation_loss": 0.38944071531295776, "epoch": 9.04, "learning_rate": 6.283535488560031e-06, "loss": 0.2805, "step": 25000, "task_loss": 0.9234870672225952 }, { "epoch": 9.04, "eval_exact_match": 83.20719016083254, "eval_f1": 90.12546160298142, "step": 25000 }, { "compression_loss": 0.0, "distillation_loss": 0.29679083824157715, "epoch": 9.04, "learning_rate": 6.2381737215257446e-06, "loss": 0.2977, "step": 25010, "task_loss": 0.505827784538269 }, { "compression_loss": 0.0, "distillation_loss": 0.34737223386764526, "epoch": 9.04, "learning_rate": 6.192960260139417e-06, "loss": 0.2884, "step": 25020, "task_loss": 0.4295722246170044 }, { "compression_loss": 0.0, "distillation_loss": 0.3087272644042969, "epoch": 9.05, "learning_rate": 6.147895337536835e-06, "loss": 0.3117, "step": 25030, "task_loss": 0.35688644647598267 }, { "compression_loss": 0.0, "distillation_loss": 0.25100329518318176, "epoch": 9.05, "learning_rate": 6.102979186087896e-06, "loss": 0.2811, "step": 25040, "task_loss": 0.38825923204421997 }, { "compression_loss": 0.0, "distillation_loss": 0.26149922609329224, "epoch": 9.05, "learning_rate": 6.058212037395389e-06, "loss": 0.3192, "step": 25050, "task_loss": 0.3461078107357025 }, { "compression_loss": 0.0, "distillation_loss": 0.25997209548950195, "epoch": 9.06, "learning_rate": 6.013594122293796e-06, "loss": 0.2923, "step": 25060, "task_loss": 0.4676539897918701 }, { "compression_loss": 0.0, "distillation_loss": 0.3166268765926361, "epoch": 9.06, "learning_rate": 5.969125670848064e-06, "loss": 0.2811, "step": 25070, "task_loss": 0.2946656048297882 }, { "compression_loss": 0.0, "distillation_loss": 0.22155921161174774, "epoch": 9.06, "learning_rate": 5.924806912352495e-06, "loss": 0.2831, "step": 25080, "task_loss": 0.49417394399642944 }, { "compression_loss": 0.0, "distillation_loss": 0.2537713348865509, "epoch": 9.07, "learning_rate": 5.880638075329512e-06, "loss": 0.2797, "step": 25090, "task_loss": 0.48220908641815186 }, { "compression_loss": 0.0, "distillation_loss": 0.20896217226982117, "epoch": 9.07, "learning_rate": 5.836619387528503e-06, "loss": 0.2584, "step": 25100, "task_loss": 0.2613734304904938 }, { "compression_loss": 0.0, "distillation_loss": 0.22877725958824158, "epoch": 9.07, "learning_rate": 5.792751075924592e-06, "loss": 0.2966, "step": 25110, "task_loss": 0.4536246359348297 }, { "compression_loss": 0.0, "distillation_loss": 0.35032469034194946, "epoch": 9.08, "learning_rate": 5.7490333667175805e-06, "loss": 0.2964, "step": 25120, "task_loss": 0.9190288782119751 }, { "compression_loss": 0.0, "distillation_loss": 0.2878870964050293, "epoch": 9.08, "learning_rate": 5.705466485330657e-06, "loss": 0.2988, "step": 25130, "task_loss": 0.7237658500671387 }, { "compression_loss": 0.0, "distillation_loss": 0.28640902042388916, "epoch": 9.09, "learning_rate": 5.662050656409317e-06, "loss": 0.282, "step": 25140, "task_loss": 0.24912115931510925 }, { "compression_loss": 0.0, "distillation_loss": 0.24825415015220642, "epoch": 9.09, "learning_rate": 5.618786103820175e-06, "loss": 0.2743, "step": 25150, "task_loss": 0.7251200079917908 }, { "compression_loss": 0.0, "distillation_loss": 0.3168322443962097, "epoch": 9.09, "learning_rate": 5.575673050649818e-06, "loss": 0.283, "step": 25160, "task_loss": 0.9995952844619751 }, { "compression_loss": 0.0, "distillation_loss": 0.24792785942554474, "epoch": 9.1, "learning_rate": 5.532711719203632e-06, "loss": 0.3235, "step": 25170, "task_loss": 0.4345579147338867 }, { "compression_loss": 0.0, "distillation_loss": 0.269896924495697, "epoch": 9.1, "learning_rate": 5.494176426081241e-06, "loss": 0.3099, "step": 25180, "task_loss": 0.5822728872299194 }, { "compression_loss": 0.0, "distillation_loss": 0.3608018457889557, "epoch": 9.1, "learning_rate": 5.45150397555954e-06, "loss": 0.2885, "step": 25190, "task_loss": 0.6332502365112305 }, { "compression_loss": 0.0, "distillation_loss": 0.27320295572280884, "epoch": 9.11, "learning_rate": 5.4089838870195325e-06, "loss": 0.2992, "step": 25200, "task_loss": 0.3529553711414337 }, { "compression_loss": 0.0, "distillation_loss": 0.22571894526481628, "epoch": 9.11, "learning_rate": 5.366616379709066e-06, "loss": 0.2927, "step": 25210, "task_loss": 0.5368704795837402 }, { "compression_loss": 0.0, "distillation_loss": 0.27815741300582886, "epoch": 9.11, "learning_rate": 5.324401672089289e-06, "loss": 0.3008, "step": 25220, "task_loss": 0.5218074321746826 }, { "compression_loss": 0.0, "distillation_loss": 0.2736639380455017, "epoch": 9.12, "learning_rate": 5.282339981833406e-06, "loss": 0.2941, "step": 25230, "task_loss": 0.5212745070457458 }, { "compression_loss": 0.0, "distillation_loss": 0.30182701349258423, "epoch": 9.12, "learning_rate": 5.240431525825641e-06, "loss": 0.303, "step": 25240, "task_loss": 0.42352715134620667 }, { "compression_loss": 0.0, "distillation_loss": 0.28600355982780457, "epoch": 9.13, "learning_rate": 5.198676520160087e-06, "loss": 0.2664, "step": 25250, "task_loss": 0.4605422914028168 }, { "epoch": 9.13, "eval_exact_match": 83.32071901608326, "eval_f1": 90.1759308609037, "step": 25250 }, { "compression_loss": 0.0, "distillation_loss": 0.22900843620300293, "epoch": 9.13, "learning_rate": 5.157075180139602e-06, "loss": 0.286, "step": 25260, "task_loss": 0.6918904781341553 }, { "compression_loss": 0.0, "distillation_loss": 0.23345553874969482, "epoch": 9.13, "learning_rate": 5.1156277202746694e-06, "loss": 0.2836, "step": 25270, "task_loss": 0.3756367862224579 }, { "compression_loss": 0.0, "distillation_loss": 0.29328298568725586, "epoch": 9.14, "learning_rate": 5.074334354282335e-06, "loss": 0.2912, "step": 25280, "task_loss": 0.5100090503692627 }, { "compression_loss": 0.0, "distillation_loss": 0.26017504930496216, "epoch": 9.14, "learning_rate": 5.033195295085081e-06, "loss": 0.2888, "step": 25290, "task_loss": 0.4601510465145111 }, { "compression_loss": 0.0, "distillation_loss": 0.3635603189468384, "epoch": 9.14, "learning_rate": 4.992210754809733e-06, "loss": 0.2877, "step": 25300, "task_loss": 0.943723738193512 }, { "compression_loss": 0.0, "distillation_loss": 0.2453012764453888, "epoch": 9.15, "learning_rate": 4.951380944786361e-06, "loss": 0.2797, "step": 25310, "task_loss": 0.5946983695030212 }, { "compression_loss": 0.0, "distillation_loss": 0.31875067949295044, "epoch": 9.15, "learning_rate": 4.910706075547198e-06, "loss": 0.2872, "step": 25320, "task_loss": 0.6306192874908447 }, { "compression_loss": 0.0, "distillation_loss": 0.24196487665176392, "epoch": 9.15, "learning_rate": 4.8701863568255366e-06, "loss": 0.2564, "step": 25330, "task_loss": 0.8180104494094849 }, { "compression_loss": 0.0, "distillation_loss": 0.2831239700317383, "epoch": 9.16, "learning_rate": 4.829821997554683e-06, "loss": 0.2927, "step": 25340, "task_loss": 0.5410017967224121 }, { "compression_loss": 0.0, "distillation_loss": 0.3321933448314667, "epoch": 9.16, "learning_rate": 4.789613205866839e-06, "loss": 0.2908, "step": 25350, "task_loss": 0.8938983678817749 }, { "compression_loss": 0.0, "distillation_loss": 0.2835544943809509, "epoch": 9.17, "learning_rate": 4.749560189092066e-06, "loss": 0.2755, "step": 25360, "task_loss": 0.45705628395080566 }, { "compression_loss": 0.0, "distillation_loss": 0.2626034617424011, "epoch": 9.17, "learning_rate": 4.709663153757165e-06, "loss": 0.2861, "step": 25370, "task_loss": 0.3554117977619171 }, { "compression_loss": 0.0, "distillation_loss": 0.24724841117858887, "epoch": 9.17, "learning_rate": 4.669922305584701e-06, "loss": 0.2859, "step": 25380, "task_loss": 0.2859405279159546 }, { "compression_loss": 0.0, "distillation_loss": 0.2531455457210541, "epoch": 9.18, "learning_rate": 4.630337849491818e-06, "loss": 0.2832, "step": 25390, "task_loss": 0.3663128614425659 }, { "compression_loss": 0.0, "distillation_loss": 0.34095948934555054, "epoch": 9.18, "learning_rate": 4.5909099895892995e-06, "loss": 0.2919, "step": 25400, "task_loss": 0.6796036958694458 }, { "compression_loss": 0.0, "distillation_loss": 0.2302282750606537, "epoch": 9.18, "learning_rate": 4.551638929180444e-06, "loss": 0.2915, "step": 25410, "task_loss": 0.39458438754081726 }, { "compression_loss": 0.0, "distillation_loss": 0.220722034573555, "epoch": 9.19, "learning_rate": 4.512524870760054e-06, "loss": 0.2622, "step": 25420, "task_loss": 0.15429294109344482 }, { "compression_loss": 0.0, "distillation_loss": 0.3225393295288086, "epoch": 9.19, "learning_rate": 4.473568016013349e-06, "loss": 0.2863, "step": 25430, "task_loss": 0.43470489978790283 }, { "compression_loss": 0.0, "distillation_loss": 0.18860642611980438, "epoch": 9.19, "learning_rate": 4.4347685658149885e-06, "loss": 0.2572, "step": 25440, "task_loss": 0.4267352819442749 }, { "compression_loss": 0.0, "distillation_loss": 0.34222182631492615, "epoch": 9.2, "learning_rate": 4.396126720227975e-06, "loss": 0.3, "step": 25450, "task_loss": 0.569180428981781 }, { "compression_loss": 0.0, "distillation_loss": 0.28997889161109924, "epoch": 9.2, "learning_rate": 4.357642678502669e-06, "loss": 0.2775, "step": 25460, "task_loss": 0.6120558977127075 }, { "compression_loss": 0.0, "distillation_loss": 0.2590046525001526, "epoch": 9.2, "learning_rate": 4.319316639075711e-06, "loss": 0.2804, "step": 25470, "task_loss": 0.45842182636260986 }, { "compression_loss": 0.0, "distillation_loss": 0.282612144947052, "epoch": 9.21, "learning_rate": 4.281148799569073e-06, "loss": 0.2986, "step": 25480, "task_loss": 0.5861254930496216 }, { "compression_loss": 0.0, "distillation_loss": 0.19793371856212616, "epoch": 9.21, "learning_rate": 4.243139356788951e-06, "loss": 0.2715, "step": 25490, "task_loss": 0.5529229640960693 }, { "compression_loss": 0.0, "distillation_loss": 0.25628167390823364, "epoch": 9.22, "learning_rate": 4.205288506724823e-06, "loss": 0.2758, "step": 25500, "task_loss": 0.30487939715385437 }, { "epoch": 9.22, "eval_exact_match": 83.37748344370861, "eval_f1": 90.1936862438257, "step": 25500 }, { "compression_loss": 0.0, "distillation_loss": 0.22346541285514832, "epoch": 9.22, "learning_rate": 4.167596444548396e-06, "loss": 0.2788, "step": 25510, "task_loss": 0.3134952187538147 }, { "compression_loss": 0.0, "distillation_loss": 0.3261803686618805, "epoch": 9.22, "learning_rate": 4.130063364612621e-06, "loss": 0.3254, "step": 25520, "task_loss": 0.47811222076416016 }, { "compression_loss": 0.0, "distillation_loss": 0.22242692112922668, "epoch": 9.23, "learning_rate": 4.0926894604506725e-06, "loss": 0.3134, "step": 25530, "task_loss": 0.42973819375038147 }, { "compression_loss": 0.0, "distillation_loss": 0.2093992978334427, "epoch": 9.23, "learning_rate": 4.055474924774975e-06, "loss": 0.2835, "step": 25540, "task_loss": 0.3554569482803345 }, { "compression_loss": 0.0, "distillation_loss": 0.223178893327713, "epoch": 9.23, "learning_rate": 4.01841994947618e-06, "loss": 0.2766, "step": 25550, "task_loss": 0.5469294786453247 }, { "compression_loss": 0.0, "distillation_loss": 0.26629751920700073, "epoch": 9.24, "learning_rate": 3.981524725622215e-06, "loss": 0.2989, "step": 25560, "task_loss": 0.5456836223602295 }, { "compression_loss": 0.0, "distillation_loss": 0.4269055426120758, "epoch": 9.24, "learning_rate": 3.944789443457237e-06, "loss": 0.3092, "step": 25570, "task_loss": 0.5398896336555481 }, { "compression_loss": 0.0, "distillation_loss": 0.2995172142982483, "epoch": 9.24, "learning_rate": 3.908214292400743e-06, "loss": 0.291, "step": 25580, "task_loss": 0.6619876623153687 }, { "compression_loss": 0.0, "distillation_loss": 0.23310492932796478, "epoch": 9.25, "learning_rate": 3.871799461046491e-06, "loss": 0.2687, "step": 25590, "task_loss": 0.3361056447029114 }, { "compression_loss": 0.0, "distillation_loss": 0.2728113532066345, "epoch": 9.25, "learning_rate": 3.8355451371616e-06, "loss": 0.2857, "step": 25600, "task_loss": 0.4070819616317749 }, { "compression_loss": 0.0, "distillation_loss": 0.21583805978298187, "epoch": 9.26, "learning_rate": 3.799451507685557e-06, "loss": 0.2933, "step": 25610, "task_loss": 0.3874829411506653 }, { "compression_loss": 0.0, "distillation_loss": 0.3354247808456421, "epoch": 9.26, "learning_rate": 3.7635187587292616e-06, "loss": 0.2835, "step": 25620, "task_loss": 0.5514976978302002 }, { "compression_loss": 0.0, "distillation_loss": 0.3161737322807312, "epoch": 9.26, "learning_rate": 3.727747075574033e-06, "loss": 0.2812, "step": 25630, "task_loss": 0.7555432319641113 }, { "compression_loss": 0.0, "distillation_loss": 0.23402747511863708, "epoch": 9.27, "learning_rate": 3.6921366426707064e-06, "loss": 0.2882, "step": 25640, "task_loss": 0.5421460270881653 }, { "compression_loss": 0.0, "distillation_loss": 0.28667300939559937, "epoch": 9.27, "learning_rate": 3.656687643638646e-06, "loss": 0.2625, "step": 25650, "task_loss": 0.6737666130065918 }, { "compression_loss": 0.0, "distillation_loss": 0.3086755871772766, "epoch": 9.27, "learning_rate": 3.6214002612648187e-06, "loss": 0.2956, "step": 25660, "task_loss": 0.5526766777038574 }, { "compression_loss": 0.0, "distillation_loss": 0.24900871515274048, "epoch": 9.28, "learning_rate": 3.5862746775028274e-06, "loss": 0.269, "step": 25670, "task_loss": 0.7501120567321777 }, { "compression_loss": 0.0, "distillation_loss": 0.25524961948394775, "epoch": 9.28, "learning_rate": 3.5513110734719947e-06, "loss": 0.2781, "step": 25680, "task_loss": 0.4351167678833008 }, { "compression_loss": 0.0, "distillation_loss": 0.24399134516716003, "epoch": 9.28, "learning_rate": 3.5165096294564294e-06, "loss": 0.2996, "step": 25690, "task_loss": 0.3778623342514038 }, { "compression_loss": 0.0, "distillation_loss": 0.26403456926345825, "epoch": 9.29, "learning_rate": 3.481870524904063e-06, "loss": 0.2898, "step": 25700, "task_loss": 0.4827141761779785 }, { "compression_loss": 0.0, "distillation_loss": 0.31666100025177, "epoch": 9.29, "learning_rate": 3.4473939384257713e-06, "loss": 0.2671, "step": 25710, "task_loss": 0.6182833909988403 }, { "compression_loss": 0.0, "distillation_loss": 0.2807222604751587, "epoch": 9.3, "learning_rate": 3.4130800477944353e-06, "loss": 0.2924, "step": 25720, "task_loss": 0.3987352252006531 }, { "compression_loss": 0.0, "distillation_loss": 0.3156668543815613, "epoch": 9.3, "learning_rate": 3.378929029944014e-06, "loss": 0.2552, "step": 25730, "task_loss": 0.339354932308197 }, { "compression_loss": 0.0, "distillation_loss": 0.2318783700466156, "epoch": 9.3, "learning_rate": 3.3449410609686222e-06, "loss": 0.2721, "step": 25740, "task_loss": 0.5828352570533752 }, { "compression_loss": 0.0, "distillation_loss": 0.33963948488235474, "epoch": 9.31, "learning_rate": 3.311116316121681e-06, "loss": 0.2969, "step": 25750, "task_loss": 0.6039541363716125 }, { "epoch": 9.31, "eval_exact_match": 83.45316934720908, "eval_f1": 90.26675509384134, "step": 25750 }, { "compression_loss": 0.0, "distillation_loss": 0.2563812732696533, "epoch": 9.31, "learning_rate": 3.27745496981493e-06, "loss": 0.3062, "step": 25760, "task_loss": 0.2901385724544525 }, { "compression_loss": 0.0, "distillation_loss": 0.3281647562980652, "epoch": 9.31, "learning_rate": 3.2439571956175955e-06, "loss": 0.2789, "step": 25770, "task_loss": 0.7631877660751343 }, { "compression_loss": 0.0, "distillation_loss": 0.21847324073314667, "epoch": 9.32, "learning_rate": 3.2106231662554687e-06, "loss": 0.2985, "step": 25780, "task_loss": 0.2929229140281677 }, { "compression_loss": 0.0, "distillation_loss": 0.2040231078863144, "epoch": 9.32, "learning_rate": 3.17745305361002e-06, "loss": 0.263, "step": 25790, "task_loss": 0.2177676260471344 }, { "compression_loss": 0.0, "distillation_loss": 0.26135972142219543, "epoch": 9.32, "learning_rate": 3.144447028717492e-06, "loss": 0.2908, "step": 25800, "task_loss": 0.380513995885849 }, { "compression_loss": 0.0, "distillation_loss": 0.313922643661499, "epoch": 9.33, "learning_rate": 3.1116052617680475e-06, "loss": 0.312, "step": 25810, "task_loss": 0.41387584805488586 }, { "compression_loss": 0.0, "distillation_loss": 0.3513936400413513, "epoch": 9.33, "learning_rate": 3.078927922104888e-06, "loss": 0.2748, "step": 25820, "task_loss": 0.607092022895813 }, { "compression_loss": 0.0, "distillation_loss": 0.2572995126247406, "epoch": 9.34, "learning_rate": 3.04641517822337e-06, "loss": 0.2937, "step": 25830, "task_loss": 0.3874950110912323 }, { "compression_loss": 0.0, "distillation_loss": 0.336165189743042, "epoch": 9.34, "learning_rate": 3.0140671977701126e-06, "loss": 0.2795, "step": 25840, "task_loss": 0.7848570346832275 }, { "compression_loss": 0.0, "distillation_loss": 0.2801818549633026, "epoch": 9.34, "learning_rate": 2.9818841475422074e-06, "loss": 0.2711, "step": 25850, "task_loss": 0.5263223648071289 }, { "compression_loss": 0.0, "distillation_loss": 0.2951273024082184, "epoch": 9.35, "learning_rate": 2.9498661934862625e-06, "loss": 0.2934, "step": 25860, "task_loss": 0.39920687675476074 }, { "compression_loss": 0.0, "distillation_loss": 0.3156875967979431, "epoch": 9.35, "learning_rate": 2.9180135006976253e-06, "loss": 0.2879, "step": 25870, "task_loss": 0.4999083876609802 }, { "compression_loss": 0.0, "distillation_loss": 0.325722873210907, "epoch": 9.35, "learning_rate": 2.8863262334194892e-06, "loss": 0.2734, "step": 25880, "task_loss": 0.49259310960769653 }, { "compression_loss": 0.0, "distillation_loss": 0.3251590132713318, "epoch": 9.36, "learning_rate": 2.854804555042066e-06, "loss": 0.291, "step": 25890, "task_loss": 0.6317105293273926 }, { "compression_loss": 0.0, "distillation_loss": 0.2367677092552185, "epoch": 9.36, "learning_rate": 2.823448628101714e-06, "loss": 0.268, "step": 25900, "task_loss": 0.24415592849254608 }, { "compression_loss": 0.0, "distillation_loss": 0.381255567073822, "epoch": 9.36, "learning_rate": 2.792258614280147e-06, "loss": 0.2871, "step": 25910, "task_loss": 0.8889896869659424 }, { "compression_loss": 0.0, "distillation_loss": 0.26195400953292847, "epoch": 9.37, "learning_rate": 2.761234674403565e-06, "loss": 0.2644, "step": 25920, "task_loss": 0.5699973702430725 }, { "compression_loss": 0.0, "distillation_loss": 0.307630717754364, "epoch": 9.37, "learning_rate": 2.730376968441837e-06, "loss": 0.2978, "step": 25930, "task_loss": 0.7901893258094788 }, { "compression_loss": 0.0, "distillation_loss": 0.24636954069137573, "epoch": 9.37, "learning_rate": 2.699685655507652e-06, "loss": 0.2725, "step": 25940, "task_loss": 0.4935848116874695 }, { "compression_loss": 0.0, "distillation_loss": 0.27287501096725464, "epoch": 9.38, "learning_rate": 2.6691608938557673e-06, "loss": 0.2744, "step": 25950, "task_loss": 0.5872029066085815 }, { "compression_loss": 0.0, "distillation_loss": 0.24528878927230835, "epoch": 9.38, "learning_rate": 2.638802840882099e-06, "loss": 0.2937, "step": 25960, "task_loss": 0.5012997388839722 }, { "compression_loss": 0.0, "distillation_loss": 0.2681114971637726, "epoch": 9.39, "learning_rate": 2.608611653122982e-06, "loss": 0.2798, "step": 25970, "task_loss": 0.5551661849021912 }, { "compression_loss": 0.0, "distillation_loss": 0.25442802906036377, "epoch": 9.39, "learning_rate": 2.5785874862543364e-06, "loss": 0.2823, "step": 25980, "task_loss": 0.28142935037612915 }, { "compression_loss": 0.0, "distillation_loss": 0.3172297477722168, "epoch": 9.39, "learning_rate": 2.548730495090867e-06, "loss": 0.273, "step": 25990, "task_loss": 0.5310195088386536 }, { "compression_loss": 0.0, "distillation_loss": 0.23420450091362, "epoch": 9.4, "learning_rate": 2.5190408335852423e-06, "loss": 0.2762, "step": 26000, "task_loss": 0.34213316440582275 }, { "epoch": 9.4, "eval_exact_match": 83.3112582781457, "eval_f1": 90.12102540358967, "step": 26000 }, { "compression_loss": 0.0, "distillation_loss": 0.23456361889839172, "epoch": 9.4, "learning_rate": 2.4895186548273553e-06, "loss": 0.2888, "step": 26010, "task_loss": 0.6904394626617432 }, { "compression_loss": 0.0, "distillation_loss": 0.24155323207378387, "epoch": 9.4, "learning_rate": 2.4601641110434842e-06, "loss": 0.272, "step": 26020, "task_loss": 0.5414040088653564 }, { "compression_loss": 0.0, "distillation_loss": 0.25628653168678284, "epoch": 9.41, "learning_rate": 2.430977353595531e-06, "loss": 0.2877, "step": 26030, "task_loss": 0.47668391466140747 }, { "compression_loss": 0.0, "distillation_loss": 0.26848334074020386, "epoch": 9.41, "learning_rate": 2.401958532980213e-06, "loss": 0.264, "step": 26040, "task_loss": 0.5960483551025391 }, { "compression_loss": 0.0, "distillation_loss": 0.3336072564125061, "epoch": 9.41, "learning_rate": 2.3731077988283558e-06, "loss": 0.305, "step": 26050, "task_loss": 0.7050433158874512 }, { "compression_loss": 0.0, "distillation_loss": 0.39169514179229736, "epoch": 9.42, "learning_rate": 2.34442529990403e-06, "loss": 0.269, "step": 26060, "task_loss": 1.0993900299072266 }, { "compression_loss": 0.0, "distillation_loss": 0.25674140453338623, "epoch": 9.42, "learning_rate": 2.315911184103842e-06, "loss": 0.2943, "step": 26070, "task_loss": 0.6436038017272949 }, { "compression_loss": 0.0, "distillation_loss": 0.26511308550834656, "epoch": 9.43, "learning_rate": 2.287565598456169e-06, "loss": 0.2789, "step": 26080, "task_loss": 0.13820317387580872 }, { "compression_loss": 0.0, "distillation_loss": 0.24257254600524902, "epoch": 9.43, "learning_rate": 2.259388689120386e-06, "loss": 0.2852, "step": 26090, "task_loss": 0.5551714897155762 }, { "compression_loss": 0.0, "distillation_loss": 0.25758957862854004, "epoch": 9.43, "learning_rate": 2.2313806013861007e-06, "loss": 0.281, "step": 26100, "task_loss": 0.46998971700668335 }, { "compression_loss": 0.0, "distillation_loss": 0.28022924065589905, "epoch": 9.44, "learning_rate": 2.2035414796724365e-06, "loss": 0.2816, "step": 26110, "task_loss": 0.5369085073471069 }, { "compression_loss": 0.0, "distillation_loss": 0.3347325325012207, "epoch": 9.44, "learning_rate": 2.1758714675272663e-06, "loss": 0.2728, "step": 26120, "task_loss": 0.9577845931053162 }, { "compression_loss": 0.0, "distillation_loss": 0.37050122022628784, "epoch": 9.44, "learning_rate": 2.1483707076264827e-06, "loss": 0.2957, "step": 26130, "task_loss": 0.3568909168243408 }, { "compression_loss": 0.0, "distillation_loss": 0.27206090092658997, "epoch": 9.45, "learning_rate": 2.121039341773235e-06, "loss": 0.2914, "step": 26140, "task_loss": 0.5149966478347778 }, { "compression_loss": 0.0, "distillation_loss": 0.2517910599708557, "epoch": 9.45, "learning_rate": 2.0938775108972628e-06, "loss": 0.2741, "step": 26150, "task_loss": 0.27969563007354736 }, { "compression_loss": 0.0, "distillation_loss": 0.1978009045124054, "epoch": 9.45, "learning_rate": 2.0668853550540775e-06, "loss": 0.2689, "step": 26160, "task_loss": 0.6869149208068848 }, { "compression_loss": 0.0, "distillation_loss": 0.35537177324295044, "epoch": 9.46, "learning_rate": 2.0400630134243162e-06, "loss": 0.2731, "step": 26170, "task_loss": 0.6466042399406433 }, { "compression_loss": 0.0, "distillation_loss": 0.32208681106567383, "epoch": 9.46, "learning_rate": 2.013410624312997e-06, "loss": 0.3048, "step": 26180, "task_loss": 0.41794082522392273 }, { "compression_loss": 0.0, "distillation_loss": 0.32221776247024536, "epoch": 9.47, "learning_rate": 1.9869283251488022e-06, "loss": 0.285, "step": 26190, "task_loss": 0.5621099472045898 }, { "compression_loss": 0.0, "distillation_loss": 0.28082454204559326, "epoch": 9.47, "learning_rate": 1.9606162524833615e-06, "loss": 0.2701, "step": 26200, "task_loss": 0.492424339056015 }, { "compression_loss": 0.0, "distillation_loss": 0.293172687292099, "epoch": 9.47, "learning_rate": 1.9344745419905726e-06, "loss": 0.2903, "step": 26210, "task_loss": 0.8020915985107422 }, { "compression_loss": 0.0, "distillation_loss": 0.3128081262111664, "epoch": 9.48, "learning_rate": 1.9085033284658917e-06, "loss": 0.3234, "step": 26220, "task_loss": 0.9452996253967285 }, { "compression_loss": 0.0, "distillation_loss": 0.22008506953716278, "epoch": 9.48, "learning_rate": 1.8827027458256239e-06, "loss": 0.2711, "step": 26230, "task_loss": 0.38822752237319946 }, { "compression_loss": 0.0, "distillation_loss": 0.2757604420185089, "epoch": 9.48, "learning_rate": 1.857072927106244e-06, "loss": 0.2846, "step": 26240, "task_loss": 0.554851233959198 }, { "compression_loss": 0.0, "distillation_loss": 0.33274781703948975, "epoch": 9.49, "learning_rate": 1.831614004463733e-06, "loss": 0.301, "step": 26250, "task_loss": 0.6326380968093872 }, { "epoch": 9.49, "eval_exact_match": 83.4247871333964, "eval_f1": 90.22614199574525, "step": 26250 }, { "compression_loss": 0.0, "distillation_loss": 0.210701122879982, "epoch": 9.49, "learning_rate": 1.806326109172838e-06, "loss": 0.278, "step": 26260, "task_loss": 0.2917744815349579 }, { "compression_loss": 0.0, "distillation_loss": 0.38611477613449097, "epoch": 9.49, "learning_rate": 1.781209371626461e-06, "loss": 0.3125, "step": 26270, "task_loss": 0.5859373807907104 }, { "compression_loss": 0.0, "distillation_loss": 0.2683292031288147, "epoch": 9.5, "learning_rate": 1.75626392133494e-06, "loss": 0.2873, "step": 26280, "task_loss": 0.4396800994873047 }, { "compression_loss": 0.0, "distillation_loss": 0.3382301926612854, "epoch": 9.5, "learning_rate": 1.7314898869254157e-06, "loss": 0.2832, "step": 26290, "task_loss": 0.4854893684387207 }, { "compression_loss": 0.0, "distillation_loss": 0.20767495036125183, "epoch": 9.5, "learning_rate": 1.7068873961411294e-06, "loss": 0.2781, "step": 26300, "task_loss": 0.23974767327308655 }, { "compression_loss": 0.0, "distillation_loss": 0.26005885004997253, "epoch": 9.51, "learning_rate": 1.6824565758408027e-06, "loss": 0.286, "step": 26310, "task_loss": 0.3799484968185425 }, { "compression_loss": 0.0, "distillation_loss": 0.2298750877380371, "epoch": 9.51, "learning_rate": 1.6581975519979562e-06, "loss": 0.2722, "step": 26320, "task_loss": 0.4086710810661316 }, { "compression_loss": 0.0, "distillation_loss": 0.2837666869163513, "epoch": 9.52, "learning_rate": 1.6341104497002816e-06, "loss": 0.2824, "step": 26330, "task_loss": 0.403903067111969 }, { "compression_loss": 0.0, "distillation_loss": 0.19592724740505219, "epoch": 9.52, "learning_rate": 1.6101953931489643e-06, "loss": 0.2891, "step": 26340, "task_loss": 0.30998939275741577 }, { "compression_loss": 0.0, "distillation_loss": 0.23085609078407288, "epoch": 9.52, "learning_rate": 1.5864525056580967e-06, "loss": 0.3152, "step": 26350, "task_loss": 0.6044930219650269 }, { "compression_loss": 0.0, "distillation_loss": 0.3257802724838257, "epoch": 9.53, "learning_rate": 1.5628819096539764e-06, "loss": 0.3009, "step": 26360, "task_loss": 0.5781923532485962 }, { "compression_loss": 0.0, "distillation_loss": 0.22241540253162384, "epoch": 9.53, "learning_rate": 1.5394837266745246e-06, "loss": 0.2875, "step": 26370, "task_loss": 0.3078111410140991 }, { "compression_loss": 0.0, "distillation_loss": 0.28470808267593384, "epoch": 9.53, "learning_rate": 1.5162580773686438e-06, "loss": 0.3048, "step": 26380, "task_loss": 0.3852382302284241 }, { "compression_loss": 0.0, "distillation_loss": 0.2786301076412201, "epoch": 9.54, "learning_rate": 1.4932050814955942e-06, "loss": 0.2919, "step": 26390, "task_loss": 0.42377230525016785 }, { "compression_loss": 0.0, "distillation_loss": 0.20077863335609436, "epoch": 9.54, "learning_rate": 1.470324857924365e-06, "loss": 0.2532, "step": 26400, "task_loss": 0.4952772259712219 }, { "compression_loss": 0.0, "distillation_loss": 0.27380576729774475, "epoch": 9.54, "learning_rate": 1.4476175246330906e-06, "loss": 0.269, "step": 26410, "task_loss": 0.581963300704956 }, { "compression_loss": 0.0, "distillation_loss": 0.28494900465011597, "epoch": 9.55, "learning_rate": 1.4250831987084155e-06, "loss": 0.2935, "step": 26420, "task_loss": 0.604192852973938 }, { "compression_loss": 0.0, "distillation_loss": 0.23649415373802185, "epoch": 9.55, "learning_rate": 1.4027219963449087e-06, "loss": 0.2837, "step": 26430, "task_loss": 0.4841282367706299 }, { "compression_loss": 0.0, "distillation_loss": 0.284720242023468, "epoch": 9.56, "learning_rate": 1.3805340328444287e-06, "loss": 0.2888, "step": 26440, "task_loss": 0.30555108189582825 }, { "compression_loss": 0.0, "distillation_loss": 0.26956403255462646, "epoch": 9.56, "learning_rate": 1.358519422615601e-06, "loss": 0.2874, "step": 26450, "task_loss": 0.7349599599838257 }, { "compression_loss": 0.0, "distillation_loss": 0.2146541029214859, "epoch": 9.56, "learning_rate": 1.3366782791731396e-06, "loss": 0.2741, "step": 26460, "task_loss": 0.4127127528190613 }, { "compression_loss": 0.0, "distillation_loss": 0.3042886257171631, "epoch": 9.57, "learning_rate": 1.315010715137333e-06, "loss": 0.2722, "step": 26470, "task_loss": 0.5704532861709595 }, { "compression_loss": 0.0, "distillation_loss": 0.2715678811073303, "epoch": 9.57, "learning_rate": 1.2935168422334307e-06, "loss": 0.2912, "step": 26480, "task_loss": 0.40685567259788513 }, { "compression_loss": 0.0, "distillation_loss": 0.23535844683647156, "epoch": 9.57, "learning_rate": 1.2721967712910648e-06, "loss": 0.2938, "step": 26490, "task_loss": 0.571018636226654 }, { "compression_loss": 0.0, "distillation_loss": 0.34679874777793884, "epoch": 9.58, "learning_rate": 1.2510506122436886e-06, "loss": 0.2925, "step": 26500, "task_loss": 0.7452425956726074 }, { "epoch": 9.58, "eval_exact_match": 83.37748344370861, "eval_f1": 90.14017214184005, "step": 26500 }, { "compression_loss": 0.0, "distillation_loss": 0.25748512148857117, "epoch": 9.58, "learning_rate": 1.2300784741280078e-06, "loss": 0.2815, "step": 26510, "task_loss": 0.6882176399230957 }, { "compression_loss": 0.0, "distillation_loss": 0.314084529876709, "epoch": 9.58, "learning_rate": 1.209280465083427e-06, "loss": 0.2916, "step": 26520, "task_loss": 0.6169145107269287 }, { "compression_loss": 0.0, "distillation_loss": 0.3948596715927124, "epoch": 9.59, "learning_rate": 1.1886566923514634e-06, "loss": 0.2843, "step": 26530, "task_loss": 0.5022493600845337 }, { "compression_loss": 0.0, "distillation_loss": 0.2946526110172272, "epoch": 9.59, "learning_rate": 1.1682072622752342e-06, "loss": 0.3098, "step": 26540, "task_loss": 0.4513667821884155 }, { "compression_loss": 0.0, "distillation_loss": 0.26810207962989807, "epoch": 9.6, "learning_rate": 1.147932280298877e-06, "loss": 0.2904, "step": 26550, "task_loss": 0.5581741333007812 }, { "compression_loss": 0.0, "distillation_loss": 0.3102302551269531, "epoch": 9.6, "learning_rate": 1.127831850967007e-06, "loss": 0.2916, "step": 26560, "task_loss": 0.4799581468105316 }, { "compression_loss": 0.0, "distillation_loss": 0.2536352872848511, "epoch": 9.6, "learning_rate": 1.1079060779242022e-06, "loss": 0.2907, "step": 26570, "task_loss": 0.4057456851005554 }, { "compression_loss": 0.0, "distillation_loss": 0.30852973461151123, "epoch": 9.61, "learning_rate": 1.0881550639144531e-06, "loss": 0.2985, "step": 26580, "task_loss": 0.35658955574035645 }, { "compression_loss": 0.0, "distillation_loss": 0.2384970337152481, "epoch": 9.61, "learning_rate": 1.0685789107806264e-06, "loss": 0.27, "step": 26590, "task_loss": 0.4211677014827728 }, { "compression_loss": 0.0, "distillation_loss": 0.2926676273345947, "epoch": 9.61, "learning_rate": 1.0491777194639433e-06, "loss": 0.2998, "step": 26600, "task_loss": 0.9406899809837341 }, { "compression_loss": 0.0, "distillation_loss": 0.30535075068473816, "epoch": 9.62, "learning_rate": 1.029951590003487e-06, "loss": 0.2847, "step": 26610, "task_loss": 0.4420982897281647 }, { "compression_loss": 0.0, "distillation_loss": 0.25047576427459717, "epoch": 9.62, "learning_rate": 1.0109006215356314e-06, "loss": 0.2606, "step": 26620, "task_loss": 0.855073094367981 }, { "compression_loss": 0.0, "distillation_loss": 0.2909805178642273, "epoch": 9.62, "learning_rate": 9.920249122935775e-07, "loss": 0.293, "step": 26630, "task_loss": 0.49562349915504456 }, { "compression_loss": 0.0, "distillation_loss": 0.2831922769546509, "epoch": 9.63, "learning_rate": 9.733245596068346e-07, "loss": 0.2753, "step": 26640, "task_loss": 0.673780083656311 }, { "compression_loss": 0.0, "distillation_loss": 0.2511312961578369, "epoch": 9.63, "learning_rate": 9.547996599007135e-07, "loss": 0.2762, "step": 26650, "task_loss": 0.47640103101730347 }, { "compression_loss": 0.0, "distillation_loss": 0.3380993604660034, "epoch": 9.63, "learning_rate": 9.364503086958147e-07, "loss": 0.3117, "step": 26660, "task_loss": 0.5715634822845459 }, { "compression_loss": 0.0, "distillation_loss": 0.21443752944469452, "epoch": 9.64, "learning_rate": 9.182766006075692e-07, "loss": 0.3208, "step": 26670, "task_loss": 0.2922282814979553 }, { "compression_loss": 0.0, "distillation_loss": 0.2359142154455185, "epoch": 9.64, "learning_rate": 9.002786293457259e-07, "loss": 0.2814, "step": 26680, "task_loss": 0.5400431156158447 }, { "compression_loss": 0.0, "distillation_loss": 0.22956624627113342, "epoch": 9.65, "learning_rate": 8.824564877138851e-07, "loss": 0.2725, "step": 26690, "task_loss": 0.6306565999984741 }, { "compression_loss": 0.0, "distillation_loss": 0.2651234269142151, "epoch": 9.65, "learning_rate": 8.648102676089857e-07, "loss": 0.2841, "step": 26700, "task_loss": 0.4753642678260803 }, { "compression_loss": 0.0, "distillation_loss": 0.3443555235862732, "epoch": 9.65, "learning_rate": 8.473400600208857e-07, "loss": 0.2948, "step": 26710, "task_loss": 0.5741276741027832 }, { "compression_loss": 0.0, "distillation_loss": 0.2716384530067444, "epoch": 9.66, "learning_rate": 8.317674383652068e-07, "loss": 0.3119, "step": 26720, "task_loss": 0.25244268774986267 }, { "compression_loss": 0.0, "distillation_loss": 0.29673638939857483, "epoch": 9.66, "learning_rate": 8.146319019851694e-07, "loss": 0.2915, "step": 26730, "task_loss": 0.7287982702255249 }, { "compression_loss": 0.0, "distillation_loss": 0.2692734897136688, "epoch": 9.66, "learning_rate": 7.976726368584502e-07, "loss": 0.298, "step": 26740, "task_loss": 0.32548055052757263 }, { "compression_loss": 0.0, "distillation_loss": 0.2941330671310425, "epoch": 9.67, "learning_rate": 7.808897304327305e-07, "loss": 0.3058, "step": 26750, "task_loss": 0.7823320031166077 }, { "epoch": 9.67, "eval_exact_match": 83.44370860927152, "eval_f1": 90.1998081355494, "step": 26750 }, { "compression_loss": 0.0, "distillation_loss": 0.3193681240081787, "epoch": 9.67, "learning_rate": 7.642832692463141e-07, "loss": 0.2897, "step": 26760, "task_loss": 0.8019513487815857 }, { "compression_loss": 0.0, "distillation_loss": 0.22801771759986877, "epoch": 9.67, "learning_rate": 7.478533389276997e-07, "loss": 0.2885, "step": 26770, "task_loss": 0.4875749945640564 }, { "compression_loss": 0.0, "distillation_loss": 0.2520209550857544, "epoch": 9.68, "learning_rate": 7.316000241951303e-07, "loss": 0.2847, "step": 26780, "task_loss": 0.4516429603099823 }, { "compression_loss": 0.0, "distillation_loss": 0.2385767251253128, "epoch": 9.68, "learning_rate": 7.155234088561735e-07, "loss": 0.288, "step": 26790, "task_loss": 0.7741764187812805 }, { "compression_loss": 0.0, "distillation_loss": 0.3399713337421417, "epoch": 9.69, "learning_rate": 6.996235758072472e-07, "loss": 0.2673, "step": 26800, "task_loss": 0.5370351672172546 }, { "compression_loss": 0.0, "distillation_loss": 0.202695831656456, "epoch": 9.69, "learning_rate": 6.839006070332509e-07, "loss": 0.2775, "step": 26810, "task_loss": 0.36401206254959106 }, { "compression_loss": 0.0, "distillation_loss": 0.255326509475708, "epoch": 9.69, "learning_rate": 6.683545836070914e-07, "loss": 0.2813, "step": 26820, "task_loss": 0.7015193700790405 }, { "compression_loss": 0.0, "distillation_loss": 0.23634222149848938, "epoch": 9.7, "learning_rate": 6.529855856892941e-07, "loss": 0.2704, "step": 26830, "task_loss": 0.3981613516807556 }, { "compression_loss": 0.0, "distillation_loss": 0.25996315479278564, "epoch": 9.7, "learning_rate": 6.377936925275834e-07, "loss": 0.2703, "step": 26840, "task_loss": 0.31600648164749146 }, { "compression_loss": 0.0, "distillation_loss": 0.294119656085968, "epoch": 9.7, "learning_rate": 6.22778982456471e-07, "loss": 0.2867, "step": 26850, "task_loss": 0.4647236764431 }, { "compression_loss": 0.0, "distillation_loss": 0.3896469175815582, "epoch": 9.71, "learning_rate": 6.07941532896844e-07, "loss": 0.2964, "step": 26860, "task_loss": 0.46653464436531067 }, { "compression_loss": 0.0, "distillation_loss": 0.24614590406417847, "epoch": 9.71, "learning_rate": 5.932814203555835e-07, "loss": 0.2757, "step": 26870, "task_loss": 0.26867130398750305 }, { "compression_loss": 0.0, "distillation_loss": 0.24889053404331207, "epoch": 9.71, "learning_rate": 5.787987204251615e-07, "loss": 0.2827, "step": 26880, "task_loss": 1.2295489311218262 }, { "compression_loss": 0.0, "distillation_loss": 0.2833463251590729, "epoch": 9.72, "learning_rate": 5.644935077832475e-07, "loss": 0.2791, "step": 26890, "task_loss": 0.6843339800834656 }, { "compression_loss": 0.0, "distillation_loss": 0.28050845861434937, "epoch": 9.72, "learning_rate": 5.50365856192328e-07, "loss": 0.2909, "step": 26900, "task_loss": 1.0125864744186401 }, { "compression_loss": 0.0, "distillation_loss": 0.22105228900909424, "epoch": 9.73, "learning_rate": 5.3641583849933e-07, "loss": 0.2665, "step": 26910, "task_loss": 0.577481746673584 }, { "compression_loss": 0.0, "distillation_loss": 0.29488325119018555, "epoch": 9.73, "learning_rate": 5.22643526635228e-07, "loss": 0.2781, "step": 26920, "task_loss": 0.3614484965801239 }, { "compression_loss": 0.0, "distillation_loss": 0.29972535371780396, "epoch": 9.73, "learning_rate": 5.090489916146983e-07, "loss": 0.2706, "step": 26930, "task_loss": 0.7378089427947998 }, { "compression_loss": 0.0, "distillation_loss": 0.2527899444103241, "epoch": 9.74, "learning_rate": 4.956323035357346e-07, "loss": 0.2986, "step": 26940, "task_loss": 0.4819590449333191 }, { "compression_loss": 0.0, "distillation_loss": 0.2659050226211548, "epoch": 9.74, "learning_rate": 4.823935315792937e-07, "loss": 0.304, "step": 26950, "task_loss": 0.5571587681770325 }, { "compression_loss": 0.0, "distillation_loss": 0.2077215313911438, "epoch": 9.74, "learning_rate": 4.693327440089273e-07, "loss": 0.2783, "step": 26960, "task_loss": 0.5672307014465332 }, { "compression_loss": 0.0, "distillation_loss": 0.35510140657424927, "epoch": 9.75, "learning_rate": 4.5645000817045476e-07, "loss": 0.3091, "step": 26970, "task_loss": 0.4848492443561554 }, { "compression_loss": 0.0, "distillation_loss": 0.39507466554641724, "epoch": 9.75, "learning_rate": 4.437453904915944e-07, "loss": 0.2982, "step": 26980, "task_loss": 0.914400577545166 }, { "compression_loss": 0.0, "distillation_loss": 0.3037624955177307, "epoch": 9.75, "learning_rate": 4.312189564816293e-07, "loss": 0.266, "step": 26990, "task_loss": 0.8692284822463989 }, { "compression_loss": 0.0, "distillation_loss": 0.29725557565689087, "epoch": 9.76, "learning_rate": 4.1887077073105733e-07, "loss": 0.2947, "step": 27000, "task_loss": 0.7238430976867676 }, { "epoch": 9.76, "eval_exact_match": 83.04635761589404, "eval_f1": 89.96929111407182, "step": 27000 }, { "compression_loss": 0.0, "distillation_loss": 0.2712564468383789, "epoch": 9.76, "learning_rate": 4.067008969112845e-07, "loss": 0.2767, "step": 27010, "task_loss": 0.7396634817123413 }, { "compression_loss": 0.0, "distillation_loss": 0.28446534276008606, "epoch": 9.77, "learning_rate": 3.9470939777426715e-07, "loss": 0.273, "step": 27020, "task_loss": 0.5197400450706482 }, { "compression_loss": 0.0, "distillation_loss": 0.2189192771911621, "epoch": 9.77, "learning_rate": 3.828963351522052e-07, "loss": 0.2674, "step": 27030, "task_loss": 0.5614264607429504 }, { "compression_loss": 0.0, "distillation_loss": 0.31505560874938965, "epoch": 9.77, "learning_rate": 3.712617699572312e-07, "loss": 0.2818, "step": 27040, "task_loss": 0.6665478944778442 }, { "compression_loss": 0.0, "distillation_loss": 0.3059966564178467, "epoch": 9.78, "learning_rate": 3.5980576218107237e-07, "loss": 0.3109, "step": 27050, "task_loss": 0.7996760010719299 }, { "compression_loss": 0.0, "distillation_loss": 0.2522026300430298, "epoch": 9.78, "learning_rate": 3.4852837089475116e-07, "loss": 0.2912, "step": 27060, "task_loss": 0.3180551826953888 }, { "compression_loss": 0.0, "distillation_loss": 0.28468310832977295, "epoch": 9.78, "learning_rate": 3.3742965424829017e-07, "loss": 0.2834, "step": 27070, "task_loss": 0.4297701418399811 }, { "compression_loss": 0.0, "distillation_loss": 0.33653613924980164, "epoch": 9.79, "learning_rate": 3.265096694704128e-07, "loss": 0.285, "step": 27080, "task_loss": 0.666775107383728 }, { "compression_loss": 0.0, "distillation_loss": 0.3369113802909851, "epoch": 9.79, "learning_rate": 3.157684728682247e-07, "loss": 0.278, "step": 27090, "task_loss": 0.5345579385757446 }, { "compression_loss": 0.0, "distillation_loss": 0.2958570420742035, "epoch": 9.79, "learning_rate": 3.0520611982694955e-07, "loss": 0.2678, "step": 27100, "task_loss": 0.4423734247684479 }, { "compression_loss": 0.0, "distillation_loss": 0.29316452145576477, "epoch": 9.8, "learning_rate": 2.9482266480963355e-07, "loss": 0.3016, "step": 27110, "task_loss": 0.5462176203727722 }, { "compression_loss": 0.0, "distillation_loss": 0.29860496520996094, "epoch": 9.8, "learning_rate": 2.846181613568621e-07, "loss": 0.2853, "step": 27120, "task_loss": 0.36607810854911804 }, { "compression_loss": 0.0, "distillation_loss": 0.3323076367378235, "epoch": 9.8, "learning_rate": 2.7459266208648354e-07, "loss": 0.2869, "step": 27130, "task_loss": 0.6542870998382568 }, { "compression_loss": 0.0, "distillation_loss": 0.2639560401439667, "epoch": 9.81, "learning_rate": 2.6474621869334134e-07, "loss": 0.2994, "step": 27140, "task_loss": 0.6767532229423523 }, { "compression_loss": 0.0, "distillation_loss": 0.28091293573379517, "epoch": 9.81, "learning_rate": 2.5507888194901735e-07, "loss": 0.2785, "step": 27150, "task_loss": 0.632643461227417 }, { "compression_loss": 0.0, "distillation_loss": 0.2712363004684448, "epoch": 9.82, "learning_rate": 2.455907017015446e-07, "loss": 0.2911, "step": 27160, "task_loss": 0.640754222869873 }, { "compression_loss": 0.0, "distillation_loss": 0.26335710287094116, "epoch": 9.82, "learning_rate": 2.3628172687516988e-07, "loss": 0.2924, "step": 27170, "task_loss": 0.45733070373535156 }, { "compression_loss": 0.0, "distillation_loss": 0.27504533529281616, "epoch": 9.82, "learning_rate": 2.2715200547010147e-07, "loss": 0.2924, "step": 27180, "task_loss": 0.5883292555809021 }, { "compression_loss": 0.0, "distillation_loss": 0.2938588857650757, "epoch": 9.83, "learning_rate": 2.1820158456225257e-07, "loss": 0.3078, "step": 27190, "task_loss": 0.45251211524009705 }, { "compression_loss": 0.0, "distillation_loss": 0.2968611717224121, "epoch": 9.83, "learning_rate": 2.094305103029964e-07, "loss": 0.2776, "step": 27200, "task_loss": 0.3613067865371704 }, { "compression_loss": 0.0, "distillation_loss": 0.18803226947784424, "epoch": 9.83, "learning_rate": 2.008388279189488e-07, "loss": 0.2865, "step": 27210, "task_loss": 0.317171573638916 }, { "compression_loss": 0.0, "distillation_loss": 0.2504810094833374, "epoch": 9.84, "learning_rate": 1.9242658171171154e-07, "loss": 0.2758, "step": 27220, "task_loss": 0.5608529448509216 }, { "compression_loss": 0.0, "distillation_loss": 0.2654745876789093, "epoch": 9.84, "learning_rate": 1.8419381505765497e-07, "loss": 0.2829, "step": 27230, "task_loss": 0.4092085361480713 }, { "compression_loss": 0.0, "distillation_loss": 0.27439069747924805, "epoch": 9.84, "learning_rate": 1.7614057040769235e-07, "loss": 0.2687, "step": 27240, "task_loss": 0.5781913995742798 }, { "compression_loss": 0.0, "distillation_loss": 0.3405267000198364, "epoch": 9.85, "learning_rate": 1.6826688928706256e-07, "loss": 0.3083, "step": 27250, "task_loss": 0.7811588048934937 }, { "epoch": 9.85, "eval_exact_match": 83.23557237464522, "eval_f1": 90.1943930477886, "step": 27250 }, { "compression_loss": 0.0, "distillation_loss": 0.3025619685649872, "epoch": 9.85, "learning_rate": 1.6057281229510831e-07, "loss": 0.3215, "step": 27260, "task_loss": 0.4894791841506958 }, { "compression_loss": 0.0, "distillation_loss": 0.25182005763053894, "epoch": 9.86, "learning_rate": 1.5305837910507824e-07, "loss": 0.2655, "step": 27270, "task_loss": 0.25380706787109375 }, { "compression_loss": 0.0, "distillation_loss": 0.24280399084091187, "epoch": 9.86, "learning_rate": 1.4572362846392073e-07, "loss": 0.2681, "step": 27280, "task_loss": 0.8004210591316223 }, { "compression_loss": 0.0, "distillation_loss": 0.25001049041748047, "epoch": 9.86, "learning_rate": 1.3856859819207422e-07, "loss": 0.2768, "step": 27290, "task_loss": 0.3485010266304016 }, { "compression_loss": 0.0, "distillation_loss": 0.2553676664829254, "epoch": 9.87, "learning_rate": 1.3159332518328458e-07, "loss": 0.2877, "step": 27300, "task_loss": 0.37561753392219543 }, { "compression_loss": 0.0, "distillation_loss": 0.32698652148246765, "epoch": 9.87, "learning_rate": 1.2479784540439907e-07, "loss": 0.2789, "step": 27310, "task_loss": 0.47436389327049255 }, { "compression_loss": 0.0, "distillation_loss": 0.22853092849254608, "epoch": 9.87, "learning_rate": 1.1818219389520323e-07, "loss": 0.2661, "step": 27320, "task_loss": 0.34670090675354004 }, { "compression_loss": 0.0, "distillation_loss": 0.17037039995193481, "epoch": 9.88, "learning_rate": 1.1174640476821096e-07, "loss": 0.2788, "step": 27330, "task_loss": 0.2407091110944748 }, { "compression_loss": 0.0, "distillation_loss": 0.257743775844574, "epoch": 9.88, "learning_rate": 1.054905112085247e-07, "loss": 0.2676, "step": 27340, "task_loss": 0.41856345534324646 }, { "compression_loss": 0.0, "distillation_loss": 0.2541770339012146, "epoch": 9.88, "learning_rate": 9.941454547362948e-08, "loss": 0.2946, "step": 27350, "task_loss": 0.6853160262107849 }, { "compression_loss": 0.0, "distillation_loss": 0.25304538011550903, "epoch": 9.89, "learning_rate": 9.3518538893253e-08, "loss": 0.2595, "step": 27360, "task_loss": 0.4963826537132263 }, { "compression_loss": 0.0, "distillation_loss": 0.2942352294921875, "epoch": 9.89, "learning_rate": 8.780252186918302e-08, "loss": 0.2936, "step": 27370, "task_loss": 0.8194656372070312 }, { "compression_loss": 0.0, "distillation_loss": 0.2384113222360611, "epoch": 9.9, "learning_rate": 8.226652387512745e-08, "loss": 0.276, "step": 27380, "task_loss": 0.5101650953292847 }, { "compression_loss": 0.0, "distillation_loss": 0.21360494196414948, "epoch": 9.9, "learning_rate": 7.691057345655505e-08, "loss": 0.2589, "step": 27390, "task_loss": 0.39360785484313965 }, { "compression_loss": 0.0, "distillation_loss": 0.33178994059562683, "epoch": 9.9, "learning_rate": 7.17346982305439e-08, "loss": 0.2801, "step": 27400, "task_loss": 0.43100279569625854 }, { "compression_loss": 0.0, "distillation_loss": 0.20473375916481018, "epoch": 9.91, "learning_rate": 6.673892488564925e-08, "loss": 0.2998, "step": 27410, "task_loss": 0.714508056640625 }, { "compression_loss": 0.0, "distillation_loss": 0.39567437767982483, "epoch": 9.91, "learning_rate": 6.192327918175588e-08, "loss": 0.2809, "step": 27420, "task_loss": 0.7333629131317139 }, { "compression_loss": 0.0, "distillation_loss": 0.35162118077278137, "epoch": 9.91, "learning_rate": 5.7287785949957645e-08, "loss": 0.2931, "step": 27430, "task_loss": 0.6849603652954102 }, { "compression_loss": 0.0, "distillation_loss": 0.25811824202537537, "epoch": 9.92, "learning_rate": 5.283246909240979e-08, "loss": 0.2797, "step": 27440, "task_loss": 0.2360648214817047 }, { "compression_loss": 0.0, "distillation_loss": 0.2470182627439499, "epoch": 9.92, "learning_rate": 4.855735158223184e-08, "loss": 0.2769, "step": 27450, "task_loss": 0.27092844247817993 }, { "compression_loss": 0.0, "distillation_loss": 0.30194318294525146, "epoch": 9.92, "learning_rate": 4.446245546335991e-08, "loss": 0.2753, "step": 27460, "task_loss": 0.637370765209198 }, { "compression_loss": 0.0, "distillation_loss": 0.23251381516456604, "epoch": 9.93, "learning_rate": 4.054780185045736e-08, "loss": 0.2846, "step": 27470, "task_loss": 0.4610733985900879 }, { "compression_loss": 0.0, "distillation_loss": 0.26140350103378296, "epoch": 9.93, "learning_rate": 3.6813410928798194e-08, "loss": 0.2864, "step": 27480, "task_loss": 0.3604086637496948 }, { "compression_loss": 0.0, "distillation_loss": 0.2288418412208557, "epoch": 9.93, "learning_rate": 3.3259301954154385e-08, "loss": 0.3032, "step": 27490, "task_loss": 0.36844557523727417 }, { "compression_loss": 0.0, "distillation_loss": 0.30113106966018677, "epoch": 9.94, "learning_rate": 2.9885493252706504e-08, "loss": 0.311, "step": 27500, "task_loss": 0.8197970390319824 }, { "epoch": 9.94, "eval_exact_match": 83.40586565752129, "eval_f1": 90.22397249750207, "step": 27500 }, { "compression_loss": 0.0, "distillation_loss": 0.2746572494506836, "epoch": 9.94, "learning_rate": 2.6692002220946585e-08, "loss": 0.2967, "step": 27510, "task_loss": 0.5205618143081665 }, { "compression_loss": 0.0, "distillation_loss": 0.2536380887031555, "epoch": 9.95, "learning_rate": 2.367884532559261e-08, "loss": 0.2916, "step": 27520, "task_loss": 0.2964051365852356 }, { "compression_loss": 0.0, "distillation_loss": 0.22983843088150024, "epoch": 9.95, "learning_rate": 2.084603810349139e-08, "loss": 0.2668, "step": 27530, "task_loss": 0.4493524134159088 }, { "compression_loss": 0.0, "distillation_loss": 0.21424216032028198, "epoch": 9.95, "learning_rate": 1.8193595161548612e-08, "loss": 0.2512, "step": 27540, "task_loss": 0.4849116802215576 }, { "compression_loss": 0.0, "distillation_loss": 0.2508390247821808, "epoch": 9.96, "learning_rate": 1.5721530176655007e-08, "loss": 0.3112, "step": 27550, "task_loss": 0.2029593586921692 }, { "compression_loss": 0.0, "distillation_loss": 0.23758484423160553, "epoch": 9.96, "learning_rate": 1.3429855895604747e-08, "loss": 0.2829, "step": 27560, "task_loss": 0.41130393743515015 }, { "compression_loss": 0.0, "distillation_loss": 0.2728580832481384, "epoch": 9.96, "learning_rate": 1.1318584135048826e-08, "loss": 0.2873, "step": 27570, "task_loss": 0.4645345211029053 }, { "compression_loss": 0.0, "distillation_loss": 0.3066297769546509, "epoch": 9.97, "learning_rate": 9.387725781405674e-09, "loss": 0.2714, "step": 27580, "task_loss": 0.41699323058128357 }, { "compression_loss": 0.0, "distillation_loss": 0.23650574684143066, "epoch": 9.97, "learning_rate": 7.637290790830087e-09, "loss": 0.2506, "step": 27590, "task_loss": 0.28398048877716064 }, { "compression_loss": 0.0, "distillation_loss": 0.28961479663848877, "epoch": 9.97, "learning_rate": 6.067288189162689e-09, "loss": 0.2773, "step": 27600, "task_loss": 0.5457533001899719 }, { "compression_loss": 0.0, "distillation_loss": 0.27585506439208984, "epoch": 9.98, "learning_rate": 4.677726071856125e-09, "loss": 0.2972, "step": 27610, "task_loss": 0.5272984504699707 }, { "compression_loss": 0.0, "distillation_loss": 0.25609445571899414, "epoch": 9.98, "learning_rate": 3.4686116039633893e-09, "loss": 0.2935, "step": 27620, "task_loss": 0.41631197929382324 }, { "compression_loss": 0.0, "distillation_loss": 0.29504984617233276, "epoch": 9.99, "learning_rate": 2.4399510200873164e-09, "loss": 0.2935, "step": 27630, "task_loss": 0.48990002274513245 }, { "compression_loss": 0.0, "distillation_loss": 0.26198610663414, "epoch": 9.99, "learning_rate": 1.5917496243417205e-09, "loss": 0.2849, "step": 27640, "task_loss": 0.4885336458683014 }, { "compression_loss": 0.0, "distillation_loss": 0.3299500644207001, "epoch": 9.99, "learning_rate": 9.240117903436261e-10, "loss": 0.3169, "step": 27650, "task_loss": 0.3545738160610199 }, { "compression_loss": 0.0, "distillation_loss": 0.2477174997329712, "epoch": 10.0, "learning_rate": 4.3674096117052304e-10, "loss": 0.2932, "step": 27660, "task_loss": 0.4664233326911926 }, { "compression_loss": 0.0, "distillation_loss": 0.18182365596294403, "epoch": 10.0, "learning_rate": 1.299396493603666e-10, "loss": 0.2592, "step": 27670, "task_loss": 0.4754747748374939 }, { "compression_loss": 0.0, "distillation_loss": 0.27542853355407715, "epoch": 10.0, "learning_rate": 3.6094368766059754e-12, "loss": 0.295, "step": 27680, "task_loss": 0.7415885925292969 }, { "compression_loss": 0.0, "distillation_loss": 0.2828420400619507, "epoch": 10.01, "learning_rate": 6.999994224902488e-05, "loss": 0.2729, "step": 27690, "task_loss": 0.44293585419654846 }, { "compression_loss": 0.0, "distillation_loss": 0.24220460653305054, "epoch": 10.01, "learning_rate": 6.999970763601507e-05, "loss": 0.2814, "step": 27700, "task_loss": 0.429747611284256 }, { "compression_loss": 0.0, "distillation_loss": 0.25033819675445557, "epoch": 10.01, "learning_rate": 6.999929255274344e-05, "loss": 0.3027, "step": 27710, "task_loss": 0.3507382273674011 }, { "compression_loss": 0.0, "distillation_loss": 0.29220086336135864, "epoch": 10.02, "learning_rate": 6.999869700135032e-05, "loss": 0.3413, "step": 27720, "task_loss": 0.4566412568092346 }, { "compression_loss": 0.0, "distillation_loss": 0.2963969111442566, "epoch": 10.02, "learning_rate": 6.999792098490654e-05, "loss": 0.309, "step": 27730, "task_loss": 0.6837878227233887 }, { "compression_loss": 0.0, "distillation_loss": 0.36075472831726074, "epoch": 10.03, "learning_rate": 6.999696450741354e-05, "loss": 0.3257, "step": 27740, "task_loss": 0.6962229609489441 }, { "compression_loss": 0.0, "distillation_loss": 0.33492541313171387, "epoch": 10.03, "learning_rate": 6.99958275738032e-05, "loss": 0.332, "step": 27750, "task_loss": 0.8062801361083984 }, { "epoch": 10.03, "eval_exact_match": 82.8476821192053, "eval_f1": 89.69458878455617, "step": 27750 }, { "compression_loss": 0.0, "distillation_loss": 0.23911315202713013, "epoch": 10.03, "learning_rate": 6.999451018993795e-05, "loss": 0.3091, "step": 27760, "task_loss": 0.32358914613723755 }, { "compression_loss": 0.0, "distillation_loss": 0.31843769550323486, "epoch": 10.04, "learning_rate": 6.999301236261068e-05, "loss": 0.3318, "step": 27770, "task_loss": 0.6448853015899658 }, { "compression_loss": 0.0, "distillation_loss": 0.24498042464256287, "epoch": 10.04, "learning_rate": 6.999133409954468e-05, "loss": 0.2847, "step": 27780, "task_loss": 0.6785096526145935 }, { "compression_loss": 0.0, "distillation_loss": 0.28559374809265137, "epoch": 10.04, "learning_rate": 6.998947540939361e-05, "loss": 0.3198, "step": 27790, "task_loss": 0.3165036141872406 }, { "compression_loss": 0.0, "distillation_loss": 0.2844002842903137, "epoch": 10.05, "learning_rate": 6.998743630174154e-05, "loss": 0.35, "step": 27800, "task_loss": 0.4596233665943146 }, { "compression_loss": 0.0, "distillation_loss": 0.3088065981864929, "epoch": 10.05, "learning_rate": 6.998521678710278e-05, "loss": 0.3219, "step": 27810, "task_loss": 0.4042125642299652 }, { "compression_loss": 0.0, "distillation_loss": 0.42176491022109985, "epoch": 10.05, "learning_rate": 6.998281687692191e-05, "loss": 0.3226, "step": 27820, "task_loss": 0.6242938041687012 }, { "compression_loss": 0.0, "distillation_loss": 0.26688888669013977, "epoch": 10.06, "learning_rate": 6.998023658357363e-05, "loss": 0.2935, "step": 27830, "task_loss": 0.7084911465644836 }, { "compression_loss": 0.0, "distillation_loss": 0.2304016351699829, "epoch": 10.06, "learning_rate": 6.997747592036285e-05, "loss": 0.2946, "step": 27840, "task_loss": 0.3190610706806183 }, { "compression_loss": 0.0, "distillation_loss": 0.3269502818584442, "epoch": 10.07, "learning_rate": 6.997453490152445e-05, "loss": 0.3034, "step": 27850, "task_loss": 0.8847068548202515 }, { "compression_loss": 0.0, "distillation_loss": 0.273052453994751, "epoch": 10.07, "learning_rate": 6.997141354222332e-05, "loss": 0.302, "step": 27860, "task_loss": 0.6260406970977783 }, { "compression_loss": 0.0, "distillation_loss": 0.3863045573234558, "epoch": 10.07, "learning_rate": 6.996811185855423e-05, "loss": 0.3105, "step": 27870, "task_loss": 0.745919406414032 }, { "compression_loss": 0.0, "distillation_loss": 0.3247497081756592, "epoch": 10.08, "learning_rate": 6.996462986754179e-05, "loss": 0.323, "step": 27880, "task_loss": 0.9325064420700073 }, { "compression_loss": 0.0, "distillation_loss": 0.30745017528533936, "epoch": 10.08, "learning_rate": 6.99609675871403e-05, "loss": 0.3525, "step": 27890, "task_loss": 0.42076602578163147 }, { "compression_loss": 0.0, "distillation_loss": 0.2294982671737671, "epoch": 10.08, "learning_rate": 6.995712503623371e-05, "loss": 0.3174, "step": 27900, "task_loss": 0.40610066056251526 }, { "compression_loss": 0.0, "distillation_loss": 0.29557597637176514, "epoch": 10.09, "learning_rate": 6.995310223463551e-05, "loss": 0.3273, "step": 27910, "task_loss": 0.42172640562057495 }, { "compression_loss": 0.0, "distillation_loss": 0.41919246315956116, "epoch": 10.09, "learning_rate": 6.994889920308862e-05, "loss": 0.3145, "step": 27920, "task_loss": 0.9535088539123535 }, { "compression_loss": 0.0, "distillation_loss": 0.32514023780822754, "epoch": 10.09, "learning_rate": 6.994451596326527e-05, "loss": 0.3451, "step": 27930, "task_loss": 0.29906806349754333 }, { "compression_loss": 0.0, "distillation_loss": 0.23976783454418182, "epoch": 10.1, "learning_rate": 6.993995253776693e-05, "loss": 0.2963, "step": 27940, "task_loss": 0.37582865357398987 }, { "compression_loss": 0.0, "distillation_loss": 0.2302740216255188, "epoch": 10.1, "learning_rate": 6.993520895012417e-05, "loss": 0.2894, "step": 27950, "task_loss": 0.5257495641708374 }, { "compression_loss": 0.0, "distillation_loss": 0.305722177028656, "epoch": 10.1, "learning_rate": 6.993028522479649e-05, "loss": 0.3383, "step": 27960, "task_loss": 0.5389013886451721 }, { "compression_loss": 0.0, "distillation_loss": 0.29677560925483704, "epoch": 10.11, "learning_rate": 6.992518138717231e-05, "loss": 0.3484, "step": 27970, "task_loss": 0.7256383895874023 }, { "compression_loss": 0.0, "distillation_loss": 0.29055553674697876, "epoch": 10.11, "learning_rate": 6.99198974635687e-05, "loss": 0.3162, "step": 27980, "task_loss": 0.2578037679195404 }, { "compression_loss": 0.0, "distillation_loss": 0.26083576679229736, "epoch": 10.12, "learning_rate": 6.991443348123137e-05, "loss": 0.35, "step": 27990, "task_loss": 0.4990968108177185 }, { "compression_loss": 0.0, "distillation_loss": 0.2811985909938812, "epoch": 10.12, "learning_rate": 6.990878946833444e-05, "loss": 0.2998, "step": 28000, "task_loss": 0.43009185791015625 }, { "epoch": 10.12, "eval_exact_match": 83.0085146641438, "eval_f1": 89.82292999619031, "step": 28000 }, { "compression_loss": 0.0, "distillation_loss": 0.3266202211380005, "epoch": 10.12, "learning_rate": 6.990296545398035e-05, "loss": 0.324, "step": 28010, "task_loss": 0.6813063621520996 }, { "compression_loss": 0.0, "distillation_loss": 0.28243324160575867, "epoch": 10.13, "learning_rate": 6.989696146819965e-05, "loss": 0.3062, "step": 28020, "task_loss": 0.74167400598526 }, { "compression_loss": 0.0, "distillation_loss": 0.44650954008102417, "epoch": 10.13, "learning_rate": 6.989077754195093e-05, "loss": 0.3442, "step": 28030, "task_loss": 0.6010991930961609 }, { "compression_loss": 0.0, "distillation_loss": 0.31492531299591064, "epoch": 10.13, "learning_rate": 6.98844137071206e-05, "loss": 0.3425, "step": 28040, "task_loss": 0.3457045257091522 }, { "compression_loss": 0.0, "distillation_loss": 0.4411157965660095, "epoch": 10.14, "learning_rate": 6.987786999652272e-05, "loss": 0.3247, "step": 28050, "task_loss": 0.5268443822860718 }, { "compression_loss": 0.0, "distillation_loss": 0.3450314402580261, "epoch": 10.14, "learning_rate": 6.987114644389885e-05, "loss": 0.3357, "step": 28060, "task_loss": 0.4746524691581726 }, { "compression_loss": 0.0, "distillation_loss": 0.31496644020080566, "epoch": 10.14, "learning_rate": 6.986424308391791e-05, "loss": 0.3108, "step": 28070, "task_loss": 0.5983226299285889 }, { "compression_loss": 0.0, "distillation_loss": 0.36821407079696655, "epoch": 10.15, "learning_rate": 6.985715995217594e-05, "loss": 0.3272, "step": 28080, "task_loss": 0.38681572675704956 }, { "compression_loss": 0.0, "distillation_loss": 0.2646081745624542, "epoch": 10.15, "learning_rate": 6.984989708519595e-05, "loss": 0.3579, "step": 28090, "task_loss": 0.4529436528682709 }, { "compression_loss": 0.0, "distillation_loss": 0.39518582820892334, "epoch": 10.16, "learning_rate": 6.984245452042771e-05, "loss": 0.3484, "step": 28100, "task_loss": 0.5414221882820129 }, { "compression_loss": 0.0, "distillation_loss": 0.3264884352684021, "epoch": 10.16, "learning_rate": 6.983483229624761e-05, "loss": 0.3151, "step": 28110, "task_loss": 0.29908621311187744 }, { "compression_loss": 0.0, "distillation_loss": 0.3241349458694458, "epoch": 10.16, "learning_rate": 6.982703045195837e-05, "loss": 0.3256, "step": 28120, "task_loss": 0.4549922049045563 }, { "compression_loss": 0.0, "distillation_loss": 0.39608556032180786, "epoch": 10.17, "learning_rate": 6.981904902778897e-05, "loss": 0.3149, "step": 28130, "task_loss": 0.4623832702636719 }, { "compression_loss": 0.0, "distillation_loss": 0.2852123975753784, "epoch": 10.17, "learning_rate": 6.981088806489427e-05, "loss": 0.3348, "step": 28140, "task_loss": 0.5782081484794617 }, { "compression_loss": 0.0, "distillation_loss": 0.295036256313324, "epoch": 10.17, "learning_rate": 6.980254760535496e-05, "loss": 0.3309, "step": 28150, "task_loss": 0.6902420520782471 }, { "compression_loss": 0.0, "distillation_loss": 0.35321053862571716, "epoch": 10.18, "learning_rate": 6.979402769217725e-05, "loss": 0.361, "step": 28160, "task_loss": 0.7698351144790649 }, { "compression_loss": 0.0, "distillation_loss": 0.38389283418655396, "epoch": 10.18, "learning_rate": 6.978532836929269e-05, "loss": 0.3518, "step": 28170, "task_loss": 0.5905847549438477 }, { "compression_loss": 0.0, "distillation_loss": 0.3887339234352112, "epoch": 10.18, "learning_rate": 6.977644968155789e-05, "loss": 0.3568, "step": 28180, "task_loss": 0.321150004863739 }, { "compression_loss": 0.0, "distillation_loss": 0.28333669900894165, "epoch": 10.19, "learning_rate": 6.976739167475437e-05, "loss": 0.3561, "step": 28190, "task_loss": 0.30792325735092163 }, { "compression_loss": 0.0, "distillation_loss": 0.34358274936676025, "epoch": 10.19, "learning_rate": 6.975815439558825e-05, "loss": 0.3405, "step": 28200, "task_loss": 0.562443733215332 }, { "compression_loss": 0.0, "distillation_loss": 0.21040789783000946, "epoch": 10.2, "learning_rate": 6.974873789169005e-05, "loss": 0.3179, "step": 28210, "task_loss": 0.5361858606338501 }, { "compression_loss": 0.0, "distillation_loss": 0.3476879596710205, "epoch": 10.2, "learning_rate": 6.973914221161442e-05, "loss": 0.3577, "step": 28220, "task_loss": 0.46973469853401184 }, { "compression_loss": 0.0, "distillation_loss": 0.32371383905410767, "epoch": 10.2, "learning_rate": 6.972936740483992e-05, "loss": 0.3243, "step": 28230, "task_loss": 0.969584584236145 }, { "compression_loss": 0.0, "distillation_loss": 0.27451419830322266, "epoch": 10.21, "learning_rate": 6.971941352176876e-05, "loss": 0.3535, "step": 28240, "task_loss": 0.4802190661430359 }, { "compression_loss": 0.0, "distillation_loss": 0.38729017972946167, "epoch": 10.21, "learning_rate": 6.970928061372649e-05, "loss": 0.3805, "step": 28250, "task_loss": 0.5118781328201294 }, { "epoch": 10.21, "eval_exact_match": 82.44087038789026, "eval_f1": 89.28424231167489, "step": 28250 }, { "compression_loss": 0.0, "distillation_loss": 0.3107261657714844, "epoch": 10.21, "learning_rate": 6.969896873296178e-05, "loss": 0.3785, "step": 28260, "task_loss": 0.37636032700538635 }, { "compression_loss": 0.0, "distillation_loss": 0.264718234539032, "epoch": 10.22, "learning_rate": 6.968847793264618e-05, "loss": 0.3562, "step": 28270, "task_loss": 0.523037314414978 }, { "compression_loss": 0.0, "distillation_loss": 0.30193042755126953, "epoch": 10.22, "learning_rate": 6.967780826687376e-05, "loss": 0.3533, "step": 28280, "task_loss": 0.3143378794193268 }, { "compression_loss": 0.0, "distillation_loss": 0.4219399094581604, "epoch": 10.22, "learning_rate": 6.966695979066094e-05, "loss": 0.3557, "step": 28290, "task_loss": 0.5991357564926147 }, { "compression_loss": 0.0, "distillation_loss": 0.2808760404586792, "epoch": 10.23, "learning_rate": 6.965593255994606e-05, "loss": 0.3481, "step": 28300, "task_loss": 0.2749364376068115 }, { "compression_loss": 0.0, "distillation_loss": 0.31335628032684326, "epoch": 10.23, "learning_rate": 6.964472663158928e-05, "loss": 0.3115, "step": 28310, "task_loss": 0.4930295944213867 }, { "compression_loss": 0.0, "distillation_loss": 0.46561485528945923, "epoch": 10.23, "learning_rate": 6.96333420633721e-05, "loss": 0.3376, "step": 28320, "task_loss": 0.6846115589141846 }, { "compression_loss": 0.0, "distillation_loss": 0.27835899591445923, "epoch": 10.24, "learning_rate": 6.962177891399719e-05, "loss": 0.3513, "step": 28330, "task_loss": 0.5056399703025818 }, { "compression_loss": 0.0, "distillation_loss": 0.2896972894668579, "epoch": 10.24, "learning_rate": 6.961003724308804e-05, "loss": 0.3194, "step": 28340, "task_loss": 0.43239104747772217 }, { "compression_loss": 0.0, "distillation_loss": 0.3265209197998047, "epoch": 10.25, "learning_rate": 6.959811711118866e-05, "loss": 0.3258, "step": 28350, "task_loss": 0.7637419700622559 }, { "compression_loss": 0.0, "distillation_loss": 0.344051331281662, "epoch": 10.25, "learning_rate": 6.958601857976325e-05, "loss": 0.3481, "step": 28360, "task_loss": 0.33167892694473267 }, { "compression_loss": 0.0, "distillation_loss": 0.39247578382492065, "epoch": 10.25, "learning_rate": 6.957374171119591e-05, "loss": 0.363, "step": 28370, "task_loss": 0.6352831125259399 }, { "compression_loss": 0.0, "distillation_loss": 0.33645737171173096, "epoch": 10.26, "learning_rate": 6.956128656879031e-05, "loss": 0.3583, "step": 28380, "task_loss": 0.49702680110931396 }, { "compression_loss": 0.0, "distillation_loss": 0.369404673576355, "epoch": 10.26, "learning_rate": 6.954865321676934e-05, "loss": 0.3321, "step": 28390, "task_loss": 0.4059206247329712 }, { "compression_loss": 0.0, "distillation_loss": 0.31144052743911743, "epoch": 10.26, "learning_rate": 6.953584172027481e-05, "loss": 0.3918, "step": 28400, "task_loss": 0.5401674509048462 }, { "compression_loss": 0.0, "distillation_loss": 0.3088327646255493, "epoch": 10.27, "learning_rate": 6.95228521453671e-05, "loss": 0.3069, "step": 28410, "task_loss": 0.7411280870437622 }, { "compression_loss": 0.0, "distillation_loss": 0.3105813264846802, "epoch": 10.27, "learning_rate": 6.950968455902485e-05, "loss": 0.3334, "step": 28420, "task_loss": 0.5137057900428772 }, { "compression_loss": 0.0, "distillation_loss": 0.3664456009864807, "epoch": 10.27, "learning_rate": 6.949633902914455e-05, "loss": 0.3259, "step": 28430, "task_loss": 0.42102259397506714 }, { "compression_loss": 0.0, "distillation_loss": 0.27014023065567017, "epoch": 10.28, "learning_rate": 6.948281562454023e-05, "loss": 0.3174, "step": 28440, "task_loss": 0.765162467956543 }, { "compression_loss": 0.0, "distillation_loss": 0.30423638224601746, "epoch": 10.28, "learning_rate": 6.946911441494312e-05, "loss": 0.3587, "step": 28450, "task_loss": 0.5088251829147339 }, { "compression_loss": 0.0, "distillation_loss": 0.2934759855270386, "epoch": 10.29, "learning_rate": 6.945523547100128e-05, "loss": 0.3603, "step": 28460, "task_loss": 0.7545541524887085 }, { "compression_loss": 0.0, "distillation_loss": 0.3176019787788391, "epoch": 10.29, "learning_rate": 6.944117886427917e-05, "loss": 0.3805, "step": 28470, "task_loss": 0.49498802423477173 }, { "compression_loss": 0.0, "distillation_loss": 0.27311158180236816, "epoch": 10.29, "learning_rate": 6.942694466725742e-05, "loss": 0.3732, "step": 28480, "task_loss": 0.46992021799087524 }, { "compression_loss": 0.0, "distillation_loss": 0.30763930082321167, "epoch": 10.3, "learning_rate": 6.941253295333232e-05, "loss": 0.3442, "step": 28490, "task_loss": 0.9897069334983826 }, { "compression_loss": 0.0, "distillation_loss": 0.32085344195365906, "epoch": 10.3, "learning_rate": 6.939794379681553e-05, "loss": 0.3693, "step": 28500, "task_loss": 0.38556602597236633 }, { "epoch": 10.3, "eval_exact_match": 82.76253547776727, "eval_f1": 89.61271925469204, "step": 28500 }, { "compression_loss": 0.0, "distillation_loss": 0.4487529397010803, "epoch": 10.3, "learning_rate": 6.938317727293362e-05, "loss": 0.3326, "step": 28510, "task_loss": 0.825667142868042 }, { "compression_loss": 0.0, "distillation_loss": 0.3878217339515686, "epoch": 10.31, "learning_rate": 6.936823345782777e-05, "loss": 0.3751, "step": 28520, "task_loss": 0.8324292302131653 }, { "compression_loss": 0.0, "distillation_loss": 0.24599125981330872, "epoch": 10.31, "learning_rate": 6.935311242855331e-05, "loss": 0.3319, "step": 28530, "task_loss": 0.2502692639827728 }, { "compression_loss": 0.0, "distillation_loss": 0.3209283947944641, "epoch": 10.31, "learning_rate": 6.933781426307934e-05, "loss": 0.3166, "step": 28540, "task_loss": 0.6298089027404785 }, { "compression_loss": 0.0, "distillation_loss": 0.3517735004425049, "epoch": 10.32, "learning_rate": 6.932233904028838e-05, "loss": 0.3164, "step": 28550, "task_loss": 0.541971743106842 }, { "compression_loss": 0.0, "distillation_loss": 0.29791098833084106, "epoch": 10.32, "learning_rate": 6.930668683997586e-05, "loss": 0.3842, "step": 28560, "task_loss": 0.3178185820579529 }, { "compression_loss": 0.0, "distillation_loss": 0.2982218861579895, "epoch": 10.33, "learning_rate": 6.929085774284978e-05, "loss": 0.3407, "step": 28570, "task_loss": 0.3605462312698364 }, { "compression_loss": 0.0, "distillation_loss": 0.279826283454895, "epoch": 10.33, "learning_rate": 6.92748518305303e-05, "loss": 0.3561, "step": 28580, "task_loss": 0.41762399673461914 }, { "compression_loss": 0.0, "distillation_loss": 0.4041915535926819, "epoch": 10.33, "learning_rate": 6.925866918554928e-05, "loss": 0.3758, "step": 28590, "task_loss": 0.7959795594215393 }, { "compression_loss": 0.0, "distillation_loss": 0.3129355311393738, "epoch": 10.34, "learning_rate": 6.924230989134988e-05, "loss": 0.3674, "step": 28600, "task_loss": 0.2740345299243927 }, { "compression_loss": 0.0, "distillation_loss": 0.40068450570106506, "epoch": 10.34, "learning_rate": 6.922577403228613e-05, "loss": 0.3409, "step": 28610, "task_loss": 0.4845631718635559 }, { "compression_loss": 0.0, "distillation_loss": 0.3121311366558075, "epoch": 10.34, "learning_rate": 6.920906169362248e-05, "loss": 0.3437, "step": 28620, "task_loss": 0.6060510277748108 }, { "compression_loss": 0.0, "distillation_loss": 0.3488480746746063, "epoch": 10.35, "learning_rate": 6.919217296153334e-05, "loss": 0.3565, "step": 28630, "task_loss": 0.5007976293563843 }, { "compression_loss": 0.0, "distillation_loss": 0.31296420097351074, "epoch": 10.35, "learning_rate": 6.917510792310273e-05, "loss": 0.3155, "step": 28640, "task_loss": 0.6471648812294006 }, { "compression_loss": 0.0, "distillation_loss": 0.33371689915657043, "epoch": 10.35, "learning_rate": 6.915786666632371e-05, "loss": 0.3836, "step": 28650, "task_loss": 0.538357138633728 }, { "compression_loss": 0.0, "distillation_loss": 0.2839885950088501, "epoch": 10.36, "learning_rate": 6.914044928009801e-05, "loss": 0.3373, "step": 28660, "task_loss": 0.298797070980072 }, { "compression_loss": 0.0, "distillation_loss": 0.409714013338089, "epoch": 10.36, "learning_rate": 6.91228558542355e-05, "loss": 0.314, "step": 28670, "task_loss": 0.4804989695549011 }, { "compression_loss": 0.0, "distillation_loss": 0.34721839427948, "epoch": 10.37, "learning_rate": 6.910508647945382e-05, "loss": 0.3392, "step": 28680, "task_loss": 0.4981377124786377 }, { "compression_loss": 0.0, "distillation_loss": 0.3223315477371216, "epoch": 10.37, "learning_rate": 6.908714124737785e-05, "loss": 0.3647, "step": 28690, "task_loss": 0.8685950040817261 }, { "compression_loss": 0.0, "distillation_loss": 0.4292945861816406, "epoch": 10.37, "learning_rate": 6.906902025053921e-05, "loss": 0.3661, "step": 28700, "task_loss": 0.7423338294029236 }, { "compression_loss": 0.0, "distillation_loss": 0.3805582523345947, "epoch": 10.38, "learning_rate": 6.905072358237589e-05, "loss": 0.3568, "step": 28710, "task_loss": 0.8040531277656555 }, { "compression_loss": 0.0, "distillation_loss": 0.29263046383857727, "epoch": 10.38, "learning_rate": 6.903225133723164e-05, "loss": 0.325, "step": 28720, "task_loss": 0.5416374206542969 }, { "compression_loss": 0.0, "distillation_loss": 0.4065228998661041, "epoch": 10.38, "learning_rate": 6.901360361035558e-05, "loss": 0.3613, "step": 28730, "task_loss": 0.29483121633529663 }, { "compression_loss": 0.0, "distillation_loss": 0.2587207853794098, "epoch": 10.39, "learning_rate": 6.899478049790166e-05, "loss": 0.3002, "step": 28740, "task_loss": 0.4107722342014313 }, { "compression_loss": 0.0, "distillation_loss": 0.31920528411865234, "epoch": 10.39, "learning_rate": 6.897578209692816e-05, "loss": 0.3557, "step": 28750, "task_loss": 0.5641241073608398 }, { "epoch": 10.39, "eval_exact_match": 82.47871333964049, "eval_f1": 89.37729603350506, "step": 28750 }, { "compression_loss": 0.0, "distillation_loss": 0.420917809009552, "epoch": 10.39, "learning_rate": 6.895660850539724e-05, "loss": 0.385, "step": 28760, "task_loss": 0.7262535095214844 }, { "compression_loss": 0.0, "distillation_loss": 0.304683119058609, "epoch": 10.4, "learning_rate": 6.89372598221744e-05, "loss": 0.3361, "step": 28770, "task_loss": 0.34669065475463867 }, { "compression_loss": 0.0, "distillation_loss": 0.3875262141227722, "epoch": 10.4, "learning_rate": 6.891773614702792e-05, "loss": 0.3481, "step": 28780, "task_loss": 0.563037633895874 }, { "compression_loss": 0.0, "distillation_loss": 0.3265758156776428, "epoch": 10.4, "learning_rate": 6.889803758062846e-05, "loss": 0.365, "step": 28790, "task_loss": 0.550947904586792 }, { "compression_loss": 0.0, "distillation_loss": 0.26651397347450256, "epoch": 10.41, "learning_rate": 6.887816422454846e-05, "loss": 0.3395, "step": 28800, "task_loss": 0.2152198851108551 }, { "compression_loss": 0.0, "distillation_loss": 0.3168576657772064, "epoch": 10.41, "learning_rate": 6.885811618126159e-05, "loss": 0.3296, "step": 28810, "task_loss": 0.5101230144500732 }, { "compression_loss": 0.0, "distillation_loss": 0.32384443283081055, "epoch": 10.42, "learning_rate": 6.883789355414233e-05, "loss": 0.314, "step": 28820, "task_loss": 0.4653700590133667 }, { "compression_loss": 0.0, "distillation_loss": 0.35834062099456787, "epoch": 10.42, "learning_rate": 6.881749644746535e-05, "loss": 0.3611, "step": 28830, "task_loss": 0.37349605560302734 }, { "compression_loss": 0.0, "distillation_loss": 0.2610001266002655, "epoch": 10.42, "learning_rate": 6.879692496640498e-05, "loss": 0.3326, "step": 28840, "task_loss": 0.7551101446151733 }, { "compression_loss": 0.0, "distillation_loss": 0.3156958818435669, "epoch": 10.43, "learning_rate": 6.877617921703468e-05, "loss": 0.3077, "step": 28850, "task_loss": 0.5917555093765259 }, { "compression_loss": 0.0, "distillation_loss": 0.5137543082237244, "epoch": 10.43, "learning_rate": 6.875525930632653e-05, "loss": 0.3528, "step": 28860, "task_loss": 0.5057504177093506 }, { "compression_loss": 0.0, "distillation_loss": 0.2843777537345886, "epoch": 10.43, "learning_rate": 6.873416534215064e-05, "loss": 0.3364, "step": 28870, "task_loss": 0.560868501663208 }, { "compression_loss": 0.0, "distillation_loss": 0.2687450051307678, "epoch": 10.44, "learning_rate": 6.871289743327455e-05, "loss": 0.3915, "step": 28880, "task_loss": 0.39322781562805176 }, { "compression_loss": 0.0, "distillation_loss": 0.4328731894493103, "epoch": 10.44, "learning_rate": 6.869145568936275e-05, "loss": 0.3473, "step": 28890, "task_loss": 1.012038230895996 }, { "compression_loss": 0.0, "distillation_loss": 0.385903537273407, "epoch": 10.44, "learning_rate": 6.866984022097612e-05, "loss": 0.371, "step": 28900, "task_loss": 0.6390801668167114 }, { "compression_loss": 0.0, "distillation_loss": 0.32300782203674316, "epoch": 10.45, "learning_rate": 6.864805113957123e-05, "loss": 0.3252, "step": 28910, "task_loss": 0.3792077302932739 }, { "compression_loss": 0.0, "distillation_loss": 0.23234272003173828, "epoch": 10.45, "learning_rate": 6.862608855749995e-05, "loss": 0.3353, "step": 28920, "task_loss": 0.5149093866348267 }, { "compression_loss": 0.0, "distillation_loss": 0.2324933558702469, "epoch": 10.46, "learning_rate": 6.860395258800871e-05, "loss": 0.3615, "step": 28930, "task_loss": 0.41214847564697266 }, { "compression_loss": 0.0, "distillation_loss": 0.2846713066101074, "epoch": 10.46, "learning_rate": 6.858164334523806e-05, "loss": 0.3071, "step": 28940, "task_loss": 0.3972511887550354 }, { "compression_loss": 0.0, "distillation_loss": 0.329755038022995, "epoch": 10.46, "learning_rate": 6.85591609442219e-05, "loss": 0.3385, "step": 28950, "task_loss": 0.5056531429290771 }, { "compression_loss": 0.0, "distillation_loss": 0.27635079622268677, "epoch": 10.47, "learning_rate": 6.853650550088709e-05, "loss": 0.3617, "step": 28960, "task_loss": 0.3319745659828186 }, { "compression_loss": 0.0, "distillation_loss": 0.24840697646141052, "epoch": 10.47, "learning_rate": 6.85136771320527e-05, "loss": 0.3252, "step": 28970, "task_loss": 0.514814019203186 }, { "compression_loss": 0.0, "distillation_loss": 0.28714248538017273, "epoch": 10.47, "learning_rate": 6.849067595542946e-05, "loss": 0.3363, "step": 28980, "task_loss": 0.4475414752960205 }, { "compression_loss": 0.0, "distillation_loss": 0.34752196073532104, "epoch": 10.48, "learning_rate": 6.846750208961921e-05, "loss": 0.3385, "step": 28990, "task_loss": 0.3642599582672119 }, { "compression_loss": 0.0, "distillation_loss": 0.31241142749786377, "epoch": 10.48, "learning_rate": 6.844415565411416e-05, "loss": 0.3484, "step": 29000, "task_loss": 0.38981834053993225 }, { "epoch": 10.48, "eval_exact_match": 82.66792809839167, "eval_f1": 89.50805048559363, "step": 29000 }, { "compression_loss": 0.0, "distillation_loss": 0.43216896057128906, "epoch": 10.48, "learning_rate": 6.84206367692964e-05, "loss": 0.3545, "step": 29010, "task_loss": 0.4858664274215698 }, { "compression_loss": 0.0, "distillation_loss": 0.4173024296760559, "epoch": 10.49, "learning_rate": 6.83969455564372e-05, "loss": 0.3654, "step": 29020, "task_loss": 0.564123272895813 }, { "compression_loss": 0.0, "distillation_loss": 0.2556716203689575, "epoch": 10.49, "learning_rate": 6.837308213769643e-05, "loss": 0.352, "step": 29030, "task_loss": 0.6481924653053284 }, { "compression_loss": 0.0, "distillation_loss": 0.2759532332420349, "epoch": 10.5, "learning_rate": 6.834904663612188e-05, "loss": 0.3315, "step": 29040, "task_loss": 0.5259366035461426 }, { "compression_loss": 0.0, "distillation_loss": 0.3986692428588867, "epoch": 10.5, "learning_rate": 6.832483917564871e-05, "loss": 0.3774, "step": 29050, "task_loss": 0.8505685329437256 }, { "compression_loss": 0.0, "distillation_loss": 0.353090763092041, "epoch": 10.5, "learning_rate": 6.830045988109869e-05, "loss": 0.3574, "step": 29060, "task_loss": 0.39217841625213623 }, { "compression_loss": 0.0, "distillation_loss": 0.46030208468437195, "epoch": 10.51, "learning_rate": 6.827590887817969e-05, "loss": 0.3201, "step": 29070, "task_loss": 0.6554713249206543 }, { "compression_loss": 0.0, "distillation_loss": 0.30041784048080444, "epoch": 10.51, "learning_rate": 6.825118629348493e-05, "loss": 0.3514, "step": 29080, "task_loss": 0.2941610813140869 }, { "compression_loss": 0.0, "distillation_loss": 0.4227467179298401, "epoch": 10.51, "learning_rate": 6.822629225449237e-05, "loss": 0.361, "step": 29090, "task_loss": 0.520158052444458 }, { "compression_loss": 0.0, "distillation_loss": 0.32006293535232544, "epoch": 10.52, "learning_rate": 6.820122688956404e-05, "loss": 0.3093, "step": 29100, "task_loss": 0.8387197256088257 }, { "compression_loss": 0.0, "distillation_loss": 0.4144279956817627, "epoch": 10.52, "learning_rate": 6.817599032794539e-05, "loss": 0.4, "step": 29110, "task_loss": 0.5196925401687622 }, { "compression_loss": 0.0, "distillation_loss": 0.36042001843452454, "epoch": 10.52, "learning_rate": 6.815058269976462e-05, "loss": 0.3619, "step": 29120, "task_loss": 0.47894126176834106 }, { "compression_loss": 0.0, "distillation_loss": 0.2716860771179199, "epoch": 10.53, "learning_rate": 6.8125004136032e-05, "loss": 0.3666, "step": 29130, "task_loss": 0.32598868012428284 }, { "compression_loss": 0.0, "distillation_loss": 0.3188129663467407, "epoch": 10.53, "learning_rate": 6.809925476863924e-05, "loss": 0.3423, "step": 29140, "task_loss": 0.672299861907959 }, { "compression_loss": 0.0, "distillation_loss": 0.26386719942092896, "epoch": 10.53, "learning_rate": 6.807333473035868e-05, "loss": 0.3565, "step": 29150, "task_loss": 0.4383131265640259 }, { "compression_loss": 0.0, "distillation_loss": 0.27759337425231934, "epoch": 10.54, "learning_rate": 6.80498608827427e-05, "loss": 0.3565, "step": 29160, "task_loss": 0.5900576114654541 }, { "compression_loss": 0.0, "distillation_loss": 0.32816818356513977, "epoch": 10.54, "learning_rate": 6.802361693871463e-05, "loss": 0.3393, "step": 29170, "task_loss": 0.5355024933815002 }, { "compression_loss": 0.0, "distillation_loss": 0.41895249485969543, "epoch": 10.55, "learning_rate": 6.799720271381286e-05, "loss": 0.3376, "step": 29180, "task_loss": 0.832222044467926 }, { "compression_loss": 0.0, "distillation_loss": 0.33841216564178467, "epoch": 10.55, "learning_rate": 6.797061834423806e-05, "loss": 0.3537, "step": 29190, "task_loss": 0.6101768016815186 }, { "compression_loss": 0.0, "distillation_loss": 0.28902673721313477, "epoch": 10.55, "learning_rate": 6.794386396706813e-05, "loss": 0.3269, "step": 29200, "task_loss": 0.2802298069000244 }, { "compression_loss": 0.0, "distillation_loss": 0.336454302072525, "epoch": 10.56, "learning_rate": 6.791693972025767e-05, "loss": 0.3511, "step": 29210, "task_loss": 0.5762617588043213 }, { "compression_loss": 0.0, "distillation_loss": 0.31067967414855957, "epoch": 10.56, "learning_rate": 6.788984574263712e-05, "loss": 0.3879, "step": 29220, "task_loss": 0.5875623226165771 }, { "compression_loss": 0.0, "distillation_loss": 0.32384225726127625, "epoch": 10.56, "learning_rate": 6.786258217391215e-05, "loss": 0.3467, "step": 29230, "task_loss": 0.5701868534088135 }, { "compression_loss": 0.0, "distillation_loss": 0.37212711572647095, "epoch": 10.57, "learning_rate": 6.78351491546629e-05, "loss": 0.3791, "step": 29240, "task_loss": 0.7481470108032227 }, { "compression_loss": 0.0, "distillation_loss": 0.3019322156906128, "epoch": 10.57, "learning_rate": 6.780754682634324e-05, "loss": 0.3331, "step": 29250, "task_loss": 0.5364646911621094 }, { "epoch": 10.57, "eval_exact_match": 82.82876064333018, "eval_f1": 89.56640654091615, "step": 29250 }, { "compression_loss": 0.0, "distillation_loss": 0.43777012825012207, "epoch": 10.57, "learning_rate": 6.777977533128004e-05, "loss": 0.3748, "step": 29260, "task_loss": 0.9455623626708984 }, { "compression_loss": 0.0, "distillation_loss": 0.4065621495246887, "epoch": 10.58, "learning_rate": 6.775183481267248e-05, "loss": 0.3454, "step": 29270, "task_loss": 0.46658486127853394 }, { "compression_loss": 0.0, "distillation_loss": 0.5213733911514282, "epoch": 10.58, "learning_rate": 6.772372541459127e-05, "loss": 0.3868, "step": 29280, "task_loss": 0.6847914457321167 }, { "compression_loss": 0.0, "distillation_loss": 0.26462650299072266, "epoch": 10.59, "learning_rate": 6.769544728197792e-05, "loss": 0.3352, "step": 29290, "task_loss": 0.2257605642080307 }, { "compression_loss": 0.0, "distillation_loss": 0.2530308961868286, "epoch": 10.59, "learning_rate": 6.766700056064398e-05, "loss": 0.3316, "step": 29300, "task_loss": 0.6150197982788086 }, { "compression_loss": 0.0, "distillation_loss": 0.30782073736190796, "epoch": 10.59, "learning_rate": 6.763838539727032e-05, "loss": 0.3285, "step": 29310, "task_loss": 0.5591270923614502 }, { "compression_loss": 0.0, "distillation_loss": 0.4068589210510254, "epoch": 10.6, "learning_rate": 6.760960193940634e-05, "loss": 0.4125, "step": 29320, "task_loss": 0.6238890886306763 }, { "compression_loss": 0.0, "distillation_loss": 0.3537275195121765, "epoch": 10.6, "learning_rate": 6.758065033546923e-05, "loss": 0.3695, "step": 29330, "task_loss": 0.33286938071250916 }, { "compression_loss": 0.0, "distillation_loss": 0.3277410864830017, "epoch": 10.6, "learning_rate": 6.755153073474321e-05, "loss": 0.3383, "step": 29340, "task_loss": 0.6027262806892395 }, { "compression_loss": 0.0, "distillation_loss": 0.3822060227394104, "epoch": 10.61, "learning_rate": 6.752224328737871e-05, "loss": 0.3328, "step": 29350, "task_loss": 0.5942087173461914 }, { "compression_loss": 0.0, "distillation_loss": 0.5594038367271423, "epoch": 10.61, "learning_rate": 6.749278814439167e-05, "loss": 0.402, "step": 29360, "task_loss": 0.8234184980392456 }, { "compression_loss": 0.0, "distillation_loss": 0.38665011525154114, "epoch": 10.61, "learning_rate": 6.74631654576627e-05, "loss": 0.3533, "step": 29370, "task_loss": 0.835229218006134 }, { "compression_loss": 0.0, "distillation_loss": 0.4332032799720764, "epoch": 10.62, "learning_rate": 6.743337537993633e-05, "loss": 0.3692, "step": 29380, "task_loss": 0.5134978890419006 }, { "compression_loss": 0.0, "distillation_loss": 0.34394896030426025, "epoch": 10.62, "learning_rate": 6.740341806482025e-05, "loss": 0.3678, "step": 29390, "task_loss": 0.38100263476371765 }, { "compression_loss": 0.0, "distillation_loss": 0.3152540326118469, "epoch": 10.63, "learning_rate": 6.737329366678442e-05, "loss": 0.3637, "step": 29400, "task_loss": 0.8232011198997498 }, { "compression_loss": 0.0, "distillation_loss": 0.44516491889953613, "epoch": 10.63, "learning_rate": 6.734300234116038e-05, "loss": 0.3467, "step": 29410, "task_loss": 0.7560466527938843 }, { "compression_loss": 0.0, "distillation_loss": 0.28295183181762695, "epoch": 10.63, "learning_rate": 6.731254424414039e-05, "loss": 0.3554, "step": 29420, "task_loss": 0.6221187114715576 }, { "compression_loss": 0.0, "distillation_loss": 0.2875472605228424, "epoch": 10.64, "learning_rate": 6.728191953277663e-05, "loss": 0.338, "step": 29430, "task_loss": 0.6836280822753906 }, { "compression_loss": 0.0, "distillation_loss": 0.26682746410369873, "epoch": 10.64, "learning_rate": 6.725112836498043e-05, "loss": 0.3611, "step": 29440, "task_loss": 0.6012932062149048 }, { "compression_loss": 0.0, "distillation_loss": 0.32244402170181274, "epoch": 10.64, "learning_rate": 6.722017089952138e-05, "loss": 0.3602, "step": 29450, "task_loss": 0.39122363924980164 }, { "compression_loss": 0.0, "distillation_loss": 0.36310338973999023, "epoch": 10.65, "learning_rate": 6.71890472960266e-05, "loss": 0.3738, "step": 29460, "task_loss": 0.5470183491706848 }, { "compression_loss": 0.0, "distillation_loss": 0.3462553918361664, "epoch": 10.65, "learning_rate": 6.715775771497985e-05, "loss": 0.367, "step": 29470, "task_loss": 0.5989048480987549 }, { "compression_loss": 0.0, "distillation_loss": 0.35961276292800903, "epoch": 10.65, "learning_rate": 6.712630231772072e-05, "loss": 0.3563, "step": 29480, "task_loss": 0.6379199028015137 }, { "compression_loss": 0.0, "distillation_loss": 0.30847465991973877, "epoch": 10.66, "learning_rate": 6.709468126644384e-05, "loss": 0.3509, "step": 29490, "task_loss": 0.7017759084701538 }, { "compression_loss": 0.0, "distillation_loss": 0.3288659453392029, "epoch": 10.66, "learning_rate": 6.706289472419797e-05, "loss": 0.3393, "step": 29500, "task_loss": 0.6862675547599792 }, { "epoch": 10.66, "eval_exact_match": 82.18543046357615, "eval_f1": 89.15288807465497, "step": 29500 }, { "compression_loss": 0.0, "distillation_loss": 0.39969784021377563, "epoch": 10.66, "learning_rate": 6.703094285488522e-05, "loss": 0.3586, "step": 29510, "task_loss": 0.5755327939987183 }, { "compression_loss": 0.0, "distillation_loss": 0.33747461438179016, "epoch": 10.67, "learning_rate": 6.699882582326016e-05, "loss": 0.3234, "step": 29520, "task_loss": 0.8560225963592529 }, { "compression_loss": 0.0, "distillation_loss": 0.6065595149993896, "epoch": 10.67, "learning_rate": 6.6966543794929e-05, "loss": 0.3863, "step": 29530, "task_loss": 0.9350003004074097 }, { "compression_loss": 0.0, "distillation_loss": 0.2929680347442627, "epoch": 10.68, "learning_rate": 6.693409693634875e-05, "loss": 0.337, "step": 29540, "task_loss": 0.4647179841995239 }, { "compression_loss": 0.0, "distillation_loss": 0.2577773332595825, "epoch": 10.68, "learning_rate": 6.69014854148263e-05, "loss": 0.3784, "step": 29550, "task_loss": 0.2627255618572235 }, { "compression_loss": 0.0, "distillation_loss": 0.3602387309074402, "epoch": 10.68, "learning_rate": 6.686870939851766e-05, "loss": 0.3755, "step": 29560, "task_loss": 0.4507540464401245 }, { "compression_loss": 0.0, "distillation_loss": 0.34514838457107544, "epoch": 10.69, "learning_rate": 6.683576905642695e-05, "loss": 0.3542, "step": 29570, "task_loss": 0.6163294315338135 }, { "compression_loss": 0.0, "distillation_loss": 0.3738999664783478, "epoch": 10.69, "learning_rate": 6.680266455840568e-05, "loss": 0.3251, "step": 29580, "task_loss": 0.40096384286880493 }, { "compression_loss": 0.0, "distillation_loss": 0.37798354029655457, "epoch": 10.69, "learning_rate": 6.676939607515175e-05, "loss": 0.3634, "step": 29590, "task_loss": 0.8661670684814453 }, { "compression_loss": 0.0, "distillation_loss": 0.36543959379196167, "epoch": 10.7, "learning_rate": 6.673596377820867e-05, "loss": 0.3391, "step": 29600, "task_loss": 0.6424458026885986 }, { "compression_loss": 0.0, "distillation_loss": 0.39285749197006226, "epoch": 10.7, "learning_rate": 6.67023678399646e-05, "loss": 0.3876, "step": 29610, "task_loss": 0.49506786465644836 }, { "compression_loss": 0.0, "distillation_loss": 0.35833126306533813, "epoch": 10.7, "learning_rate": 6.666860843365148e-05, "loss": 0.3749, "step": 29620, "task_loss": 0.40317094326019287 }, { "compression_loss": 0.0, "distillation_loss": 0.32828640937805176, "epoch": 10.71, "learning_rate": 6.663468573334417e-05, "loss": 0.3612, "step": 29630, "task_loss": 0.4777117371559143 }, { "compression_loss": 0.0, "distillation_loss": 0.44851982593536377, "epoch": 10.71, "learning_rate": 6.660059991395954e-05, "loss": 0.3595, "step": 29640, "task_loss": 0.5143475532531738 }, { "compression_loss": 0.0, "distillation_loss": 0.4900306761264801, "epoch": 10.72, "learning_rate": 6.656635115125549e-05, "loss": 0.3457, "step": 29650, "task_loss": 0.8377711772918701 }, { "compression_loss": 0.0, "distillation_loss": 0.29536741971969604, "epoch": 10.72, "learning_rate": 6.65319396218302e-05, "loss": 0.3508, "step": 29660, "task_loss": 0.6646140813827515 }, { "compression_loss": 0.0, "distillation_loss": 0.2504414916038513, "epoch": 10.72, "learning_rate": 6.649736550312107e-05, "loss": 0.401, "step": 29670, "task_loss": 0.3634515702724457 }, { "compression_loss": 0.0, "distillation_loss": 0.4538690745830536, "epoch": 10.73, "learning_rate": 6.646262897340388e-05, "loss": 0.3476, "step": 29680, "task_loss": 0.3508909344673157 }, { "compression_loss": 0.0, "distillation_loss": 0.3044033646583557, "epoch": 10.73, "learning_rate": 6.642773021179186e-05, "loss": 0.3712, "step": 29690, "task_loss": 0.4467262625694275 }, { "compression_loss": 0.0, "distillation_loss": 0.32180851697921753, "epoch": 10.73, "learning_rate": 6.639266939823477e-05, "loss": 0.3639, "step": 29700, "task_loss": 0.6727160215377808 }, { "compression_loss": 0.0, "distillation_loss": 0.3948644995689392, "epoch": 10.74, "learning_rate": 6.635744671351794e-05, "loss": 0.3736, "step": 29710, "task_loss": 0.30169928073883057 }, { "compression_loss": 0.0, "distillation_loss": 0.5408639907836914, "epoch": 10.74, "learning_rate": 6.632206233926139e-05, "loss": 0.4084, "step": 29720, "task_loss": 0.41215962171554565 }, { "compression_loss": 0.0, "distillation_loss": 0.3110385537147522, "epoch": 10.74, "learning_rate": 6.628651645791885e-05, "loss": 0.3506, "step": 29730, "task_loss": 0.32455557584762573 }, { "compression_loss": 0.0, "distillation_loss": 0.4405866861343384, "epoch": 10.75, "learning_rate": 6.625080925277681e-05, "loss": 0.3797, "step": 29740, "task_loss": 0.5964502096176147 }, { "compression_loss": 0.0, "distillation_loss": 0.3487781882286072, "epoch": 10.75, "learning_rate": 6.621494090795367e-05, "loss": 0.3952, "step": 29750, "task_loss": 0.4532567262649536 }, { "epoch": 10.75, "eval_exact_match": 82.44087038789026, "eval_f1": 89.45854412702076, "step": 29750 }, { "compression_loss": 0.0, "distillation_loss": 0.28924113512039185, "epoch": 10.76, "learning_rate": 6.617891160839865e-05, "loss": 0.3319, "step": 29760, "task_loss": 0.6428242921829224 }, { "compression_loss": 0.0, "distillation_loss": 0.3873733580112457, "epoch": 10.76, "learning_rate": 6.614272153989095e-05, "loss": 0.3655, "step": 29770, "task_loss": 0.48227518796920776 }, { "compression_loss": 0.0, "distillation_loss": 0.3506860136985779, "epoch": 10.76, "learning_rate": 6.61063708890387e-05, "loss": 0.3383, "step": 29780, "task_loss": 0.4459092915058136 }, { "compression_loss": 0.0, "distillation_loss": 0.29523494839668274, "epoch": 10.77, "learning_rate": 6.606985984327813e-05, "loss": 0.3294, "step": 29790, "task_loss": 0.5016797780990601 }, { "compression_loss": 0.0, "distillation_loss": 0.3241945803165436, "epoch": 10.77, "learning_rate": 6.603318859087243e-05, "loss": 0.355, "step": 29800, "task_loss": 0.35577547550201416 }, { "compression_loss": 0.0, "distillation_loss": 0.30834487080574036, "epoch": 10.77, "learning_rate": 6.599635732091092e-05, "loss": 0.3293, "step": 29810, "task_loss": 0.6570316553115845 }, { "compression_loss": 0.0, "distillation_loss": 0.4538179636001587, "epoch": 10.78, "learning_rate": 6.595936622330802e-05, "loss": 0.3694, "step": 29820, "task_loss": 1.0713361501693726 }, { "compression_loss": 0.0, "distillation_loss": 0.3451743721961975, "epoch": 10.78, "learning_rate": 6.592221548880224e-05, "loss": 0.3395, "step": 29830, "task_loss": 0.3584628403186798 }, { "compression_loss": 0.0, "distillation_loss": 0.5398263931274414, "epoch": 10.78, "learning_rate": 6.58849053089553e-05, "loss": 0.3693, "step": 29840, "task_loss": 0.9976974725723267 }, { "compression_loss": 0.0, "distillation_loss": 0.4781019687652588, "epoch": 10.79, "learning_rate": 6.584743587615102e-05, "loss": 0.3554, "step": 29850, "task_loss": 0.7893520593643188 }, { "compression_loss": 0.0, "distillation_loss": 0.27053534984588623, "epoch": 10.79, "learning_rate": 6.580980738359438e-05, "loss": 0.383, "step": 29860, "task_loss": 0.35242271423339844 }, { "compression_loss": 0.0, "distillation_loss": 0.3637334704399109, "epoch": 10.8, "learning_rate": 6.577202002531056e-05, "loss": 0.3162, "step": 29870, "task_loss": 0.5553810596466064 }, { "compression_loss": 0.0, "distillation_loss": 0.3257104754447937, "epoch": 10.8, "learning_rate": 6.573407399614388e-05, "loss": 0.3478, "step": 29880, "task_loss": 0.5849871635437012 }, { "compression_loss": 0.0, "distillation_loss": 0.29262852668762207, "epoch": 10.8, "learning_rate": 6.569596949175681e-05, "loss": 0.3707, "step": 29890, "task_loss": 0.6378922462463379 }, { "compression_loss": 0.0, "distillation_loss": 0.28541576862335205, "epoch": 10.81, "learning_rate": 6.5657706708629e-05, "loss": 0.3268, "step": 29900, "task_loss": 0.3432478904724121 }, { "compression_loss": 0.0, "distillation_loss": 0.455171138048172, "epoch": 10.81, "learning_rate": 6.561928584405624e-05, "loss": 0.3743, "step": 29910, "task_loss": 0.7067415714263916 }, { "compression_loss": 0.0, "distillation_loss": 0.4353346824645996, "epoch": 10.81, "learning_rate": 6.558070709614942e-05, "loss": 0.3513, "step": 29920, "task_loss": 0.6395390629768372 }, { "compression_loss": 0.0, "distillation_loss": 0.313425213098526, "epoch": 10.82, "learning_rate": 6.55419706638335e-05, "loss": 0.3437, "step": 29930, "task_loss": 1.3602879047393799 }, { "compression_loss": 0.0, "distillation_loss": 0.3399485647678375, "epoch": 10.82, "learning_rate": 6.550307674684662e-05, "loss": 0.3678, "step": 29940, "task_loss": 0.3591665029525757 }, { "compression_loss": 0.0, "distillation_loss": 0.3630330562591553, "epoch": 10.82, "learning_rate": 6.546402554573885e-05, "loss": 0.3401, "step": 29950, "task_loss": 0.3910972774028778 }, { "compression_loss": 0.0, "distillation_loss": 0.47807592153549194, "epoch": 10.83, "learning_rate": 6.54248172618713e-05, "loss": 0.3686, "step": 29960, "task_loss": 0.49961668252944946 }, { "compression_loss": 0.0, "distillation_loss": 0.3700833320617676, "epoch": 10.83, "learning_rate": 6.538545209741511e-05, "loss": 0.3815, "step": 29970, "task_loss": 0.7051327228546143 }, { "compression_loss": 0.0, "distillation_loss": 0.28954607248306274, "epoch": 10.83, "learning_rate": 6.534593025535028e-05, "loss": 0.3727, "step": 29980, "task_loss": 0.5030256509780884 }, { "compression_loss": 0.0, "distillation_loss": 0.3162761926651001, "epoch": 10.84, "learning_rate": 6.530625193946472e-05, "loss": 0.3309, "step": 29990, "task_loss": 0.4224050045013428 }, { "compression_loss": 0.0, "distillation_loss": 0.43589645624160767, "epoch": 10.84, "learning_rate": 6.526641735435317e-05, "loss": 0.3636, "step": 30000, "task_loss": 0.6760845184326172 }, { "epoch": 10.84, "eval_exact_match": 82.28949858088932, "eval_f1": 89.30660591571832, "step": 30000 }, { "compression_loss": 0.0, "distillation_loss": 0.47988027334213257, "epoch": 10.85, "learning_rate": 6.522642670541613e-05, "loss": 0.3823, "step": 30010, "task_loss": 0.8904911279678345 }, { "compression_loss": 0.0, "distillation_loss": 0.2862057089805603, "epoch": 10.85, "learning_rate": 6.518628019885888e-05, "loss": 0.3367, "step": 30020, "task_loss": 0.6241217851638794 }, { "compression_loss": 0.0, "distillation_loss": 0.3560543656349182, "epoch": 10.85, "learning_rate": 6.514597804169025e-05, "loss": 0.3666, "step": 30030, "task_loss": 0.6103063821792603 }, { "compression_loss": 0.0, "distillation_loss": 0.6194554567337036, "epoch": 10.86, "learning_rate": 6.510552044172176e-05, "loss": 0.412, "step": 30040, "task_loss": 0.8107380270957947 }, { "compression_loss": 0.0, "distillation_loss": 0.3812370300292969, "epoch": 10.86, "learning_rate": 6.506490760756639e-05, "loss": 0.3513, "step": 30050, "task_loss": 0.5243598818778992 }, { "compression_loss": 0.0, "distillation_loss": 0.3137257695198059, "epoch": 10.86, "learning_rate": 6.502413974863753e-05, "loss": 0.3283, "step": 30060, "task_loss": 0.411754310131073 }, { "compression_loss": 0.0, "distillation_loss": 0.2372526228427887, "epoch": 10.87, "learning_rate": 6.498321707514802e-05, "loss": 0.3206, "step": 30070, "task_loss": 0.6143356561660767 }, { "compression_loss": 0.0, "distillation_loss": 0.36465486884117126, "epoch": 10.87, "learning_rate": 6.494213979810891e-05, "loss": 0.3486, "step": 30080, "task_loss": 0.6931987404823303 }, { "compression_loss": 0.0, "distillation_loss": 0.5073741674423218, "epoch": 10.87, "learning_rate": 6.490090812932844e-05, "loss": 0.3895, "step": 30090, "task_loss": 0.9303778409957886 }, { "compression_loss": 0.0, "distillation_loss": 0.2504512071609497, "epoch": 10.88, "learning_rate": 6.485952228141097e-05, "loss": 0.3479, "step": 30100, "task_loss": 0.41266775131225586 }, { "compression_loss": 0.0, "distillation_loss": 0.3641784191131592, "epoch": 10.88, "learning_rate": 6.481798246775586e-05, "loss": 0.3401, "step": 30110, "task_loss": 0.5535559058189392 }, { "compression_loss": 0.0, "distillation_loss": 0.46111181378364563, "epoch": 10.89, "learning_rate": 6.477628890255634e-05, "loss": 0.3689, "step": 30120, "task_loss": 0.7662212252616882 }, { "compression_loss": 0.0, "distillation_loss": 0.36023229360580444, "epoch": 10.89, "learning_rate": 6.473444180079845e-05, "loss": 0.3828, "step": 30130, "task_loss": 0.5160745978355408 }, { "compression_loss": 0.0, "distillation_loss": 0.33962923288345337, "epoch": 10.89, "learning_rate": 6.469244137825993e-05, "loss": 0.3318, "step": 30140, "task_loss": 0.8273515701293945 }, { "compression_loss": 0.0, "distillation_loss": 0.3233642280101776, "epoch": 10.9, "learning_rate": 6.465028785150908e-05, "loss": 0.3746, "step": 30150, "task_loss": 0.4669772982597351 }, { "compression_loss": 0.0, "distillation_loss": 0.39144349098205566, "epoch": 10.9, "learning_rate": 6.460798143790366e-05, "loss": 0.332, "step": 30160, "task_loss": 0.5270528793334961 }, { "compression_loss": 0.0, "distillation_loss": 0.44592922925949097, "epoch": 10.9, "learning_rate": 6.456552235558976e-05, "loss": 0.3701, "step": 30170, "task_loss": 1.0220377445220947 }, { "compression_loss": 0.0, "distillation_loss": 0.3141802251338959, "epoch": 10.91, "learning_rate": 6.452291082350068e-05, "loss": 0.3544, "step": 30180, "task_loss": 0.2899113893508911 }, { "compression_loss": 0.0, "distillation_loss": 0.28677743673324585, "epoch": 10.91, "learning_rate": 6.448014706135582e-05, "loss": 0.3924, "step": 30190, "task_loss": 0.26135581731796265 }, { "compression_loss": 0.0, "distillation_loss": 0.32510054111480713, "epoch": 10.91, "learning_rate": 6.443723128965951e-05, "loss": 0.3469, "step": 30200, "task_loss": 0.88126540184021 }, { "compression_loss": 0.0, "distillation_loss": 0.3127478361129761, "epoch": 10.92, "learning_rate": 6.439416372969992e-05, "loss": 0.3498, "step": 30210, "task_loss": 0.8732039332389832 }, { "compression_loss": 0.0, "distillation_loss": 0.2943447232246399, "epoch": 10.92, "learning_rate": 6.435094460354784e-05, "loss": 0.3535, "step": 30220, "task_loss": 0.645883321762085 }, { "compression_loss": 0.0, "distillation_loss": 0.3853033185005188, "epoch": 10.93, "learning_rate": 6.430757413405562e-05, "loss": 0.3697, "step": 30230, "task_loss": 0.7212446928024292 }, { "compression_loss": 0.0, "distillation_loss": 0.3296411633491516, "epoch": 10.93, "learning_rate": 6.426405254485603e-05, "loss": 0.3912, "step": 30240, "task_loss": 0.6896770000457764 }, { "compression_loss": 0.0, "distillation_loss": 0.31863152980804443, "epoch": 10.93, "learning_rate": 6.422038006036097e-05, "loss": 0.3574, "step": 30250, "task_loss": 0.39320269227027893 }, { "epoch": 10.93, "eval_exact_match": 82.65846736045411, "eval_f1": 89.52884792493833, "step": 30250 }, { "compression_loss": 0.0, "distillation_loss": 0.4146077036857605, "epoch": 10.94, "learning_rate": 6.417655690576046e-05, "loss": 0.4061, "step": 30260, "task_loss": 0.8556808233261108 }, { "compression_loss": 0.0, "distillation_loss": 0.36979159712791443, "epoch": 10.94, "learning_rate": 6.413258330702145e-05, "loss": 0.377, "step": 30270, "task_loss": 0.4253246486186981 }, { "compression_loss": 0.0, "distillation_loss": 0.30879682302474976, "epoch": 10.94, "learning_rate": 6.408845949088657e-05, "loss": 0.3524, "step": 30280, "task_loss": 0.4455922245979309 }, { "compression_loss": 0.0, "distillation_loss": 0.32917648553848267, "epoch": 10.95, "learning_rate": 6.404418568487308e-05, "loss": 0.3711, "step": 30290, "task_loss": 0.7015597820281982 }, { "compression_loss": 0.0, "distillation_loss": 0.33461427688598633, "epoch": 10.95, "learning_rate": 6.39997621172716e-05, "loss": 0.3623, "step": 30300, "task_loss": 0.3214136064052582 }, { "compression_loss": 0.0, "distillation_loss": 0.3308192491531372, "epoch": 10.95, "learning_rate": 6.395518901714497e-05, "loss": 0.3402, "step": 30310, "task_loss": 0.5261917114257812 }, { "compression_loss": 0.0, "distillation_loss": 0.3672281503677368, "epoch": 10.96, "learning_rate": 6.391046661432711e-05, "loss": 0.3823, "step": 30320, "task_loss": 0.616168737411499 }, { "compression_loss": 0.0, "distillation_loss": 0.4320986270904541, "epoch": 10.96, "learning_rate": 6.386559513942175e-05, "loss": 0.3622, "step": 30330, "task_loss": 0.6258487701416016 }, { "compression_loss": 0.0, "distillation_loss": 0.38706454634666443, "epoch": 10.96, "learning_rate": 6.38205748238013e-05, "loss": 0.3706, "step": 30340, "task_loss": 0.9309755563735962 }, { "compression_loss": 0.0, "distillation_loss": 0.35249724984169006, "epoch": 10.97, "learning_rate": 6.377540589960567e-05, "loss": 0.3604, "step": 30350, "task_loss": 0.4586070775985718 }, { "compression_loss": 0.0, "distillation_loss": 0.31285613775253296, "epoch": 10.97, "learning_rate": 6.373008859974099e-05, "loss": 0.3438, "step": 30360, "task_loss": 0.6232818365097046 }, { "compression_loss": 0.0, "distillation_loss": 0.3567231297492981, "epoch": 10.98, "learning_rate": 6.36846231578785e-05, "loss": 0.3905, "step": 30370, "task_loss": 0.9104863405227661 }, { "compression_loss": 0.0, "distillation_loss": 0.21704626083374023, "epoch": 10.98, "learning_rate": 6.363900980845333e-05, "loss": 0.3709, "step": 30380, "task_loss": 0.4989585876464844 }, { "compression_loss": 0.0, "distillation_loss": 0.35998252034187317, "epoch": 10.98, "learning_rate": 6.359324878666324e-05, "loss": 0.3687, "step": 30390, "task_loss": 0.31220775842666626 }, { "compression_loss": 0.0, "distillation_loss": 0.24193412065505981, "epoch": 10.99, "learning_rate": 6.354734032846744e-05, "loss": 0.374, "step": 30400, "task_loss": 0.6692792177200317 }, { "compression_loss": 0.0, "distillation_loss": 0.3294738829135895, "epoch": 10.99, "learning_rate": 6.350128467058539e-05, "loss": 0.3461, "step": 30410, "task_loss": 0.4882463812828064 }, { "compression_loss": 0.0, "distillation_loss": 0.35924404859542847, "epoch": 10.99, "learning_rate": 6.345508205049552e-05, "loss": 0.3747, "step": 30420, "task_loss": 0.5941590070724487 }, { "compression_loss": 0.0, "distillation_loss": 0.3159148693084717, "epoch": 11.0, "learning_rate": 6.340873270643411e-05, "loss": 0.3673, "step": 30430, "task_loss": 0.6530588865280151 }, { "compression_loss": 0.0, "distillation_loss": 0.40394383668899536, "epoch": 11.0, "learning_rate": 6.336223687739394e-05, "loss": 0.3517, "step": 30440, "task_loss": 0.5973001718521118 }, { "compression_loss": 0.0, "distillation_loss": 0.2577652335166931, "epoch": 11.0, "learning_rate": 6.331559480312315e-05, "loss": 0.3301, "step": 30450, "task_loss": 0.4629456698894501 }, { "compression_loss": 0.0, "distillation_loss": 0.18141883611679077, "epoch": 11.01, "learning_rate": 6.326880672412396e-05, "loss": 0.3134, "step": 30460, "task_loss": 0.34045690298080444 }, { "compression_loss": 0.0, "distillation_loss": 0.33387744426727295, "epoch": 11.01, "learning_rate": 6.322187288165144e-05, "loss": 0.3303, "step": 30470, "task_loss": 0.6234238743782043 }, { "compression_loss": 0.0, "distillation_loss": 0.30332377552986145, "epoch": 11.02, "learning_rate": 6.317479351771226e-05, "loss": 0.3449, "step": 30480, "task_loss": 0.3982439637184143 }, { "compression_loss": 0.0, "distillation_loss": 0.2568157911300659, "epoch": 11.02, "learning_rate": 6.312756887506345e-05, "loss": 0.3402, "step": 30490, "task_loss": 0.42569881677627563 }, { "compression_loss": 0.0, "distillation_loss": 0.34609246253967285, "epoch": 11.02, "learning_rate": 6.308019919721113e-05, "loss": 0.3115, "step": 30500, "task_loss": 0.29865169525146484 }, { "epoch": 11.02, "eval_exact_match": 82.52601702932829, "eval_f1": 89.58780871354091, "step": 30500 }, { "compression_loss": 0.0, "distillation_loss": 0.30228573083877563, "epoch": 11.03, "learning_rate": 6.303268472840934e-05, "loss": 0.3476, "step": 30510, "task_loss": 0.5901381969451904 }, { "compression_loss": 0.0, "distillation_loss": 0.39503413438796997, "epoch": 11.03, "learning_rate": 6.29850257136586e-05, "loss": 0.3726, "step": 30520, "task_loss": 0.6809341907501221 }, { "compression_loss": 0.0, "distillation_loss": 0.34292078018188477, "epoch": 11.03, "learning_rate": 6.293722239870485e-05, "loss": 0.3374, "step": 30530, "task_loss": 0.6341568827629089 }, { "compression_loss": 0.0, "distillation_loss": 0.2626557946205139, "epoch": 11.04, "learning_rate": 6.288927503003805e-05, "loss": 0.3437, "step": 30540, "task_loss": 0.8239531517028809 }, { "compression_loss": 0.0, "distillation_loss": 0.25144004821777344, "epoch": 11.04, "learning_rate": 6.284118385489095e-05, "loss": 0.3219, "step": 30550, "task_loss": 0.44134482741355896 }, { "compression_loss": 0.0, "distillation_loss": 0.3792293667793274, "epoch": 11.04, "learning_rate": 6.279294912123784e-05, "loss": 0.3665, "step": 30560, "task_loss": 0.43918779492378235 }, { "compression_loss": 0.0, "distillation_loss": 0.5096983313560486, "epoch": 11.05, "learning_rate": 6.27445710777932e-05, "loss": 0.3373, "step": 30570, "task_loss": 0.5208146572113037 }, { "compression_loss": 0.0, "distillation_loss": 0.4361113905906677, "epoch": 11.05, "learning_rate": 6.269604997401051e-05, "loss": 0.3685, "step": 30580, "task_loss": 0.9114473462104797 }, { "compression_loss": 0.0, "distillation_loss": 0.3823733329772949, "epoch": 11.06, "learning_rate": 6.264738606008087e-05, "loss": 0.3577, "step": 30590, "task_loss": 0.5972809195518494 }, { "compression_loss": 0.0, "distillation_loss": 0.2892557382583618, "epoch": 11.06, "learning_rate": 6.259857958693182e-05, "loss": 0.3116, "step": 30600, "task_loss": 0.4515109956264496 }, { "compression_loss": 0.0, "distillation_loss": 0.225788414478302, "epoch": 11.06, "learning_rate": 6.254963080622591e-05, "loss": 0.3125, "step": 30610, "task_loss": 0.40221279859542847 }, { "compression_loss": 0.0, "distillation_loss": 0.31782832741737366, "epoch": 11.07, "learning_rate": 6.250053997035956e-05, "loss": 0.3688, "step": 30620, "task_loss": 0.5184889435768127 }, { "compression_loss": 0.0, "distillation_loss": 0.33337557315826416, "epoch": 11.07, "learning_rate": 6.245130733246159e-05, "loss": 0.3247, "step": 30630, "task_loss": 0.5070463418960571 }, { "compression_loss": 0.0, "distillation_loss": 0.33691734075546265, "epoch": 11.07, "learning_rate": 6.240193314639205e-05, "loss": 0.328, "step": 30640, "task_loss": 0.4712425470352173 }, { "compression_loss": 0.0, "distillation_loss": 0.3276863694190979, "epoch": 11.08, "learning_rate": 6.235241766674084e-05, "loss": 0.3553, "step": 30650, "task_loss": 0.6992507576942444 }, { "compression_loss": 0.0, "distillation_loss": 0.4350316822528839, "epoch": 11.08, "learning_rate": 6.230276114882642e-05, "loss": 0.369, "step": 30660, "task_loss": 0.7506589889526367 }, { "compression_loss": 0.0, "distillation_loss": 0.32287871837615967, "epoch": 11.08, "learning_rate": 6.225296384869451e-05, "loss": 0.3442, "step": 30670, "task_loss": 0.5573731660842896 }, { "compression_loss": 0.0, "distillation_loss": 0.275474488735199, "epoch": 11.09, "learning_rate": 6.220302602311674e-05, "loss": 0.3197, "step": 30680, "task_loss": 0.7555659413337708 }, { "compression_loss": 0.0, "distillation_loss": 0.21844923496246338, "epoch": 11.09, "learning_rate": 6.21529479295893e-05, "loss": 0.3129, "step": 30690, "task_loss": 0.4042033851146698 }, { "compression_loss": 0.0, "distillation_loss": 0.3810504376888275, "epoch": 11.1, "learning_rate": 6.21027298263317e-05, "loss": 0.3421, "step": 30700, "task_loss": 0.4075656235218048 }, { "compression_loss": 0.0, "distillation_loss": 0.2908656597137451, "epoch": 11.1, "learning_rate": 6.205237197228537e-05, "loss": 0.3134, "step": 30710, "task_loss": 0.87837815284729 }, { "compression_loss": 0.0, "distillation_loss": 0.321632444858551, "epoch": 11.1, "learning_rate": 6.200187462711232e-05, "loss": 0.3111, "step": 30720, "task_loss": 0.4441836476325989 }, { "compression_loss": 0.0, "distillation_loss": 0.25605639815330505, "epoch": 11.11, "learning_rate": 6.195123805119386e-05, "loss": 0.3282, "step": 30730, "task_loss": 0.5329892039299011 }, { "compression_loss": 0.0, "distillation_loss": 0.3184843063354492, "epoch": 11.11, "learning_rate": 6.19004625056292e-05, "loss": 0.3756, "step": 30740, "task_loss": 0.49759477376937866 }, { "compression_loss": 0.0, "distillation_loss": 0.2887711226940155, "epoch": 11.11, "learning_rate": 6.184954825223412e-05, "loss": 0.3403, "step": 30750, "task_loss": 0.390583336353302 }, { "epoch": 11.11, "eval_exact_match": 82.57332071901608, "eval_f1": 89.61192290821421, "step": 30750 }, { "compression_loss": 0.0, "distillation_loss": 0.4305284321308136, "epoch": 11.12, "learning_rate": 6.179849555353966e-05, "loss": 0.3243, "step": 30760, "task_loss": 0.6925809383392334 }, { "compression_loss": 0.0, "distillation_loss": 0.3064586818218231, "epoch": 11.12, "learning_rate": 6.174730467279065e-05, "loss": 0.3077, "step": 30770, "task_loss": 0.49236008524894714 }, { "compression_loss": 0.0, "distillation_loss": 0.37597641348838806, "epoch": 11.12, "learning_rate": 6.169597587394453e-05, "loss": 0.311, "step": 30780, "task_loss": 1.0985610485076904 }, { "compression_loss": 0.0, "distillation_loss": 0.25383347272872925, "epoch": 11.13, "learning_rate": 6.164450942166983e-05, "loss": 0.302, "step": 30790, "task_loss": 0.2893102169036865 }, { "compression_loss": 0.0, "distillation_loss": 0.29239368438720703, "epoch": 11.13, "learning_rate": 6.159290558134487e-05, "loss": 0.3136, "step": 30800, "task_loss": 0.43395623564720154 }, { "compression_loss": 0.0, "distillation_loss": 0.3338686227798462, "epoch": 11.13, "learning_rate": 6.154116461905642e-05, "loss": 0.2958, "step": 30810, "task_loss": 0.33142733573913574 }, { "compression_loss": 0.0, "distillation_loss": 0.4973904490470886, "epoch": 11.14, "learning_rate": 6.14892868015983e-05, "loss": 0.3597, "step": 30820, "task_loss": 0.8295789957046509 }, { "compression_loss": 0.0, "distillation_loss": 0.3147459030151367, "epoch": 11.14, "learning_rate": 6.143727239646995e-05, "loss": 0.3328, "step": 30830, "task_loss": 0.5006129145622253 }, { "compression_loss": 0.0, "distillation_loss": 0.37410253286361694, "epoch": 11.15, "learning_rate": 6.138512167187514e-05, "loss": 0.322, "step": 30840, "task_loss": 0.4317512512207031 }, { "compression_loss": 0.0, "distillation_loss": 0.26965123414993286, "epoch": 11.15, "learning_rate": 6.133283489672054e-05, "loss": 0.3166, "step": 30850, "task_loss": 0.7003058791160583 }, { "compression_loss": 0.0, "distillation_loss": 0.3355432450771332, "epoch": 11.15, "learning_rate": 6.128041234061437e-05, "loss": 0.3247, "step": 30860, "task_loss": 0.6356285810470581 }, { "compression_loss": 0.0, "distillation_loss": 0.3858422636985779, "epoch": 11.16, "learning_rate": 6.122785427386493e-05, "loss": 0.3423, "step": 30870, "task_loss": 0.6549724340438843 }, { "compression_loss": 0.0, "distillation_loss": 0.28326427936553955, "epoch": 11.16, "learning_rate": 6.117516096747929e-05, "loss": 0.3442, "step": 30880, "task_loss": 0.4158441424369812 }, { "compression_loss": 0.0, "distillation_loss": 0.24142545461654663, "epoch": 11.16, "learning_rate": 6.112233269316187e-05, "loss": 0.2961, "step": 30890, "task_loss": 0.7818669080734253 }, { "compression_loss": 0.0, "distillation_loss": 0.3277119994163513, "epoch": 11.17, "learning_rate": 6.106936972331298e-05, "loss": 0.3308, "step": 30900, "task_loss": 0.7212725877761841 }, { "compression_loss": 0.0, "distillation_loss": 0.26108160614967346, "epoch": 11.17, "learning_rate": 6.101627233102756e-05, "loss": 0.3228, "step": 30910, "task_loss": 0.43422454595565796 }, { "compression_loss": 0.0, "distillation_loss": 0.28188270330429077, "epoch": 11.17, "learning_rate": 6.096304079009358e-05, "loss": 0.3063, "step": 30920, "task_loss": 0.39963170886039734 }, { "compression_loss": 0.0, "distillation_loss": 0.3516286015510559, "epoch": 11.18, "learning_rate": 6.090967537499077e-05, "loss": 0.3139, "step": 30930, "task_loss": 0.4946768879890442 }, { "compression_loss": 0.0, "distillation_loss": 0.4153703451156616, "epoch": 11.18, "learning_rate": 6.085617636088917e-05, "loss": 0.3593, "step": 30940, "task_loss": 0.6110119223594666 }, { "compression_loss": 0.0, "distillation_loss": 0.3178633451461792, "epoch": 11.19, "learning_rate": 6.080254402364767e-05, "loss": 0.3265, "step": 30950, "task_loss": 0.6084519624710083 }, { "compression_loss": 0.0, "distillation_loss": 0.29074785113334656, "epoch": 11.19, "learning_rate": 6.074877863981264e-05, "loss": 0.3544, "step": 30960, "task_loss": 0.6702461242675781 }, { "compression_loss": 0.0, "distillation_loss": 0.3396759331226349, "epoch": 11.19, "learning_rate": 6.06948804866165e-05, "loss": 0.3124, "step": 30970, "task_loss": 0.5206983685493469 }, { "compression_loss": 0.0, "distillation_loss": 0.25989753007888794, "epoch": 11.2, "learning_rate": 6.0640849841976206e-05, "loss": 0.3126, "step": 30980, "task_loss": 0.25708910822868347 }, { "compression_loss": 0.0, "distillation_loss": 0.2514250874519348, "epoch": 11.2, "learning_rate": 6.058668698449197e-05, "loss": 0.3122, "step": 30990, "task_loss": 0.6891814470291138 }, { "compression_loss": 0.0, "distillation_loss": 0.25748398900032043, "epoch": 11.2, "learning_rate": 6.0532392193445684e-05, "loss": 0.3463, "step": 31000, "task_loss": 0.2810893654823303 }, { "epoch": 11.2, "eval_exact_match": 82.61116367076632, "eval_f1": 89.39437638190044, "step": 31000 }, { "compression_loss": 0.0, "distillation_loss": 0.3203161358833313, "epoch": 11.21, "learning_rate": 6.047796574879955e-05, "loss": 0.3683, "step": 31010, "task_loss": 0.6424096822738647 }, { "compression_loss": 0.0, "distillation_loss": 0.2515981197357178, "epoch": 11.21, "learning_rate": 6.0423407931194616e-05, "loss": 0.3159, "step": 31020, "task_loss": 0.38904130458831787 }, { "compression_loss": 0.0, "distillation_loss": 0.21264244616031647, "epoch": 11.21, "learning_rate": 6.036871902194934e-05, "loss": 0.3023, "step": 31030, "task_loss": 0.4546867609024048 }, { "compression_loss": 0.0, "distillation_loss": 0.36801475286483765, "epoch": 11.22, "learning_rate": 6.031389930305813e-05, "loss": 0.3127, "step": 31040, "task_loss": 0.9397456645965576 }, { "compression_loss": 0.0, "distillation_loss": 0.2525968551635742, "epoch": 11.22, "learning_rate": 6.025894905718988e-05, "loss": 0.3171, "step": 31050, "task_loss": 0.3194689154624939 }, { "compression_loss": 0.0, "distillation_loss": 0.3747779130935669, "epoch": 11.23, "learning_rate": 6.0203868567686544e-05, "loss": 0.3413, "step": 31060, "task_loss": 0.3353426456451416 }, { "compression_loss": 0.0, "distillation_loss": 0.3535149097442627, "epoch": 11.23, "learning_rate": 6.014865811856164e-05, "loss": 0.3619, "step": 31070, "task_loss": 0.5516100525856018 }, { "compression_loss": 0.0, "distillation_loss": 0.26662665605545044, "epoch": 11.23, "learning_rate": 6.009331799449884e-05, "loss": 0.3077, "step": 31080, "task_loss": 0.58455491065979 }, { "compression_loss": 0.0, "distillation_loss": 0.31872040033340454, "epoch": 11.24, "learning_rate": 6.003784848085037e-05, "loss": 0.3497, "step": 31090, "task_loss": 0.7595039010047913 }, { "compression_loss": 0.0, "distillation_loss": 0.348061740398407, "epoch": 11.24, "learning_rate": 5.998224986363576e-05, "loss": 0.337, "step": 31100, "task_loss": 0.7282963395118713 }, { "compression_loss": 0.0, "distillation_loss": 0.4052058160305023, "epoch": 11.24, "learning_rate": 5.992652242954014e-05, "loss": 0.3312, "step": 31110, "task_loss": 0.7850685119628906 }, { "compression_loss": 0.0, "distillation_loss": 0.22310039401054382, "epoch": 11.25, "learning_rate": 5.987066646591291e-05, "loss": 0.3091, "step": 31120, "task_loss": 0.4512239396572113 }, { "compression_loss": 0.0, "distillation_loss": 0.3533927798271179, "epoch": 11.25, "learning_rate": 5.9814682260766194e-05, "loss": 0.3151, "step": 31130, "task_loss": 0.6712297201156616 }, { "compression_loss": 0.0, "distillation_loss": 0.2797766923904419, "epoch": 11.25, "learning_rate": 5.975857010277339e-05, "loss": 0.3103, "step": 31140, "task_loss": 0.7752123475074768 }, { "compression_loss": 0.0, "distillation_loss": 0.25427502393722534, "epoch": 11.26, "learning_rate": 5.9702330281267646e-05, "loss": 0.3234, "step": 31150, "task_loss": 0.39408910274505615 }, { "compression_loss": 0.0, "distillation_loss": 0.31670671701431274, "epoch": 11.26, "learning_rate": 5.9645963086240405e-05, "loss": 0.3193, "step": 31160, "task_loss": 0.3765723705291748 }, { "compression_loss": 0.0, "distillation_loss": 0.38054102659225464, "epoch": 11.26, "learning_rate": 5.958946880833986e-05, "loss": 0.3457, "step": 31170, "task_loss": 0.794228196144104 }, { "compression_loss": 0.0, "distillation_loss": 0.38272950053215027, "epoch": 11.27, "learning_rate": 5.953284773886952e-05, "loss": 0.3371, "step": 31180, "task_loss": 0.8740808367729187 }, { "compression_loss": 0.0, "distillation_loss": 0.4635079801082611, "epoch": 11.27, "learning_rate": 5.947610016978664e-05, "loss": 0.3692, "step": 31190, "task_loss": 0.7308275699615479 }, { "compression_loss": 0.0, "distillation_loss": 0.5193721055984497, "epoch": 11.28, "learning_rate": 5.94192263937008e-05, "loss": 0.3636, "step": 31200, "task_loss": 0.47289326786994934 }, { "compression_loss": 0.0, "distillation_loss": 0.42167362570762634, "epoch": 11.28, "learning_rate": 5.936222670387228e-05, "loss": 0.3795, "step": 31210, "task_loss": 0.5789846777915955 }, { "compression_loss": 0.0, "distillation_loss": 0.17998720705509186, "epoch": 11.28, "learning_rate": 5.930510139421068e-05, "loss": 0.3465, "step": 31220, "task_loss": 0.12794144451618195 }, { "compression_loss": 0.0, "distillation_loss": 0.2598450183868408, "epoch": 11.29, "learning_rate": 5.924785075927328e-05, "loss": 0.295, "step": 31230, "task_loss": 0.22558437287807465 }, { "compression_loss": 0.0, "distillation_loss": 0.31792667508125305, "epoch": 11.29, "learning_rate": 5.919047509426362e-05, "loss": 0.3208, "step": 31240, "task_loss": 0.40455853939056396 }, { "compression_loss": 0.0, "distillation_loss": 0.27053987979888916, "epoch": 11.29, "learning_rate": 5.913297469502991e-05, "loss": 0.3305, "step": 31250, "task_loss": 0.8206726312637329 }, { "epoch": 11.29, "eval_exact_match": 82.97067171239357, "eval_f1": 89.6492007370016, "step": 31250 }, { "compression_loss": 0.0, "distillation_loss": 0.3249356746673584, "epoch": 11.3, "learning_rate": 5.907534985806355e-05, "loss": 0.3421, "step": 31260, "task_loss": 1.1308776140213013 }, { "compression_loss": 0.0, "distillation_loss": 0.3523188829421997, "epoch": 11.3, "learning_rate": 5.901760088049758e-05, "loss": 0.3354, "step": 31270, "task_loss": 0.3868599534034729 }, { "compression_loss": 0.0, "distillation_loss": 0.34525448083877563, "epoch": 11.3, "learning_rate": 5.895972806010516e-05, "loss": 0.3391, "step": 31280, "task_loss": 0.7810451984405518 }, { "compression_loss": 0.0, "distillation_loss": 0.297976553440094, "epoch": 11.31, "learning_rate": 5.890173169529798e-05, "loss": 0.2762, "step": 31290, "task_loss": 0.4372379779815674 }, { "compression_loss": 0.0, "distillation_loss": 0.252549409866333, "epoch": 11.31, "learning_rate": 5.884361208512483e-05, "loss": 0.3142, "step": 31300, "task_loss": 0.6390154361724854 }, { "compression_loss": 0.0, "distillation_loss": 0.24922886490821838, "epoch": 11.32, "learning_rate": 5.878536952926994e-05, "loss": 0.3699, "step": 31310, "task_loss": 0.2871250510215759 }, { "compression_loss": 0.0, "distillation_loss": 0.20339345932006836, "epoch": 11.32, "learning_rate": 5.872700432805154e-05, "loss": 0.2908, "step": 31320, "task_loss": 0.5091993808746338 }, { "compression_loss": 0.0, "distillation_loss": 0.24334312975406647, "epoch": 11.32, "learning_rate": 5.866851678242021e-05, "loss": 0.3197, "step": 31330, "task_loss": 0.23363611102104187 }, { "compression_loss": 0.0, "distillation_loss": 0.26076406240463257, "epoch": 11.33, "learning_rate": 5.86099071939574e-05, "loss": 0.3054, "step": 31340, "task_loss": 0.5186704397201538 }, { "compression_loss": 0.0, "distillation_loss": 0.20574674010276794, "epoch": 11.33, "learning_rate": 5.855117586487388e-05, "loss": 0.3113, "step": 31350, "task_loss": 0.47972744703292847 }, { "compression_loss": 0.0, "distillation_loss": 0.21276235580444336, "epoch": 11.33, "learning_rate": 5.849232309800812e-05, "loss": 0.3232, "step": 31360, "task_loss": 0.5652778148651123 }, { "compression_loss": 0.0, "distillation_loss": 0.2867165505886078, "epoch": 11.34, "learning_rate": 5.8433349196824764e-05, "loss": 0.3078, "step": 31370, "task_loss": 0.45484477281570435 }, { "compression_loss": 0.0, "distillation_loss": 0.47911810874938965, "epoch": 11.34, "learning_rate": 5.83742544654131e-05, "loss": 0.3217, "step": 31380, "task_loss": 0.4997098445892334 }, { "compression_loss": 0.0, "distillation_loss": 0.2617582082748413, "epoch": 11.34, "learning_rate": 5.831503920848542e-05, "loss": 0.3012, "step": 31390, "task_loss": 0.45006659626960754 }, { "compression_loss": 0.0, "distillation_loss": 0.33783483505249023, "epoch": 11.35, "learning_rate": 5.825570373137551e-05, "loss": 0.3159, "step": 31400, "task_loss": 0.509252667427063 }, { "compression_loss": 0.0, "distillation_loss": 0.29730767011642456, "epoch": 11.35, "learning_rate": 5.819624834003702e-05, "loss": 0.3595, "step": 31410, "task_loss": 0.6149415373802185 }, { "compression_loss": 0.0, "distillation_loss": 0.3289676308631897, "epoch": 11.36, "learning_rate": 5.8136673341041975e-05, "loss": 0.3254, "step": 31420, "task_loss": 0.471310019493103 }, { "compression_loss": 0.0, "distillation_loss": 0.21676236391067505, "epoch": 11.36, "learning_rate": 5.807697904157908e-05, "loss": 0.3152, "step": 31430, "task_loss": 0.29653164744377136 }, { "compression_loss": 0.0, "distillation_loss": 0.3164607584476471, "epoch": 11.36, "learning_rate": 5.801716574945222e-05, "loss": 0.3203, "step": 31440, "task_loss": 0.6140503883361816 }, { "compression_loss": 0.0, "distillation_loss": 0.3576650619506836, "epoch": 11.37, "learning_rate": 5.795723377307885e-05, "loss": 0.3473, "step": 31450, "task_loss": 0.5120994448661804 }, { "compression_loss": 0.0, "distillation_loss": 0.28827202320098877, "epoch": 11.37, "learning_rate": 5.789718342148839e-05, "loss": 0.317, "step": 31460, "task_loss": 0.6643293499946594 }, { "compression_loss": 0.0, "distillation_loss": 0.27452167868614197, "epoch": 11.37, "learning_rate": 5.783701500432064e-05, "loss": 0.3256, "step": 31470, "task_loss": 0.4244112968444824 }, { "compression_loss": 0.0, "distillation_loss": 0.3297208547592163, "epoch": 11.38, "learning_rate": 5.777672883182419e-05, "loss": 0.3106, "step": 31480, "task_loss": 0.4949456453323364 }, { "compression_loss": 0.0, "distillation_loss": 0.28091195225715637, "epoch": 11.38, "learning_rate": 5.771632521485482e-05, "loss": 0.3386, "step": 31490, "task_loss": 0.4345235526561737 }, { "compression_loss": 0.0, "distillation_loss": 0.22229045629501343, "epoch": 11.38, "learning_rate": 5.76558044648739e-05, "loss": 0.3329, "step": 31500, "task_loss": 0.3153355121612549 }, { "epoch": 11.38, "eval_exact_match": 82.72469252601702, "eval_f1": 89.59696763050977, "step": 31500 }, { "compression_loss": 0.0, "distillation_loss": 0.22494566440582275, "epoch": 11.39, "learning_rate": 5.7595166893946776e-05, "loss": 0.325, "step": 31510, "task_loss": 0.46599242091178894 }, { "compression_loss": 0.0, "distillation_loss": 0.302801251411438, "epoch": 11.39, "learning_rate": 5.7534412814741126e-05, "loss": 0.3226, "step": 31520, "task_loss": 0.5004099607467651 }, { "compression_loss": 0.0, "distillation_loss": 0.24632063508033752, "epoch": 11.4, "learning_rate": 5.747354254052542e-05, "loss": 0.3322, "step": 31530, "task_loss": 0.31375330686569214 }, { "compression_loss": 0.0, "distillation_loss": 0.3572053015232086, "epoch": 11.4, "learning_rate": 5.741255638516727e-05, "loss": 0.313, "step": 31540, "task_loss": 0.5052705407142639 }, { "compression_loss": 0.0, "distillation_loss": 0.2660755217075348, "epoch": 11.4, "learning_rate": 5.7351454663131803e-05, "loss": 0.289, "step": 31550, "task_loss": 0.6535313129425049 }, { "compression_loss": 0.0, "distillation_loss": 0.2776443064212799, "epoch": 11.41, "learning_rate": 5.729023768948003e-05, "loss": 0.3646, "step": 31560, "task_loss": 0.2539638578891754 }, { "compression_loss": 0.0, "distillation_loss": 0.437635600566864, "epoch": 11.41, "learning_rate": 5.7228905779867264e-05, "loss": 0.3812, "step": 31570, "task_loss": 0.6456525325775146 }, { "compression_loss": 0.0, "distillation_loss": 0.29459071159362793, "epoch": 11.41, "learning_rate": 5.7167459250541455e-05, "loss": 0.4021, "step": 31580, "task_loss": 0.6220089197158813 }, { "compression_loss": 0.0, "distillation_loss": 0.2666949927806854, "epoch": 11.42, "learning_rate": 5.710589841834156e-05, "loss": 0.3197, "step": 31590, "task_loss": 0.3189229965209961 }, { "compression_loss": 0.0, "distillation_loss": 0.3018212914466858, "epoch": 11.42, "learning_rate": 5.704422360069595e-05, "loss": 0.3131, "step": 31600, "task_loss": 0.5461843013763428 }, { "compression_loss": 0.0, "distillation_loss": 0.20376259088516235, "epoch": 11.42, "learning_rate": 5.69824351156207e-05, "loss": 0.3089, "step": 31610, "task_loss": 0.4694615602493286 }, { "compression_loss": 0.0, "distillation_loss": 0.30408358573913574, "epoch": 11.43, "learning_rate": 5.692053328171803e-05, "loss": 0.3011, "step": 31620, "task_loss": 0.26619333028793335 }, { "compression_loss": 0.0, "distillation_loss": 0.2672536075115204, "epoch": 11.43, "learning_rate": 5.685851841817462e-05, "loss": 0.3006, "step": 31630, "task_loss": 0.3129875063896179 }, { "compression_loss": 0.0, "distillation_loss": 0.39254266023635864, "epoch": 11.43, "learning_rate": 5.679639084475993e-05, "loss": 0.3127, "step": 31640, "task_loss": 0.5990224480628967 }, { "compression_loss": 0.0, "distillation_loss": 0.3391401171684265, "epoch": 11.44, "learning_rate": 5.6734150881824656e-05, "loss": 0.3575, "step": 31650, "task_loss": 0.7453941106796265 }, { "compression_loss": 0.0, "distillation_loss": 0.38718274235725403, "epoch": 11.44, "learning_rate": 5.667179885029895e-05, "loss": 0.3365, "step": 31660, "task_loss": 0.9288395047187805 }, { "compression_loss": 0.0, "distillation_loss": 0.43893879652023315, "epoch": 11.45, "learning_rate": 5.660933507169086e-05, "loss": 0.3261, "step": 31670, "task_loss": 0.6180750131607056 }, { "compression_loss": 0.0, "distillation_loss": 0.32078641653060913, "epoch": 11.45, "learning_rate": 5.654675986808465e-05, "loss": 0.3462, "step": 31680, "task_loss": 0.8182356953620911 }, { "compression_loss": 0.0, "distillation_loss": 0.32552042603492737, "epoch": 11.45, "learning_rate": 5.6484073562139083e-05, "loss": 0.3571, "step": 31690, "task_loss": 0.369057297706604 }, { "compression_loss": 0.0, "distillation_loss": 0.29520851373672485, "epoch": 11.46, "learning_rate": 5.642127647708586e-05, "loss": 0.3246, "step": 31700, "task_loss": 0.45395147800445557 }, { "compression_loss": 0.0, "distillation_loss": 0.3191979229450226, "epoch": 11.46, "learning_rate": 5.635836893672784e-05, "loss": 0.3323, "step": 31710, "task_loss": 0.4409838318824768 }, { "compression_loss": 0.0, "distillation_loss": 0.22453919053077698, "epoch": 11.46, "learning_rate": 5.6295351265437475e-05, "loss": 0.3511, "step": 31720, "task_loss": 0.3913407623767853 }, { "compression_loss": 0.0, "distillation_loss": 0.24045340716838837, "epoch": 11.47, "learning_rate": 5.6232223788155074e-05, "loss": 0.3016, "step": 31730, "task_loss": 0.4056016206741333 }, { "compression_loss": 0.0, "distillation_loss": 0.28492245078086853, "epoch": 11.47, "learning_rate": 5.616898683038712e-05, "loss": 0.3164, "step": 31740, "task_loss": 0.5923171043395996 }, { "compression_loss": 0.0, "distillation_loss": 0.22334977984428406, "epoch": 11.47, "learning_rate": 5.610564071820462e-05, "loss": 0.3048, "step": 31750, "task_loss": 0.5784961581230164 }, { "epoch": 11.47, "eval_exact_match": 82.30842005676443, "eval_f1": 89.281748835677, "step": 31750 }, { "compression_loss": 0.0, "distillation_loss": 0.25941359996795654, "epoch": 11.48, "learning_rate": 5.6042185778241474e-05, "loss": 0.3379, "step": 31760, "task_loss": 0.6838859915733337 }, { "compression_loss": 0.0, "distillation_loss": 0.30071139335632324, "epoch": 11.48, "learning_rate": 5.5978622337692656e-05, "loss": 0.3302, "step": 31770, "task_loss": 0.4620378911495209 }, { "compression_loss": 0.0, "distillation_loss": 0.2648431062698364, "epoch": 11.49, "learning_rate": 5.5914950724312644e-05, "loss": 0.3303, "step": 31780, "task_loss": 0.5686721801757812 }, { "compression_loss": 0.0, "distillation_loss": 0.34629106521606445, "epoch": 11.49, "learning_rate": 5.5851171266413694e-05, "loss": 0.323, "step": 31790, "task_loss": 0.44669267535209656 }, { "compression_loss": 0.0, "distillation_loss": 0.2935123145580292, "epoch": 11.49, "learning_rate": 5.578728429286414e-05, "loss": 0.3134, "step": 31800, "task_loss": 0.4752408564090729 }, { "compression_loss": 0.0, "distillation_loss": 0.28745779395103455, "epoch": 11.5, "learning_rate": 5.5723290133086686e-05, "loss": 0.3296, "step": 31810, "task_loss": 0.6825571060180664 }, { "compression_loss": 0.0, "distillation_loss": 0.3426585793495178, "epoch": 11.5, "learning_rate": 5.5659189117056755e-05, "loss": 0.317, "step": 31820, "task_loss": 0.5306519269943237 }, { "compression_loss": 0.0, "distillation_loss": 0.2709750533103943, "epoch": 11.5, "learning_rate": 5.5594981575300746e-05, "loss": 0.3061, "step": 31830, "task_loss": 0.470102995634079 }, { "compression_loss": 0.0, "distillation_loss": 0.2657717168331146, "epoch": 11.51, "learning_rate": 5.5530667838894306e-05, "loss": 0.2997, "step": 31840, "task_loss": 0.36683228611946106 }, { "compression_loss": 0.0, "distillation_loss": 0.3502514958381653, "epoch": 11.51, "learning_rate": 5.546624823946071e-05, "loss": 0.3257, "step": 31850, "task_loss": 0.5826776623725891 }, { "compression_loss": 0.0, "distillation_loss": 0.26688486337661743, "epoch": 11.51, "learning_rate": 5.5401723109169074e-05, "loss": 0.2944, "step": 31860, "task_loss": 0.4791288375854492 }, { "compression_loss": 0.0, "distillation_loss": 0.2857460379600525, "epoch": 11.52, "learning_rate": 5.5337092780732664e-05, "loss": 0.3741, "step": 31870, "task_loss": 0.541092574596405 }, { "compression_loss": 0.0, "distillation_loss": 0.4241938591003418, "epoch": 11.52, "learning_rate": 5.5272357587407176e-05, "loss": 0.3506, "step": 31880, "task_loss": 0.7306923866271973 }, { "compression_loss": 0.0, "distillation_loss": 0.3634362816810608, "epoch": 11.53, "learning_rate": 5.520751786298905e-05, "loss": 0.3177, "step": 31890, "task_loss": 0.46968650817871094 }, { "compression_loss": 0.0, "distillation_loss": 0.2074853479862213, "epoch": 11.53, "learning_rate": 5.51425739418137e-05, "loss": 0.3507, "step": 31900, "task_loss": 0.3567825257778168 }, { "compression_loss": 0.0, "distillation_loss": 0.32588648796081543, "epoch": 11.53, "learning_rate": 5.507752615875383e-05, "loss": 0.3368, "step": 31910, "task_loss": 0.8394519686698914 }, { "compression_loss": 0.0, "distillation_loss": 0.23712632060050964, "epoch": 11.54, "learning_rate": 5.501237484921767e-05, "loss": 0.3403, "step": 31920, "task_loss": 0.6089857816696167 }, { "compression_loss": 0.0, "distillation_loss": 0.4756302237510681, "epoch": 11.54, "learning_rate": 5.494712034914728e-05, "loss": 0.3444, "step": 31930, "task_loss": 0.49841880798339844 }, { "compression_loss": 0.0, "distillation_loss": 0.32165443897247314, "epoch": 11.54, "learning_rate": 5.488176299501683e-05, "loss": 0.3121, "step": 31940, "task_loss": 0.2868342995643616 }, { "compression_loss": 0.0, "distillation_loss": 0.3431640863418579, "epoch": 11.55, "learning_rate": 5.4816303123830796e-05, "loss": 0.3307, "step": 31950, "task_loss": 0.5774615406990051 }, { "compression_loss": 0.0, "distillation_loss": 0.32206475734710693, "epoch": 11.55, "learning_rate": 5.4750741073122284e-05, "loss": 0.3721, "step": 31960, "task_loss": 0.7175636887550354 }, { "compression_loss": 0.0, "distillation_loss": 0.2849663197994232, "epoch": 11.55, "learning_rate": 5.4685077180951276e-05, "loss": 0.3497, "step": 31970, "task_loss": 0.49566948413848877 }, { "compression_loss": 0.0, "distillation_loss": 0.2988675832748413, "epoch": 11.56, "learning_rate": 5.461931178590289e-05, "loss": 0.317, "step": 31980, "task_loss": 0.7831475734710693 }, { "compression_loss": 0.0, "distillation_loss": 0.28928619623184204, "epoch": 11.56, "learning_rate": 5.4553445227085605e-05, "loss": 0.3441, "step": 31990, "task_loss": 0.38043415546417236 }, { "compression_loss": 0.0, "distillation_loss": 0.3057170510292053, "epoch": 11.56, "learning_rate": 5.4487477844129556e-05, "loss": 0.2937, "step": 32000, "task_loss": 0.5053399801254272 }, { "epoch": 11.56, "eval_exact_match": 82.34626300851467, "eval_f1": 89.30547427778144, "step": 32000 }, { "compression_loss": 0.0, "distillation_loss": 0.27672266960144043, "epoch": 11.57, "learning_rate": 5.442140997718475e-05, "loss": 0.3506, "step": 32010, "task_loss": 0.31775251030921936 }, { "compression_loss": 0.0, "distillation_loss": 0.44757071137428284, "epoch": 11.57, "learning_rate": 5.4355241966919324e-05, "loss": 0.3183, "step": 32020, "task_loss": 0.7788525819778442 }, { "compression_loss": 0.0, "distillation_loss": 0.27162885665893555, "epoch": 11.58, "learning_rate": 5.428897415451778e-05, "loss": 0.3686, "step": 32030, "task_loss": 0.6087551712989807 }, { "compression_loss": 0.0, "distillation_loss": 0.3032134175300598, "epoch": 11.58, "learning_rate": 5.422260688167926e-05, "loss": 0.3345, "step": 32040, "task_loss": 0.33256518840789795 }, { "compression_loss": 0.0, "distillation_loss": 0.3117225170135498, "epoch": 11.58, "learning_rate": 5.415614049061573e-05, "loss": 0.3204, "step": 32050, "task_loss": 0.4947884678840637 }, { "compression_loss": 0.0, "distillation_loss": 0.3810727894306183, "epoch": 11.59, "learning_rate": 5.408957532405025e-05, "loss": 0.3141, "step": 32060, "task_loss": 0.6038593053817749 }, { "compression_loss": 0.0, "distillation_loss": 0.30839699506759644, "epoch": 11.59, "learning_rate": 5.4022911725215214e-05, "loss": 0.3569, "step": 32070, "task_loss": 0.44963526725769043 }, { "compression_loss": 0.0, "distillation_loss": 0.2700961232185364, "epoch": 11.59, "learning_rate": 5.395615003785054e-05, "loss": 0.2955, "step": 32080, "task_loss": 0.42938604950904846 }, { "compression_loss": 0.0, "distillation_loss": 0.27627208828926086, "epoch": 11.6, "learning_rate": 5.388929060620194e-05, "loss": 0.3578, "step": 32090, "task_loss": 0.7172139883041382 }, { "compression_loss": 0.0, "distillation_loss": 0.217061847448349, "epoch": 11.6, "learning_rate": 5.3822333775019146e-05, "loss": 0.3172, "step": 32100, "task_loss": 0.2939155697822571 }, { "compression_loss": 0.0, "distillation_loss": 0.3464398980140686, "epoch": 11.6, "learning_rate": 5.375527988955407e-05, "loss": 0.2948, "step": 32110, "task_loss": 0.6745935678482056 }, { "compression_loss": 0.0, "distillation_loss": 0.26883465051651, "epoch": 11.61, "learning_rate": 5.36881292955591e-05, "loss": 0.3402, "step": 32120, "task_loss": 0.5124499797821045 }, { "compression_loss": 0.0, "distillation_loss": 0.29246842861175537, "epoch": 11.61, "learning_rate": 5.36208823392853e-05, "loss": 0.3084, "step": 32130, "task_loss": 0.4192900061607361 }, { "compression_loss": 0.0, "distillation_loss": 0.34540215134620667, "epoch": 11.62, "learning_rate": 5.3553539367480557e-05, "loss": 0.3292, "step": 32140, "task_loss": 0.6095007658004761 }, { "compression_loss": 0.0, "distillation_loss": 0.2868419885635376, "epoch": 11.62, "learning_rate": 5.34861007273879e-05, "loss": 0.336, "step": 32150, "task_loss": 0.6032599210739136 }, { "compression_loss": 0.0, "distillation_loss": 0.3358624577522278, "epoch": 11.62, "learning_rate": 5.341856676674362e-05, "loss": 0.2955, "step": 32160, "task_loss": 0.44446879625320435 }, { "compression_loss": 0.0, "distillation_loss": 0.3103525638580322, "epoch": 11.63, "learning_rate": 5.335093783377554e-05, "loss": 0.3357, "step": 32170, "task_loss": 0.4964514672756195 }, { "compression_loss": 0.0, "distillation_loss": 0.2463071644306183, "epoch": 11.63, "learning_rate": 5.328321427720118e-05, "loss": 0.328, "step": 32180, "task_loss": 0.960116982460022 }, { "compression_loss": 0.0, "distillation_loss": 0.38493576645851135, "epoch": 11.63, "learning_rate": 5.321539644622596e-05, "loss": 0.3408, "step": 32190, "task_loss": 0.4280901253223419 }, { "compression_loss": 0.0, "distillation_loss": 0.28553837537765503, "epoch": 11.64, "learning_rate": 5.314748469054143e-05, "loss": 0.3255, "step": 32200, "task_loss": 0.525431752204895 }, { "compression_loss": 0.0, "distillation_loss": 0.2808997631072998, "epoch": 11.64, "learning_rate": 5.307947936032344e-05, "loss": 0.3177, "step": 32210, "task_loss": 0.23566016554832458 }, { "compression_loss": 0.0, "distillation_loss": 0.4643576145172119, "epoch": 11.64, "learning_rate": 5.301138080623034e-05, "loss": 0.3763, "step": 32220, "task_loss": 0.7409594058990479 }, { "compression_loss": 0.0, "distillation_loss": 0.2858960032463074, "epoch": 11.65, "learning_rate": 5.294318937940116e-05, "loss": 0.3012, "step": 32230, "task_loss": 0.6493170261383057 }, { "compression_loss": 0.0, "distillation_loss": 0.30860576033592224, "epoch": 11.65, "learning_rate": 5.287490543145385e-05, "loss": 0.3199, "step": 32240, "task_loss": 0.3269897401332855 }, { "compression_loss": 0.0, "distillation_loss": 0.5268182158470154, "epoch": 11.66, "learning_rate": 5.280652931448339e-05, "loss": 0.3442, "step": 32250, "task_loss": 0.6542600989341736 }, { "epoch": 11.66, "eval_exact_match": 83.08420056764427, "eval_f1": 89.76224592743083, "step": 32250 }, { "compression_loss": 0.0, "distillation_loss": 0.33490172028541565, "epoch": 11.66, "learning_rate": 5.273806138106004e-05, "loss": 0.3572, "step": 32260, "task_loss": 0.6601828932762146 }, { "compression_loss": 0.0, "distillation_loss": 0.34004050493240356, "epoch": 11.66, "learning_rate": 5.266950198422749e-05, "loss": 0.3248, "step": 32270, "task_loss": 0.6301026344299316 }, { "compression_loss": 0.0, "distillation_loss": 0.295710027217865, "epoch": 11.67, "learning_rate": 5.260085147750104e-05, "loss": 0.315, "step": 32280, "task_loss": 0.3331633508205414 }, { "compression_loss": 0.0, "distillation_loss": 0.35243725776672363, "epoch": 11.67, "learning_rate": 5.253211021486578e-05, "loss": 0.3147, "step": 32290, "task_loss": 0.6616904735565186 }, { "compression_loss": 0.0, "distillation_loss": 0.30269598960876465, "epoch": 11.67, "learning_rate": 5.246327855077479e-05, "loss": 0.3256, "step": 32300, "task_loss": 0.541825532913208 }, { "compression_loss": 0.0, "distillation_loss": 0.28160130977630615, "epoch": 11.68, "learning_rate": 5.239435684014727e-05, "loss": 0.3064, "step": 32310, "task_loss": 0.45904257893562317 }, { "compression_loss": 0.0, "distillation_loss": 0.46022456884384155, "epoch": 11.68, "learning_rate": 5.232534543836673e-05, "loss": 0.3411, "step": 32320, "task_loss": 0.5846478939056396 }, { "compression_loss": 0.0, "distillation_loss": 0.2371748834848404, "epoch": 11.68, "learning_rate": 5.225624470127917e-05, "loss": 0.33, "step": 32330, "task_loss": 0.49647220969200134 }, { "compression_loss": 0.0, "distillation_loss": 0.2978493571281433, "epoch": 11.69, "learning_rate": 5.218705498519123e-05, "loss": 0.3187, "step": 32340, "task_loss": 0.6566059589385986 }, { "compression_loss": 0.0, "distillation_loss": 0.25985074043273926, "epoch": 11.69, "learning_rate": 5.211777664686834e-05, "loss": 0.3058, "step": 32350, "task_loss": 0.6025593280792236 }, { "compression_loss": 0.0, "distillation_loss": 0.30825042724609375, "epoch": 11.69, "learning_rate": 5.2048410043532935e-05, "loss": 0.3376, "step": 32360, "task_loss": 0.31907224655151367 }, { "compression_loss": 0.0, "distillation_loss": 0.3601059317588806, "epoch": 11.7, "learning_rate": 5.197895553286251e-05, "loss": 0.3641, "step": 32370, "task_loss": 0.500454843044281 }, { "compression_loss": 0.0, "distillation_loss": 0.25591498613357544, "epoch": 11.7, "learning_rate": 5.190941347298791e-05, "loss": 0.34, "step": 32380, "task_loss": 0.4937874972820282 }, { "compression_loss": 0.0, "distillation_loss": 0.29595816135406494, "epoch": 11.71, "learning_rate": 5.1839784222491365e-05, "loss": 0.3345, "step": 32390, "task_loss": 0.30738627910614014 }, { "compression_loss": 0.0, "distillation_loss": 0.26647788286209106, "epoch": 11.71, "learning_rate": 5.1770068140404696e-05, "loss": 0.351, "step": 32400, "task_loss": 0.4921760559082031 }, { "compression_loss": 0.0, "distillation_loss": 0.286870539188385, "epoch": 11.71, "learning_rate": 5.170026558620748e-05, "loss": 0.2885, "step": 32410, "task_loss": 0.5846974849700928 }, { "compression_loss": 0.0, "distillation_loss": 0.3173612952232361, "epoch": 11.72, "learning_rate": 5.1630376919825164e-05, "loss": 0.3107, "step": 32420, "task_loss": 0.6225560307502747 }, { "compression_loss": 0.0, "distillation_loss": 0.31489187479019165, "epoch": 11.72, "learning_rate": 5.15604025016272e-05, "loss": 0.3051, "step": 32430, "task_loss": 0.4998015761375427 }, { "compression_loss": 0.0, "distillation_loss": 0.3452304005622864, "epoch": 11.72, "learning_rate": 5.149034269242522e-05, "loss": 0.3372, "step": 32440, "task_loss": 0.6053261756896973 }, { "compression_loss": 0.0, "distillation_loss": 0.26079899072647095, "epoch": 11.73, "learning_rate": 5.1420197853471154e-05, "loss": 0.3191, "step": 32450, "task_loss": 0.5432778596878052 }, { "compression_loss": 0.0, "distillation_loss": 0.3432601988315582, "epoch": 11.73, "learning_rate": 5.13499683464554e-05, "loss": 0.3461, "step": 32460, "task_loss": 0.9408321976661682 }, { "compression_loss": 0.0, "distillation_loss": 0.22599393129348755, "epoch": 11.73, "learning_rate": 5.1279654533504885e-05, "loss": 0.2994, "step": 32470, "task_loss": 0.27330297231674194 }, { "compression_loss": 0.0, "distillation_loss": 0.33337026834487915, "epoch": 11.74, "learning_rate": 5.12092567771813e-05, "loss": 0.3018, "step": 32480, "task_loss": 0.4547075927257538 }, { "compression_loss": 0.0, "distillation_loss": 0.3493223786354065, "epoch": 11.74, "learning_rate": 5.1138775440479126e-05, "loss": 0.325, "step": 32490, "task_loss": 0.41317543387413025 }, { "compression_loss": 0.0, "distillation_loss": 0.4307718575000763, "epoch": 11.75, "learning_rate": 5.106821088682384e-05, "loss": 0.3321, "step": 32500, "task_loss": 1.1518049240112305 }, { "epoch": 11.75, "eval_exact_match": 82.68684957426679, "eval_f1": 89.72324307540484, "step": 32500 }, { "compression_loss": 0.0, "distillation_loss": 0.4119787812232971, "epoch": 11.75, "learning_rate": 5.0997563480070024e-05, "loss": 0.3619, "step": 32510, "task_loss": 0.5112029314041138 }, { "compression_loss": 0.0, "distillation_loss": 0.255260705947876, "epoch": 11.75, "learning_rate": 5.093391027566244e-05, "loss": 0.3288, "step": 32520, "task_loss": 1.0041379928588867 }, { "compression_loss": 0.0, "distillation_loss": 0.3400477170944214, "epoch": 11.76, "learning_rate": 5.086310645196928e-05, "loss": 0.3147, "step": 32530, "task_loss": 0.41630157828330994 }, { "compression_loss": 0.0, "distillation_loss": 0.293648362159729, "epoch": 11.76, "learning_rate": 5.079222083276504e-05, "loss": 0.3078, "step": 32540, "task_loss": 0.4691644608974457 }, { "compression_loss": 0.0, "distillation_loss": 0.3460580110549927, "epoch": 11.76, "learning_rate": 5.072125378355978e-05, "loss": 0.3015, "step": 32550, "task_loss": 0.6114804744720459 }, { "compression_loss": 0.0, "distillation_loss": 0.2778604030609131, "epoch": 11.77, "learning_rate": 5.0650205670283475e-05, "loss": 0.2822, "step": 32560, "task_loss": 0.5171606540679932 }, { "compression_loss": 0.0, "distillation_loss": 0.2882919907569885, "epoch": 11.77, "learning_rate": 5.057907685928408e-05, "loss": 0.3564, "step": 32570, "task_loss": 0.25201505422592163 }, { "compression_loss": 0.0, "distillation_loss": 0.2875009775161743, "epoch": 11.77, "learning_rate": 5.050786771732567e-05, "loss": 0.3119, "step": 32580, "task_loss": 0.5717591047286987 }, { "compression_loss": 0.0, "distillation_loss": 0.3220732808113098, "epoch": 11.78, "learning_rate": 5.043657861158653e-05, "loss": 0.3817, "step": 32590, "task_loss": 0.6619873046875 }, { "compression_loss": 0.0, "distillation_loss": 0.29478880763053894, "epoch": 11.78, "learning_rate": 5.036520990965726e-05, "loss": 0.3383, "step": 32600, "task_loss": 0.4968605041503906 }, { "compression_loss": 0.0, "distillation_loss": 0.2925856113433838, "epoch": 11.79, "learning_rate": 5.029376197953888e-05, "loss": 0.3088, "step": 32610, "task_loss": 0.5669705271720886 }, { "compression_loss": 0.0, "distillation_loss": 0.3187752366065979, "epoch": 11.79, "learning_rate": 5.022223518964095e-05, "loss": 0.3481, "step": 32620, "task_loss": 0.3745642900466919 }, { "compression_loss": 0.0, "distillation_loss": 0.3223860263824463, "epoch": 11.79, "learning_rate": 5.015062990877964e-05, "loss": 0.3286, "step": 32630, "task_loss": 0.4044720530509949 }, { "compression_loss": 0.0, "distillation_loss": 0.29211077094078064, "epoch": 11.8, "learning_rate": 5.007894650617588e-05, "loss": 0.3262, "step": 32640, "task_loss": 0.3009093999862671 }, { "compression_loss": 0.0, "distillation_loss": 0.28116658329963684, "epoch": 11.8, "learning_rate": 5.0007185351453374e-05, "loss": 0.3063, "step": 32650, "task_loss": 0.6478732824325562 }, { "compression_loss": 0.0, "distillation_loss": 0.28009843826293945, "epoch": 11.8, "learning_rate": 4.9935346814636785e-05, "loss": 0.3234, "step": 32660, "task_loss": 0.6960561275482178 }, { "compression_loss": 0.0, "distillation_loss": 0.42953410744667053, "epoch": 11.81, "learning_rate": 4.9863431266149745e-05, "loss": 0.3186, "step": 32670, "task_loss": 0.5555198192596436 }, { "compression_loss": 0.0, "distillation_loss": 0.36337924003601074, "epoch": 11.81, "learning_rate": 4.979143907681301e-05, "loss": 0.3635, "step": 32680, "task_loss": 0.5978571176528931 }, { "compression_loss": 0.0, "distillation_loss": 0.24997106194496155, "epoch": 11.81, "learning_rate": 4.9719370617842544e-05, "loss": 0.3219, "step": 32690, "task_loss": 0.2275729477405548 }, { "compression_loss": 0.0, "distillation_loss": 0.35133838653564453, "epoch": 11.82, "learning_rate": 4.964722626084752e-05, "loss": 0.3278, "step": 32700, "task_loss": 0.3992236852645874 }, { "compression_loss": 0.0, "distillation_loss": 0.23231764137744904, "epoch": 11.82, "learning_rate": 4.9575006377828535e-05, "loss": 0.3368, "step": 32710, "task_loss": 0.3263181447982788 }, { "compression_loss": 0.0, "distillation_loss": 0.21223102509975433, "epoch": 11.83, "learning_rate": 4.9502711341175553e-05, "loss": 0.2913, "step": 32720, "task_loss": 0.4191708564758301 }, { "compression_loss": 0.0, "distillation_loss": 0.41620898246765137, "epoch": 11.83, "learning_rate": 4.9430341523666124e-05, "loss": 0.3523, "step": 32730, "task_loss": 0.41136446595191956 }, { "compression_loss": 0.0, "distillation_loss": 0.40111666917800903, "epoch": 11.83, "learning_rate": 4.935789729846335e-05, "loss": 0.3663, "step": 32740, "task_loss": 0.567755937576294 }, { "compression_loss": 0.0, "distillation_loss": 0.2847909927368164, "epoch": 11.84, "learning_rate": 4.928537903911402e-05, "loss": 0.326, "step": 32750, "task_loss": 0.5023891925811768 }, { "epoch": 11.84, "eval_exact_match": 83.04635761589404, "eval_f1": 89.84907706561316, "step": 32750 }, { "compression_loss": 0.0, "distillation_loss": 0.30441999435424805, "epoch": 11.84, "learning_rate": 4.921278711954666e-05, "loss": 0.3179, "step": 32760, "task_loss": 0.5857506990432739 }, { "compression_loss": 0.0, "distillation_loss": 0.24043527245521545, "epoch": 11.84, "learning_rate": 4.9140121914069626e-05, "loss": 0.3814, "step": 32770, "task_loss": 0.7202674150466919 }, { "compression_loss": 0.0, "distillation_loss": 0.3235805332660675, "epoch": 11.85, "learning_rate": 4.9067383797369146e-05, "loss": 0.331, "step": 32780, "task_loss": 1.0481606721878052 }, { "compression_loss": 0.0, "distillation_loss": 0.23604023456573486, "epoch": 11.85, "learning_rate": 4.899457314450742e-05, "loss": 0.3057, "step": 32790, "task_loss": 0.3260385990142822 }, { "compression_loss": 0.0, "distillation_loss": 0.31892240047454834, "epoch": 11.85, "learning_rate": 4.8921690330920637e-05, "loss": 0.3378, "step": 32800, "task_loss": 0.5068680047988892 }, { "compression_loss": 0.0, "distillation_loss": 0.30003371834754944, "epoch": 11.86, "learning_rate": 4.884873573241711e-05, "loss": 0.3413, "step": 32810, "task_loss": 0.8090519905090332 }, { "compression_loss": 0.0, "distillation_loss": 0.3101547956466675, "epoch": 11.86, "learning_rate": 4.8775709725175277e-05, "loss": 0.3439, "step": 32820, "task_loss": 0.572382926940918 }, { "compression_loss": 0.0, "distillation_loss": 0.2574950158596039, "epoch": 11.86, "learning_rate": 4.870261268574178e-05, "loss": 0.3328, "step": 32830, "task_loss": 0.7044030427932739 }, { "compression_loss": 0.0, "distillation_loss": 0.27755796909332275, "epoch": 11.87, "learning_rate": 4.8629444991029544e-05, "loss": 0.3105, "step": 32840, "task_loss": 0.3686292767524719 }, { "compression_loss": 0.0, "distillation_loss": 0.2532276511192322, "epoch": 11.87, "learning_rate": 4.8556207018315793e-05, "loss": 0.3486, "step": 32850, "task_loss": 0.602851390838623 }, { "compression_loss": 0.0, "distillation_loss": 0.23359063267707825, "epoch": 11.88, "learning_rate": 4.848289914524017e-05, "loss": 0.3187, "step": 32860, "task_loss": 0.5139017105102539 }, { "compression_loss": 0.0, "distillation_loss": 0.33160653710365295, "epoch": 11.88, "learning_rate": 4.840952174980269e-05, "loss": 0.3345, "step": 32870, "task_loss": 0.6995959281921387 }, { "compression_loss": 0.0, "distillation_loss": 0.3129425644874573, "epoch": 11.88, "learning_rate": 4.8336075210361884e-05, "loss": 0.3458, "step": 32880, "task_loss": 0.4224226474761963 }, { "compression_loss": 0.0, "distillation_loss": 0.3381350040435791, "epoch": 11.89, "learning_rate": 4.826255990563282e-05, "loss": 0.3631, "step": 32890, "task_loss": 0.5300081968307495 }, { "compression_loss": 0.0, "distillation_loss": 0.32694685459136963, "epoch": 11.89, "learning_rate": 4.8188976214685125e-05, "loss": 0.3714, "step": 32900, "task_loss": 0.3771377205848694 }, { "compression_loss": 0.0, "distillation_loss": 0.35713815689086914, "epoch": 11.89, "learning_rate": 4.811532451694104e-05, "loss": 0.3172, "step": 32910, "task_loss": 0.4956726133823395 }, { "compression_loss": 0.0, "distillation_loss": 0.2920774519443512, "epoch": 11.9, "learning_rate": 4.8041605192173505e-05, "loss": 0.3005, "step": 32920, "task_loss": 0.30127841234207153 }, { "compression_loss": 0.0, "distillation_loss": 0.4189726710319519, "epoch": 11.9, "learning_rate": 4.796781862050413e-05, "loss": 0.3277, "step": 32930, "task_loss": 0.741553544998169 }, { "compression_loss": 0.0, "distillation_loss": 0.3866938054561615, "epoch": 11.9, "learning_rate": 4.789396518240132e-05, "loss": 0.3125, "step": 32940, "task_loss": 0.5937274694442749 }, { "compression_loss": 0.0, "distillation_loss": 0.26409783959388733, "epoch": 11.91, "learning_rate": 4.782004525867819e-05, "loss": 0.3068, "step": 32950, "task_loss": 0.2822565734386444 }, { "compression_loss": 0.0, "distillation_loss": 0.2546120285987854, "epoch": 11.91, "learning_rate": 4.774605923049076e-05, "loss": 0.3082, "step": 32960, "task_loss": 0.7707274556159973 }, { "compression_loss": 0.0, "distillation_loss": 0.5309508442878723, "epoch": 11.92, "learning_rate": 4.767200747933586e-05, "loss": 0.3719, "step": 32970, "task_loss": 0.5996285676956177 }, { "compression_loss": 0.0, "distillation_loss": 0.350615531206131, "epoch": 11.92, "learning_rate": 4.759789038704922e-05, "loss": 0.3197, "step": 32980, "task_loss": 0.44299453496932983 }, { "compression_loss": 0.0, "distillation_loss": 0.2523714303970337, "epoch": 11.92, "learning_rate": 4.7523708335803504e-05, "loss": 0.3624, "step": 32990, "task_loss": 0.663590669631958 }, { "compression_loss": 0.0, "distillation_loss": 0.24829280376434326, "epoch": 11.93, "learning_rate": 4.7449461708106276e-05, "loss": 0.294, "step": 33000, "task_loss": 0.3095482289791107 }, { "epoch": 11.93, "eval_exact_match": 82.90444654683066, "eval_f1": 89.76255523912135, "step": 33000 }, { "compression_loss": 0.0, "distillation_loss": 0.3185913562774658, "epoch": 11.93, "learning_rate": 4.7375150886798146e-05, "loss": 0.2944, "step": 33010, "task_loss": 0.35980331897735596 }, { "compression_loss": 0.0, "distillation_loss": 0.28336402773857117, "epoch": 11.93, "learning_rate": 4.73007762550507e-05, "loss": 0.3057, "step": 33020, "task_loss": 0.38741469383239746 }, { "compression_loss": 0.0, "distillation_loss": 0.329383909702301, "epoch": 11.94, "learning_rate": 4.722633819636453e-05, "loss": 0.3433, "step": 33030, "task_loss": 0.597511351108551 }, { "compression_loss": 0.0, "distillation_loss": 0.24982626736164093, "epoch": 11.94, "learning_rate": 4.715183709456731e-05, "loss": 0.3088, "step": 33040, "task_loss": 0.361911416053772 }, { "compression_loss": 0.0, "distillation_loss": 0.40126579999923706, "epoch": 11.94, "learning_rate": 4.707727333381177e-05, "loss": 0.3145, "step": 33050, "task_loss": 0.30749595165252686 }, { "compression_loss": 0.0, "distillation_loss": 0.2451639622449875, "epoch": 11.95, "learning_rate": 4.7002647298573725e-05, "loss": 0.3047, "step": 33060, "task_loss": 0.4949933886528015 }, { "compression_loss": 0.0, "distillation_loss": 0.22195953130722046, "epoch": 11.95, "learning_rate": 4.692795937365013e-05, "loss": 0.3003, "step": 33070, "task_loss": 0.4203670620918274 }, { "compression_loss": 0.0, "distillation_loss": 0.2641502618789673, "epoch": 11.96, "learning_rate": 4.685320994415701e-05, "loss": 0.3094, "step": 33080, "task_loss": 0.35039326548576355 }, { "compression_loss": 0.0, "distillation_loss": 0.39277034997940063, "epoch": 11.96, "learning_rate": 4.677839939552759e-05, "loss": 0.3251, "step": 33090, "task_loss": 0.46645432710647583 }, { "compression_loss": 0.0, "distillation_loss": 0.32009559869766235, "epoch": 11.96, "learning_rate": 4.670352811351019e-05, "loss": 0.3152, "step": 33100, "task_loss": 0.6483212113380432 }, { "compression_loss": 0.0, "distillation_loss": 0.2656344175338745, "epoch": 11.97, "learning_rate": 4.662859648416633e-05, "loss": 0.3348, "step": 33110, "task_loss": 0.69025719165802 }, { "compression_loss": 0.0, "distillation_loss": 0.3949262499809265, "epoch": 11.97, "learning_rate": 4.655360489386869e-05, "loss": 0.3289, "step": 33120, "task_loss": 0.7546541690826416 }, { "compression_loss": 0.0, "distillation_loss": 0.32530757784843445, "epoch": 11.97, "learning_rate": 4.647855372929912e-05, "loss": 0.3205, "step": 33130, "task_loss": 0.35394996404647827 }, { "compression_loss": 0.0, "distillation_loss": 0.21367156505584717, "epoch": 11.98, "learning_rate": 4.640344337744667e-05, "loss": 0.3238, "step": 33140, "task_loss": 0.46600231528282166 }, { "compression_loss": 0.0, "distillation_loss": 0.2911337614059448, "epoch": 11.98, "learning_rate": 4.6328274225605556e-05, "loss": 0.3662, "step": 33150, "task_loss": 0.4676685035228729 }, { "compression_loss": 0.0, "distillation_loss": 0.28591835498809814, "epoch": 11.98, "learning_rate": 4.625304666137321e-05, "loss": 0.3161, "step": 33160, "task_loss": 0.36528101563453674 }, { "compression_loss": 0.0, "distillation_loss": 0.42281341552734375, "epoch": 11.99, "learning_rate": 4.617776107264826e-05, "loss": 0.3452, "step": 33170, "task_loss": 0.449604868888855 }, { "compression_loss": 0.0, "distillation_loss": 0.35083454847335815, "epoch": 11.99, "learning_rate": 4.6102417847628494e-05, "loss": 0.3073, "step": 33180, "task_loss": 0.4036543667316437 }, { "compression_loss": 0.0, "distillation_loss": 0.2771892547607422, "epoch": 11.99, "learning_rate": 4.602701737480895e-05, "loss": 0.3069, "step": 33190, "task_loss": 0.48936066031455994 }, { "compression_loss": 0.0, "distillation_loss": 0.30712735652923584, "epoch": 12.0, "learning_rate": 4.595156004297978e-05, "loss": 0.2947, "step": 33200, "task_loss": 0.6743454337120056 }, { "compression_loss": 0.0, "distillation_loss": 0.2192678451538086, "epoch": 12.0, "learning_rate": 4.5876046241224384e-05, "loss": 0.3159, "step": 33210, "task_loss": 0.6000691652297974 }, { "compression_loss": 0.0, "distillation_loss": 0.30531418323516846, "epoch": 12.01, "learning_rate": 4.580047635891733e-05, "loss": 0.2964, "step": 33220, "task_loss": 0.49903973937034607 }, { "compression_loss": 0.0, "distillation_loss": 0.2905973196029663, "epoch": 12.01, "learning_rate": 4.572485078572231e-05, "loss": 0.2861, "step": 33230, "task_loss": 0.5353321433067322 }, { "compression_loss": 0.0, "distillation_loss": 0.25237756967544556, "epoch": 12.01, "learning_rate": 4.564916991159023e-05, "loss": 0.257, "step": 33240, "task_loss": 0.4064103364944458 }, { "compression_loss": 0.0, "distillation_loss": 0.27284255623817444, "epoch": 12.02, "learning_rate": 4.5573434126757125e-05, "loss": 0.2949, "step": 33250, "task_loss": 0.46959832310676575 }, { "epoch": 12.02, "eval_exact_match": 83.06527909176916, "eval_f1": 89.86909660147846, "step": 33250 }, { "compression_loss": 0.0, "distillation_loss": 0.3078652620315552, "epoch": 12.02, "learning_rate": 4.5497643821742164e-05, "loss": 0.2734, "step": 33260, "task_loss": 0.5497879981994629 }, { "compression_loss": 0.0, "distillation_loss": 0.3155045509338379, "epoch": 12.02, "learning_rate": 4.5421799387345656e-05, "loss": 0.2881, "step": 33270, "task_loss": 0.45438718795776367 }, { "compression_loss": 0.0, "distillation_loss": 0.28334158658981323, "epoch": 12.03, "learning_rate": 4.534590121464699e-05, "loss": 0.2867, "step": 33280, "task_loss": 0.575757622718811 }, { "compression_loss": 0.0, "distillation_loss": 0.22371214628219604, "epoch": 12.03, "learning_rate": 4.526994969500268e-05, "loss": 0.2901, "step": 33290, "task_loss": 0.511568546295166 }, { "compression_loss": 0.0, "distillation_loss": 0.2883744239807129, "epoch": 12.03, "learning_rate": 4.5193945220044304e-05, "loss": 0.2849, "step": 33300, "task_loss": 0.4415782392024994 }, { "compression_loss": 0.0, "distillation_loss": 0.3361637592315674, "epoch": 12.04, "learning_rate": 4.5117888181676476e-05, "loss": 0.3318, "step": 33310, "task_loss": 0.5499709844589233 }, { "compression_loss": 0.0, "distillation_loss": 0.31543105840682983, "epoch": 12.04, "learning_rate": 4.5041778972074884e-05, "loss": 0.3009, "step": 33320, "task_loss": 0.6422604322433472 }, { "compression_loss": 0.0, "distillation_loss": 0.34402894973754883, "epoch": 12.05, "learning_rate": 4.4965617983684185e-05, "loss": 0.3017, "step": 33330, "task_loss": 0.5380134582519531 }, { "compression_loss": 0.0, "distillation_loss": 0.2677575945854187, "epoch": 12.05, "learning_rate": 4.4889405609216065e-05, "loss": 0.313, "step": 33340, "task_loss": 0.6588090658187866 }, { "compression_loss": 0.0, "distillation_loss": 0.26906073093414307, "epoch": 12.05, "learning_rate": 4.481314224164713e-05, "loss": 0.2905, "step": 33350, "task_loss": 0.5632128715515137 }, { "compression_loss": 0.0, "distillation_loss": 0.25012487173080444, "epoch": 12.06, "learning_rate": 4.473682827421697e-05, "loss": 0.3141, "step": 33360, "task_loss": 0.5552922487258911 }, { "compression_loss": 0.0, "distillation_loss": 0.2697087526321411, "epoch": 12.06, "learning_rate": 4.4660464100426054e-05, "loss": 0.298, "step": 33370, "task_loss": 0.8239359855651855 }, { "compression_loss": 0.0, "distillation_loss": 0.24572470784187317, "epoch": 12.06, "learning_rate": 4.458405011403374e-05, "loss": 0.3245, "step": 33380, "task_loss": 0.5354356169700623 }, { "compression_loss": 0.0, "distillation_loss": 0.27262991666793823, "epoch": 12.07, "learning_rate": 4.450758670905624e-05, "loss": 0.2973, "step": 33390, "task_loss": 0.6752634048461914 }, { "compression_loss": 0.0, "distillation_loss": 0.3405141830444336, "epoch": 12.07, "learning_rate": 4.4431074279764585e-05, "loss": 0.2833, "step": 33400, "task_loss": 0.645380973815918 }, { "compression_loss": 0.0, "distillation_loss": 0.26243701577186584, "epoch": 12.07, "learning_rate": 4.4354513220682575e-05, "loss": 0.2856, "step": 33410, "task_loss": 0.2894069254398346 }, { "compression_loss": 0.0, "distillation_loss": 0.2766551673412323, "epoch": 12.08, "learning_rate": 4.427790392658479e-05, "loss": 0.2967, "step": 33420, "task_loss": 0.4259956181049347 }, { "compression_loss": 0.0, "distillation_loss": 0.295553982257843, "epoch": 12.08, "learning_rate": 4.4201246792494514e-05, "loss": 0.2812, "step": 33430, "task_loss": 0.4812880754470825 }, { "compression_loss": 0.0, "distillation_loss": 0.27736884355545044, "epoch": 12.09, "learning_rate": 4.41245422136817e-05, "loss": 0.2961, "step": 33440, "task_loss": 0.7917148470878601 }, { "compression_loss": 0.0, "distillation_loss": 0.29041779041290283, "epoch": 12.09, "learning_rate": 4.404779058566094e-05, "loss": 0.2964, "step": 33450, "task_loss": 0.6089922189712524 }, { "compression_loss": 0.0, "distillation_loss": 0.22682823240756989, "epoch": 12.09, "learning_rate": 4.397099230418947e-05, "loss": 0.2697, "step": 33460, "task_loss": 0.293168842792511 }, { "compression_loss": 0.0, "distillation_loss": 0.3263562321662903, "epoch": 12.1, "learning_rate": 4.3894147765265014e-05, "loss": 0.2857, "step": 33470, "task_loss": 0.5664174556732178 }, { "compression_loss": 0.0, "distillation_loss": 0.24178797006607056, "epoch": 12.1, "learning_rate": 4.381725736512387e-05, "loss": 0.304, "step": 33480, "task_loss": 0.3365327715873718 }, { "compression_loss": 0.0, "distillation_loss": 0.22949285805225372, "epoch": 12.1, "learning_rate": 4.374032150023881e-05, "loss": 0.2844, "step": 33490, "task_loss": 0.2510908842086792 }, { "compression_loss": 0.0, "distillation_loss": 0.3991518020629883, "epoch": 12.11, "learning_rate": 4.366334056731701e-05, "loss": 0.3076, "step": 33500, "task_loss": 0.7375933527946472 }, { "epoch": 12.11, "eval_exact_match": 82.7057710501419, "eval_f1": 89.65179404829924, "step": 33500 }, { "compression_loss": 0.0, "distillation_loss": 0.24318742752075195, "epoch": 12.11, "learning_rate": 4.358631496329804e-05, "loss": 0.2705, "step": 33510, "task_loss": 0.706390917301178 }, { "compression_loss": 0.0, "distillation_loss": 0.3634289801120758, "epoch": 12.11, "learning_rate": 4.350924508535184e-05, "loss": 0.2967, "step": 33520, "task_loss": 0.8388668298721313 }, { "compression_loss": 0.0, "distillation_loss": 0.2415129542350769, "epoch": 12.12, "learning_rate": 4.3432131330876594e-05, "loss": 0.2609, "step": 33530, "task_loss": 0.35680413246154785 }, { "compression_loss": 0.0, "distillation_loss": 0.23396481573581696, "epoch": 12.12, "learning_rate": 4.335497409749677e-05, "loss": 0.3141, "step": 33540, "task_loss": 0.7150663137435913 }, { "compression_loss": 0.0, "distillation_loss": 0.3096352219581604, "epoch": 12.13, "learning_rate": 4.327777378306099e-05, "loss": 0.2872, "step": 33550, "task_loss": 0.46862053871154785 }, { "compression_loss": 0.0, "distillation_loss": 0.2673993408679962, "epoch": 12.13, "learning_rate": 4.3200530785640036e-05, "loss": 0.2759, "step": 33560, "task_loss": 0.5018470883369446 }, { "compression_loss": 0.0, "distillation_loss": 0.21115058660507202, "epoch": 12.13, "learning_rate": 4.312324550352479e-05, "loss": 0.2761, "step": 33570, "task_loss": 0.22329533100128174 }, { "compression_loss": 0.0, "distillation_loss": 0.3349929749965668, "epoch": 12.14, "learning_rate": 4.3045918335224154e-05, "loss": 0.3091, "step": 33580, "task_loss": 0.651825487613678 }, { "compression_loss": 0.0, "distillation_loss": 0.24410584568977356, "epoch": 12.14, "learning_rate": 4.296854967946301e-05, "loss": 0.2903, "step": 33590, "task_loss": 0.5682234764099121 }, { "compression_loss": 0.0, "distillation_loss": 0.3624010980129242, "epoch": 12.14, "learning_rate": 4.289113993518015e-05, "loss": 0.2888, "step": 33600, "task_loss": 0.6332154870033264 }, { "compression_loss": 0.0, "distillation_loss": 0.25731930136680603, "epoch": 12.15, "learning_rate": 4.2813689501526246e-05, "loss": 0.3136, "step": 33610, "task_loss": 0.5591039061546326 }, { "compression_loss": 0.0, "distillation_loss": 0.256285697221756, "epoch": 12.15, "learning_rate": 4.2736198777861805e-05, "loss": 0.285, "step": 33620, "task_loss": 0.45655739307403564 }, { "compression_loss": 0.0, "distillation_loss": 0.21870087087154388, "epoch": 12.15, "learning_rate": 4.265866816375503e-05, "loss": 0.2756, "step": 33630, "task_loss": 0.47334593534469604 }, { "compression_loss": 0.0, "distillation_loss": 0.20090892910957336, "epoch": 12.16, "learning_rate": 4.258109805897985e-05, "loss": 0.2775, "step": 33640, "task_loss": 0.42821335792541504 }, { "compression_loss": 0.0, "distillation_loss": 0.28893759846687317, "epoch": 12.16, "learning_rate": 4.2503488863513794e-05, "loss": 0.2808, "step": 33650, "task_loss": 0.40764930844306946 }, { "compression_loss": 0.0, "distillation_loss": 0.21737822890281677, "epoch": 12.16, "learning_rate": 4.242584097753599e-05, "loss": 0.266, "step": 33660, "task_loss": 0.5878552198410034 }, { "compression_loss": 0.0, "distillation_loss": 0.2558310329914093, "epoch": 12.17, "learning_rate": 4.2348154801425044e-05, "loss": 0.2763, "step": 33670, "task_loss": 0.48859918117523193 }, { "compression_loss": 0.0, "distillation_loss": 0.26445093750953674, "epoch": 12.17, "learning_rate": 4.227043073575699e-05, "loss": 0.2772, "step": 33680, "task_loss": 0.4119749367237091 }, { "compression_loss": 0.0, "distillation_loss": 0.23118102550506592, "epoch": 12.18, "learning_rate": 4.219266918130327e-05, "loss": 0.2699, "step": 33690, "task_loss": 0.5234097242355347 }, { "compression_loss": 0.0, "distillation_loss": 0.22412827610969543, "epoch": 12.18, "learning_rate": 4.211487053902858e-05, "loss": 0.2676, "step": 33700, "task_loss": 0.4862504005432129 }, { "compression_loss": 0.0, "distillation_loss": 0.24602150917053223, "epoch": 12.18, "learning_rate": 4.203703521008891e-05, "loss": 0.2841, "step": 33710, "task_loss": 0.6615269184112549 }, { "compression_loss": 0.0, "distillation_loss": 0.2593457102775574, "epoch": 12.19, "learning_rate": 4.195916359582938e-05, "loss": 0.2693, "step": 33720, "task_loss": 0.8488281965255737 }, { "compression_loss": 0.0, "distillation_loss": 0.27883416414260864, "epoch": 12.19, "learning_rate": 4.188125609778219e-05, "loss": 0.3318, "step": 33730, "task_loss": 0.54170823097229 }, { "compression_loss": 0.0, "distillation_loss": 0.2291126549243927, "epoch": 12.19, "learning_rate": 4.180331311766464e-05, "loss": 0.3152, "step": 33740, "task_loss": 0.2940157651901245 }, { "compression_loss": 0.0, "distillation_loss": 0.2450833022594452, "epoch": 12.2, "learning_rate": 4.1725335057376915e-05, "loss": 0.2688, "step": 33750, "task_loss": 0.35134434700012207 }, { "epoch": 12.2, "eval_exact_match": 83.12204351939451, "eval_f1": 89.85422216477365, "step": 33750 }, { "compression_loss": 0.0, "distillation_loss": 0.2731408178806305, "epoch": 12.2, "learning_rate": 4.164732231900013e-05, "loss": 0.2812, "step": 33760, "task_loss": 0.45388272404670715 }, { "compression_loss": 0.0, "distillation_loss": 0.23454083502292633, "epoch": 12.2, "learning_rate": 4.15692753047942e-05, "loss": 0.2674, "step": 33770, "task_loss": 0.18816009163856506 }, { "compression_loss": 0.0, "distillation_loss": 0.25454145669937134, "epoch": 12.21, "learning_rate": 4.149119441719576e-05, "loss": 0.3194, "step": 33780, "task_loss": 0.46342360973358154 }, { "compression_loss": 0.0, "distillation_loss": 0.21274858713150024, "epoch": 12.21, "learning_rate": 4.141308005881614e-05, "loss": 0.2808, "step": 33790, "task_loss": 0.5964909195899963 }, { "compression_loss": 0.0, "distillation_loss": 0.2354692816734314, "epoch": 12.22, "learning_rate": 4.133493263243922e-05, "loss": 0.2998, "step": 33800, "task_loss": 0.20641979575157166 }, { "compression_loss": 0.0, "distillation_loss": 0.19088518619537354, "epoch": 12.22, "learning_rate": 4.1256752541019415e-05, "loss": 0.2725, "step": 33810, "task_loss": 0.28886133432388306 }, { "compression_loss": 0.0, "distillation_loss": 0.3029336631298065, "epoch": 12.22, "learning_rate": 4.1178540187679585e-05, "loss": 0.2913, "step": 33820, "task_loss": 0.6475502252578735 }, { "compression_loss": 0.0, "distillation_loss": 0.2550453841686249, "epoch": 12.23, "learning_rate": 4.1100295975708904e-05, "loss": 0.2773, "step": 33830, "task_loss": 0.3866943120956421 }, { "compression_loss": 0.0, "distillation_loss": 0.26052963733673096, "epoch": 12.23, "learning_rate": 4.102202030856085e-05, "loss": 0.2875, "step": 33840, "task_loss": 0.4841282367706299 }, { "compression_loss": 0.0, "distillation_loss": 0.36123284697532654, "epoch": 12.23, "learning_rate": 4.0943713589851066e-05, "loss": 0.2902, "step": 33850, "task_loss": 0.39048629999160767 }, { "compression_loss": 0.0, "distillation_loss": 0.25504371523857117, "epoch": 12.24, "learning_rate": 4.086537622335534e-05, "loss": 0.2961, "step": 33860, "task_loss": 0.6866843700408936 }, { "compression_loss": 0.0, "distillation_loss": 0.23795445263385773, "epoch": 12.24, "learning_rate": 4.0787008613007484e-05, "loss": 0.2726, "step": 33870, "task_loss": 0.35257768630981445 }, { "compression_loss": 0.0, "distillation_loss": 0.23061011731624603, "epoch": 12.24, "learning_rate": 4.070861116289723e-05, "loss": 0.2754, "step": 33880, "task_loss": 0.5739815831184387 }, { "compression_loss": 0.0, "distillation_loss": 0.35582464933395386, "epoch": 12.25, "learning_rate": 4.063018427726821e-05, "loss": 0.3302, "step": 33890, "task_loss": 0.5090781450271606 }, { "compression_loss": 0.0, "distillation_loss": 0.19990065693855286, "epoch": 12.25, "learning_rate": 4.05517283605158e-05, "loss": 0.2506, "step": 33900, "task_loss": 0.41246160864830017 }, { "compression_loss": 0.0, "distillation_loss": 0.19301462173461914, "epoch": 12.26, "learning_rate": 4.047324381718511e-05, "loss": 0.2695, "step": 33910, "task_loss": 0.27115631103515625 }, { "compression_loss": 0.0, "distillation_loss": 0.2891654968261719, "epoch": 12.26, "learning_rate": 4.039473105196883e-05, "loss": 0.2885, "step": 33920, "task_loss": 0.4743727445602417 }, { "compression_loss": 0.0, "distillation_loss": 0.21699106693267822, "epoch": 12.26, "learning_rate": 4.031619046970517e-05, "loss": 0.2759, "step": 33930, "task_loss": 0.4584755301475525 }, { "compression_loss": 0.0, "distillation_loss": 0.2696276903152466, "epoch": 12.27, "learning_rate": 4.02376224753758e-05, "loss": 0.2789, "step": 33940, "task_loss": 0.42498350143432617 }, { "compression_loss": 0.0, "distillation_loss": 0.3191455602645874, "epoch": 12.27, "learning_rate": 4.01590274741037e-05, "loss": 0.3073, "step": 33950, "task_loss": 0.5300410985946655 }, { "compression_loss": 0.0, "distillation_loss": 0.23330631852149963, "epoch": 12.27, "learning_rate": 4.008040587115112e-05, "loss": 0.2813, "step": 33960, "task_loss": 0.3184712529182434 }, { "compression_loss": 0.0, "distillation_loss": 0.2462092638015747, "epoch": 12.28, "learning_rate": 4.000175807191752e-05, "loss": 0.2358, "step": 33970, "task_loss": 0.5075340270996094 }, { "compression_loss": 0.0, "distillation_loss": 0.27791628241539, "epoch": 12.28, "learning_rate": 3.992308448193736e-05, "loss": 0.275, "step": 33980, "task_loss": 0.5956826210021973 }, { "compression_loss": 0.0, "distillation_loss": 0.28359711170196533, "epoch": 12.28, "learning_rate": 3.984438550687815e-05, "loss": 0.2697, "step": 33990, "task_loss": 0.415661484003067 }, { "compression_loss": 0.0, "distillation_loss": 0.28219592571258545, "epoch": 12.29, "learning_rate": 3.976566155253826e-05, "loss": 0.3357, "step": 34000, "task_loss": 0.6862603425979614 }, { "epoch": 12.29, "eval_exact_match": 83.07473982970671, "eval_f1": 89.81688188992857, "step": 34000 }, { "compression_loss": 0.0, "distillation_loss": 0.23105570673942566, "epoch": 12.29, "learning_rate": 3.9686913024844855e-05, "loss": 0.2561, "step": 34010, "task_loss": 0.22716933488845825 }, { "compression_loss": 0.0, "distillation_loss": 0.23710978031158447, "epoch": 12.29, "learning_rate": 3.960814032985186e-05, "loss": 0.2952, "step": 34020, "task_loss": 0.43777596950531006 }, { "compression_loss": 0.0, "distillation_loss": 0.3221627175807953, "epoch": 12.3, "learning_rate": 3.952934387373775e-05, "loss": 0.3072, "step": 34030, "task_loss": 0.644108235836029 }, { "compression_loss": 0.0, "distillation_loss": 0.31833896040916443, "epoch": 12.3, "learning_rate": 3.945052406280356e-05, "loss": 0.2708, "step": 34040, "task_loss": 0.3173179626464844 }, { "compression_loss": 0.0, "distillation_loss": 0.20409566164016724, "epoch": 12.31, "learning_rate": 3.937168130347074e-05, "loss": 0.3017, "step": 34050, "task_loss": 0.2745077610015869 }, { "compression_loss": 0.0, "distillation_loss": 0.21003592014312744, "epoch": 12.31, "learning_rate": 3.9292816002279055e-05, "loss": 0.2715, "step": 34060, "task_loss": 0.48984411358833313 }, { "compression_loss": 0.0, "distillation_loss": 0.24261227250099182, "epoch": 12.31, "learning_rate": 3.921392856588455e-05, "loss": 0.2858, "step": 34070, "task_loss": 0.5235458016395569 }, { "compression_loss": 0.0, "distillation_loss": 0.2940651476383209, "epoch": 12.32, "learning_rate": 3.9135019401057337e-05, "loss": 0.2886, "step": 34080, "task_loss": 0.642257571220398 }, { "compression_loss": 0.0, "distillation_loss": 0.36813968420028687, "epoch": 12.32, "learning_rate": 3.9056088914679635e-05, "loss": 0.2887, "step": 34090, "task_loss": 0.4939306974411011 }, { "compression_loss": 0.0, "distillation_loss": 0.2847978472709656, "epoch": 12.32, "learning_rate": 3.897713751374355e-05, "loss": 0.275, "step": 34100, "task_loss": 0.32528945803642273 }, { "compression_loss": 0.0, "distillation_loss": 0.23599255084991455, "epoch": 12.33, "learning_rate": 3.889816560534907e-05, "loss": 0.2762, "step": 34110, "task_loss": 0.4346529543399811 }, { "compression_loss": 0.0, "distillation_loss": 0.23587912321090698, "epoch": 12.33, "learning_rate": 3.881917359670191e-05, "loss": 0.3107, "step": 34120, "task_loss": 0.9650452136993408 }, { "compression_loss": 0.0, "distillation_loss": 0.35129642486572266, "epoch": 12.33, "learning_rate": 3.874016189511141e-05, "loss": 0.2857, "step": 34130, "task_loss": 0.754157543182373 }, { "compression_loss": 0.0, "distillation_loss": 0.21880073845386505, "epoch": 12.34, "learning_rate": 3.8661130907988505e-05, "loss": 0.2876, "step": 34140, "task_loss": 0.7854479551315308 }, { "compression_loss": 0.0, "distillation_loss": 0.23840254545211792, "epoch": 12.34, "learning_rate": 3.85820810428435e-05, "loss": 0.2877, "step": 34150, "task_loss": 0.46722444891929626 }, { "compression_loss": 0.0, "distillation_loss": 0.28145772218704224, "epoch": 12.35, "learning_rate": 3.850301270728412e-05, "loss": 0.2828, "step": 34160, "task_loss": 0.25351595878601074 }, { "compression_loss": 0.0, "distillation_loss": 0.24102526903152466, "epoch": 12.35, "learning_rate": 3.8423926309013267e-05, "loss": 0.3016, "step": 34170, "task_loss": 0.599195659160614 }, { "compression_loss": 0.0, "distillation_loss": 0.2735949456691742, "epoch": 12.35, "learning_rate": 3.834482225582701e-05, "loss": 0.2608, "step": 34180, "task_loss": 0.5106882452964783 }, { "compression_loss": 0.0, "distillation_loss": 0.30498093366622925, "epoch": 12.36, "learning_rate": 3.826570095561245e-05, "loss": 0.2833, "step": 34190, "task_loss": 0.5841542482376099 }, { "compression_loss": 0.0, "distillation_loss": 0.26933300495147705, "epoch": 12.36, "learning_rate": 3.8186562816345615e-05, "loss": 0.2951, "step": 34200, "task_loss": 0.6943976879119873 }, { "compression_loss": 0.0, "distillation_loss": 0.23551398515701294, "epoch": 12.36, "learning_rate": 3.8107408246089365e-05, "loss": 0.2878, "step": 34210, "task_loss": 0.5216627717018127 }, { "compression_loss": 0.0, "distillation_loss": 0.16981089115142822, "epoch": 12.37, "learning_rate": 3.802823765299128e-05, "loss": 0.2675, "step": 34220, "task_loss": 0.4077721834182739 }, { "compression_loss": 0.0, "distillation_loss": 0.32236146926879883, "epoch": 12.37, "learning_rate": 3.794905144528156e-05, "loss": 0.2892, "step": 34230, "task_loss": 0.38724401593208313 }, { "compression_loss": 0.0, "distillation_loss": 0.3376367688179016, "epoch": 12.37, "learning_rate": 3.7869850031270916e-05, "loss": 0.3007, "step": 34240, "task_loss": 0.293702632188797 }, { "compression_loss": 0.0, "distillation_loss": 0.3073074519634247, "epoch": 12.38, "learning_rate": 3.7790633819348476e-05, "loss": 0.3085, "step": 34250, "task_loss": 0.5922950506210327 }, { "epoch": 12.38, "eval_exact_match": 83.11258278145695, "eval_f1": 89.88753195084642, "step": 34250 }, { "compression_loss": 0.0, "distillation_loss": 0.35342901945114136, "epoch": 12.38, "learning_rate": 3.771140321797967e-05, "loss": 0.3038, "step": 34260, "task_loss": 0.5709468126296997 }, { "compression_loss": 0.0, "distillation_loss": 0.329917311668396, "epoch": 12.39, "learning_rate": 3.7632158635704116e-05, "loss": 0.2948, "step": 34270, "task_loss": 0.670756995677948 }, { "compression_loss": 0.0, "distillation_loss": 0.2711179554462433, "epoch": 12.39, "learning_rate": 3.755290048113352e-05, "loss": 0.2785, "step": 34280, "task_loss": 0.5022716522216797 }, { "compression_loss": 0.0, "distillation_loss": 0.27370622754096985, "epoch": 12.39, "learning_rate": 3.747362916294959e-05, "loss": 0.2923, "step": 34290, "task_loss": 0.7954187393188477 }, { "compression_loss": 0.0, "distillation_loss": 0.31736844778060913, "epoch": 12.4, "learning_rate": 3.7394345089901914e-05, "loss": 0.2757, "step": 34300, "task_loss": 0.5828770399093628 }, { "compression_loss": 0.0, "distillation_loss": 0.2652580440044403, "epoch": 12.4, "learning_rate": 3.731504867080579e-05, "loss": 0.2822, "step": 34310, "task_loss": 0.7285773754119873 }, { "compression_loss": 0.0, "distillation_loss": 0.31226301193237305, "epoch": 12.4, "learning_rate": 3.723574031454026e-05, "loss": 0.278, "step": 34320, "task_loss": 0.802804708480835 }, { "compression_loss": 0.0, "distillation_loss": 0.33275431394577026, "epoch": 12.41, "learning_rate": 3.715642043004586e-05, "loss": 0.2922, "step": 34330, "task_loss": 0.6281599402427673 }, { "compression_loss": 0.0, "distillation_loss": 0.28947165608406067, "epoch": 12.41, "learning_rate": 3.7077089426322586e-05, "loss": 0.3, "step": 34340, "task_loss": 0.38332265615463257 }, { "compression_loss": 0.0, "distillation_loss": 0.28193265199661255, "epoch": 12.41, "learning_rate": 3.699774771242778e-05, "loss": 0.2769, "step": 34350, "task_loss": 0.4479888677597046 }, { "compression_loss": 0.0, "distillation_loss": 0.22736775875091553, "epoch": 12.42, "learning_rate": 3.691839569747399e-05, "loss": 0.2843, "step": 34360, "task_loss": 0.5466663837432861 }, { "compression_loss": 0.0, "distillation_loss": 0.23163321614265442, "epoch": 12.42, "learning_rate": 3.68390337906269e-05, "loss": 0.2998, "step": 34370, "task_loss": 0.5594272613525391 }, { "compression_loss": 0.0, "distillation_loss": 0.22144640982151031, "epoch": 12.43, "learning_rate": 3.675966240110316e-05, "loss": 0.2973, "step": 34380, "task_loss": 0.4239853024482727 }, { "compression_loss": 0.0, "distillation_loss": 0.24784286320209503, "epoch": 12.43, "learning_rate": 3.668028193816837e-05, "loss": 0.2847, "step": 34390, "task_loss": 0.42308953404426575 }, { "compression_loss": 0.0, "distillation_loss": 0.245379239320755, "epoch": 12.43, "learning_rate": 3.6600892811134887e-05, "loss": 0.2732, "step": 34400, "task_loss": 0.4906768500804901 }, { "compression_loss": 0.0, "distillation_loss": 0.24077722430229187, "epoch": 12.44, "learning_rate": 3.652149542935974e-05, "loss": 0.2933, "step": 34410, "task_loss": 0.7623639702796936 }, { "compression_loss": 0.0, "distillation_loss": 0.27252477407455444, "epoch": 12.44, "learning_rate": 3.644209020224254e-05, "loss": 0.2908, "step": 34420, "task_loss": 0.45984989404678345 }, { "compression_loss": 0.0, "distillation_loss": 0.2691285014152527, "epoch": 12.44, "learning_rate": 3.6362677539223316e-05, "loss": 0.3059, "step": 34430, "task_loss": 0.5970430970191956 }, { "compression_loss": 0.0, "distillation_loss": 0.23063981533050537, "epoch": 12.45, "learning_rate": 3.628325784978048e-05, "loss": 0.2843, "step": 34440, "task_loss": 0.48113882541656494 }, { "compression_loss": 0.0, "distillation_loss": 0.3235628306865692, "epoch": 12.45, "learning_rate": 3.620383154342866e-05, "loss": 0.2779, "step": 34450, "task_loss": 0.42377451062202454 }, { "compression_loss": 0.0, "distillation_loss": 0.26905784010887146, "epoch": 12.45, "learning_rate": 3.612439902971659e-05, "loss": 0.2975, "step": 34460, "task_loss": 0.45585575699806213 }, { "compression_loss": 0.0, "distillation_loss": 0.26199162006378174, "epoch": 12.46, "learning_rate": 3.604496071822503e-05, "loss": 0.296, "step": 34470, "task_loss": 0.3905210494995117 }, { "compression_loss": 0.0, "distillation_loss": 0.24511463940143585, "epoch": 12.46, "learning_rate": 3.596551701856461e-05, "loss": 0.2921, "step": 34480, "task_loss": 0.36547398567199707 }, { "compression_loss": 0.0, "distillation_loss": 0.23199574649333954, "epoch": 12.46, "learning_rate": 3.5886068340373774e-05, "loss": 0.2606, "step": 34490, "task_loss": 0.40434420108795166 }, { "compression_loss": 0.0, "distillation_loss": 0.20520880818367004, "epoch": 12.47, "learning_rate": 3.580661509331662e-05, "loss": 0.2554, "step": 34500, "task_loss": 0.35260075330734253 }, { "epoch": 12.47, "eval_exact_match": 83.46263008514664, "eval_f1": 90.16885980331539, "step": 34500 }, { "compression_loss": 0.0, "distillation_loss": 0.2748667597770691, "epoch": 12.47, "learning_rate": 3.572715768708081e-05, "loss": 0.2598, "step": 34510, "task_loss": 0.41463881731033325 }, { "compression_loss": 0.0, "distillation_loss": 0.2754099369049072, "epoch": 12.48, "learning_rate": 3.564769653137545e-05, "loss": 0.3053, "step": 34520, "task_loss": 0.4715232253074646 }, { "compression_loss": 0.0, "distillation_loss": 0.21734142303466797, "epoch": 12.48, "learning_rate": 3.556823203592897e-05, "loss": 0.2774, "step": 34530, "task_loss": 0.37835949659347534 }, { "compression_loss": 0.0, "distillation_loss": 0.25816744565963745, "epoch": 12.48, "learning_rate": 3.548876461048703e-05, "loss": 0.2882, "step": 34540, "task_loss": 0.409110426902771 }, { "compression_loss": 0.0, "distillation_loss": 0.3097730875015259, "epoch": 12.49, "learning_rate": 3.5409294664810414e-05, "loss": 0.2758, "step": 34550, "task_loss": 0.6100963354110718 }, { "compression_loss": 0.0, "distillation_loss": 0.28760984539985657, "epoch": 12.49, "learning_rate": 3.5329822608672863e-05, "loss": 0.3113, "step": 34560, "task_loss": 0.49667757749557495 }, { "compression_loss": 0.0, "distillation_loss": 0.558922290802002, "epoch": 12.49, "learning_rate": 3.52582962923918e-05, "loss": 0.3384, "step": 34570, "task_loss": 0.5127227306365967 }, { "compression_loss": 0.0, "distillation_loss": 0.27977892756462097, "epoch": 12.5, "learning_rate": 3.517882135534249e-05, "loss": 0.2669, "step": 34580, "task_loss": 0.39960137009620667 }, { "compression_loss": 0.0, "distillation_loss": 0.253059446811676, "epoch": 12.5, "learning_rate": 3.509934549623014e-05, "loss": 0.2768, "step": 34590, "task_loss": 0.5317621231079102 }, { "compression_loss": 0.0, "distillation_loss": 0.2835026681423187, "epoch": 12.5, "learning_rate": 3.501986912485901e-05, "loss": 0.2904, "step": 34600, "task_loss": 0.4835074841976166 }, { "compression_loss": 0.0, "distillation_loss": 0.25000596046447754, "epoch": 12.51, "learning_rate": 3.494039265103597e-05, "loss": 0.2765, "step": 34610, "task_loss": 0.3324766755104065 }, { "compression_loss": 0.0, "distillation_loss": 0.21621635556221008, "epoch": 12.51, "learning_rate": 3.486091648456848e-05, "loss": 0.2912, "step": 34620, "task_loss": 0.3933340907096863 }, { "compression_loss": 0.0, "distillation_loss": 0.17932358384132385, "epoch": 12.52, "learning_rate": 3.478144103526236e-05, "loss": 0.269, "step": 34630, "task_loss": 0.4845110774040222 }, { "compression_loss": 0.0, "distillation_loss": 0.23999449610710144, "epoch": 12.52, "learning_rate": 3.470196671291976e-05, "loss": 0.2881, "step": 34640, "task_loss": 0.4917372763156891 }, { "compression_loss": 0.0, "distillation_loss": 0.405902624130249, "epoch": 12.52, "learning_rate": 3.4622493927337014e-05, "loss": 0.2936, "step": 34650, "task_loss": 0.43812593817710876 }, { "compression_loss": 0.0, "distillation_loss": 0.2771126925945282, "epoch": 12.53, "learning_rate": 3.4543023088302515e-05, "loss": 0.297, "step": 34660, "task_loss": 0.2483316957950592 }, { "compression_loss": 0.0, "distillation_loss": 0.23978747427463531, "epoch": 12.53, "learning_rate": 3.4463554605594636e-05, "loss": 0.3009, "step": 34670, "task_loss": 0.6507888436317444 }, { "compression_loss": 0.0, "distillation_loss": 0.2043863832950592, "epoch": 12.53, "learning_rate": 3.438408888897961e-05, "loss": 0.282, "step": 34680, "task_loss": 0.5260913372039795 }, { "compression_loss": 0.0, "distillation_loss": 0.2058839201927185, "epoch": 12.54, "learning_rate": 3.4304626348209364e-05, "loss": 0.2655, "step": 34690, "task_loss": 0.3453657031059265 }, { "compression_loss": 0.0, "distillation_loss": 0.39385679364204407, "epoch": 12.54, "learning_rate": 3.4225167393019515e-05, "loss": 0.2896, "step": 34700, "task_loss": 0.8374224305152893 }, { "compression_loss": 0.0, "distillation_loss": 0.28174975514411926, "epoch": 12.54, "learning_rate": 3.4145712433127126e-05, "loss": 0.2754, "step": 34710, "task_loss": 0.2922405004501343 }, { "compression_loss": 0.0, "distillation_loss": 0.22185012698173523, "epoch": 12.55, "learning_rate": 3.4066261878228706e-05, "loss": 0.2613, "step": 34720, "task_loss": 0.3246780335903168 }, { "compression_loss": 0.0, "distillation_loss": 0.2959524989128113, "epoch": 12.55, "learning_rate": 3.398681613799803e-05, "loss": 0.2727, "step": 34730, "task_loss": 0.3286215662956238 }, { "compression_loss": 0.0, "distillation_loss": 0.17410224676132202, "epoch": 12.56, "learning_rate": 3.390737562208405e-05, "loss": 0.2764, "step": 34740, "task_loss": 0.3103204667568207 }, { "compression_loss": 0.0, "distillation_loss": 0.26146069169044495, "epoch": 12.56, "learning_rate": 3.3827940740108764e-05, "loss": 0.3019, "step": 34750, "task_loss": 0.49389928579330444 }, { "epoch": 12.56, "eval_exact_match": 83.30179754020814, "eval_f1": 89.99643864393832, "step": 34750 }, { "compression_loss": 0.0, "distillation_loss": 0.17524585127830505, "epoch": 12.56, "learning_rate": 3.374851190166516e-05, "loss": 0.2547, "step": 34760, "task_loss": 0.23321254551410675 }, { "compression_loss": 0.0, "distillation_loss": 0.3621963858604431, "epoch": 12.57, "learning_rate": 3.366908951631501e-05, "loss": 0.3203, "step": 34770, "task_loss": 0.8130776882171631 }, { "compression_loss": 0.0, "distillation_loss": 0.26339295506477356, "epoch": 12.57, "learning_rate": 3.3589673993586866e-05, "loss": 0.2794, "step": 34780, "task_loss": 0.4487806558609009 }, { "compression_loss": 0.0, "distillation_loss": 0.2658938765525818, "epoch": 12.57, "learning_rate": 3.351026574297384e-05, "loss": 0.2782, "step": 34790, "task_loss": 0.472459614276886 }, { "compression_loss": 0.0, "distillation_loss": 0.22098635137081146, "epoch": 12.58, "learning_rate": 3.343086517393162e-05, "loss": 0.2921, "step": 34800, "task_loss": 0.45475247502326965 }, { "compression_loss": 0.0, "distillation_loss": 0.2744857370853424, "epoch": 12.58, "learning_rate": 3.335147269587619e-05, "loss": 0.2564, "step": 34810, "task_loss": 0.5082106590270996 }, { "compression_loss": 0.0, "distillation_loss": 0.22003915905952454, "epoch": 12.58, "learning_rate": 3.327208871818189e-05, "loss": 0.2594, "step": 34820, "task_loss": 0.3585473895072937 }, { "compression_loss": 0.0, "distillation_loss": 0.34640657901763916, "epoch": 12.59, "learning_rate": 3.3192713650179204e-05, "loss": 0.3076, "step": 34830, "task_loss": 0.5335635542869568 }, { "compression_loss": 0.0, "distillation_loss": 0.26574617624282837, "epoch": 12.59, "learning_rate": 3.311334790115267e-05, "loss": 0.2817, "step": 34840, "task_loss": 0.4539814591407776 }, { "compression_loss": 0.0, "distillation_loss": 0.25745895504951477, "epoch": 12.59, "learning_rate": 3.303399188033876e-05, "loss": 0.2834, "step": 34850, "task_loss": 0.32244840264320374 }, { "compression_loss": 0.0, "distillation_loss": 0.21593958139419556, "epoch": 12.6, "learning_rate": 3.295464599692384e-05, "loss": 0.2987, "step": 34860, "task_loss": 0.6063805818557739 }, { "compression_loss": 0.0, "distillation_loss": 0.25682130455970764, "epoch": 12.6, "learning_rate": 3.287531066004193e-05, "loss": 0.2643, "step": 34870, "task_loss": 0.38414889574050903 }, { "compression_loss": 0.0, "distillation_loss": 0.2547285556793213, "epoch": 12.61, "learning_rate": 3.279598627877273e-05, "loss": 0.2656, "step": 34880, "task_loss": 0.31116461753845215 }, { "compression_loss": 0.0, "distillation_loss": 0.2546624541282654, "epoch": 12.61, "learning_rate": 3.2716673262139395e-05, "loss": 0.2939, "step": 34890, "task_loss": 0.37165433168411255 }, { "compression_loss": 0.0, "distillation_loss": 0.289786696434021, "epoch": 12.61, "learning_rate": 3.2637372019106546e-05, "loss": 0.265, "step": 34900, "task_loss": 0.6424096822738647 }, { "compression_loss": 0.0, "distillation_loss": 0.3912149667739868, "epoch": 12.62, "learning_rate": 3.255808295857803e-05, "loss": 0.2967, "step": 34910, "task_loss": 0.8441682457923889 }, { "compression_loss": 0.0, "distillation_loss": 0.2348523736000061, "epoch": 12.62, "learning_rate": 3.247880648939492e-05, "loss": 0.2771, "step": 34920, "task_loss": 0.5961621999740601 }, { "compression_loss": 0.0, "distillation_loss": 0.30086714029312134, "epoch": 12.62, "learning_rate": 3.239954302033335e-05, "loss": 0.2532, "step": 34930, "task_loss": 0.5938539505004883 }, { "compression_loss": 0.0, "distillation_loss": 0.24177315831184387, "epoch": 12.63, "learning_rate": 3.232029296010241e-05, "loss": 0.266, "step": 34940, "task_loss": 0.607329249382019 }, { "compression_loss": 0.0, "distillation_loss": 0.2449285387992859, "epoch": 12.63, "learning_rate": 3.224105671734206e-05, "loss": 0.2816, "step": 34950, "task_loss": 0.5142256617546082 }, { "compression_loss": 0.0, "distillation_loss": 0.3181091248989105, "epoch": 12.63, "learning_rate": 3.2161834700621026e-05, "loss": 0.2875, "step": 34960, "task_loss": 0.6336065530776978 }, { "compression_loss": 0.0, "distillation_loss": 0.23876434564590454, "epoch": 12.64, "learning_rate": 3.2082627318434634e-05, "loss": 0.2967, "step": 34970, "task_loss": 0.4739626944065094 }, { "compression_loss": 0.0, "distillation_loss": 0.2478228509426117, "epoch": 12.64, "learning_rate": 3.2003434979202836e-05, "loss": 0.2556, "step": 34980, "task_loss": 0.5977648496627808 }, { "compression_loss": 0.0, "distillation_loss": 0.3044435977935791, "epoch": 12.65, "learning_rate": 3.19242580912679e-05, "loss": 0.2845, "step": 34990, "task_loss": 0.6682428121566772 }, { "compression_loss": 0.0, "distillation_loss": 0.25522321462631226, "epoch": 12.65, "learning_rate": 3.1845097062892545e-05, "loss": 0.2961, "step": 35000, "task_loss": 0.6363422870635986 }, { "epoch": 12.65, "eval_exact_match": 83.68968779564806, "eval_f1": 90.17109381218279, "step": 35000 }, { "compression_loss": 0.0, "distillation_loss": 0.3207133710384369, "epoch": 12.65, "learning_rate": 3.1765952302257604e-05, "loss": 0.2852, "step": 35010, "task_loss": 0.6862952709197998 }, { "compression_loss": 0.0, "distillation_loss": 0.2543371319770813, "epoch": 12.66, "learning_rate": 3.1686824217460105e-05, "loss": 0.2625, "step": 35020, "task_loss": 0.505595326423645 }, { "compression_loss": 0.0, "distillation_loss": 0.2681245505809784, "epoch": 12.66, "learning_rate": 3.160771321651105e-05, "loss": 0.2769, "step": 35030, "task_loss": 0.6629229187965393 }, { "compression_loss": 0.0, "distillation_loss": 0.2507745027542114, "epoch": 12.66, "learning_rate": 3.152861970733336e-05, "loss": 0.2827, "step": 35040, "task_loss": 0.600731611251831 }, { "compression_loss": 0.0, "distillation_loss": 0.19638881087303162, "epoch": 12.67, "learning_rate": 3.144954409775978e-05, "loss": 0.2881, "step": 35050, "task_loss": 0.39548712968826294 }, { "compression_loss": 0.0, "distillation_loss": 0.2732619047164917, "epoch": 12.67, "learning_rate": 3.1370486795530724e-05, "loss": 0.2787, "step": 35060, "task_loss": 0.4209481477737427 }, { "compression_loss": 0.0, "distillation_loss": 0.21758723258972168, "epoch": 12.67, "learning_rate": 3.129144820829223e-05, "loss": 0.289, "step": 35070, "task_loss": 0.27617648243904114 }, { "compression_loss": 0.0, "distillation_loss": 0.26905715465545654, "epoch": 12.68, "learning_rate": 3.1212428743593856e-05, "loss": 0.2781, "step": 35080, "task_loss": 0.3222195506095886 }, { "compression_loss": 0.0, "distillation_loss": 0.309938907623291, "epoch": 12.68, "learning_rate": 3.113342880888649e-05, "loss": 0.3007, "step": 35090, "task_loss": 0.4127185642719269 }, { "compression_loss": 0.0, "distillation_loss": 0.23867590725421906, "epoch": 12.69, "learning_rate": 3.10544488115204e-05, "loss": 0.2763, "step": 35100, "task_loss": 0.7365835905075073 }, { "compression_loss": 0.0, "distillation_loss": 0.26832306385040283, "epoch": 12.69, "learning_rate": 3.097548915874299e-05, "loss": 0.2743, "step": 35110, "task_loss": 0.39295315742492676 }, { "compression_loss": 0.0, "distillation_loss": 0.4001271724700928, "epoch": 12.69, "learning_rate": 3.08965502576968e-05, "loss": 0.3228, "step": 35120, "task_loss": 1.1282687187194824 }, { "compression_loss": 0.0, "distillation_loss": 0.25665977597236633, "epoch": 12.7, "learning_rate": 3.081763251541732e-05, "loss": 0.2797, "step": 35130, "task_loss": 0.37274888157844543 }, { "compression_loss": 0.0, "distillation_loss": 0.238399475812912, "epoch": 12.7, "learning_rate": 3.0746624974438324e-05, "loss": 0.303, "step": 35140, "task_loss": 0.32780641317367554 }, { "compression_loss": 0.0, "distillation_loss": 0.20166674256324768, "epoch": 12.7, "learning_rate": 3.0667748554806034e-05, "loss": 0.2461, "step": 35150, "task_loss": 0.15192970633506775 }, { "compression_loss": 0.0, "distillation_loss": 0.20919165015220642, "epoch": 12.71, "learning_rate": 3.058889447371902e-05, "loss": 0.2623, "step": 35160, "task_loss": 0.45256832242012024 }, { "compression_loss": 0.0, "distillation_loss": 0.24076633155345917, "epoch": 12.71, "learning_rate": 3.0510063137775374e-05, "loss": 0.2748, "step": 35170, "task_loss": 0.4359937608242035 }, { "compression_loss": 0.0, "distillation_loss": 0.24580487608909607, "epoch": 12.71, "learning_rate": 3.0431254953456025e-05, "loss": 0.3164, "step": 35180, "task_loss": 0.3960850238800049 }, { "compression_loss": 0.0, "distillation_loss": 0.3059123754501343, "epoch": 12.72, "learning_rate": 3.0352470327122443e-05, "loss": 0.2723, "step": 35190, "task_loss": 0.7698179483413696 }, { "compression_loss": 0.0, "distillation_loss": 0.2135799527168274, "epoch": 12.72, "learning_rate": 3.0273709665014673e-05, "loss": 0.2579, "step": 35200, "task_loss": 0.4562651515007019 }, { "compression_loss": 0.0, "distillation_loss": 0.2941896915435791, "epoch": 12.72, "learning_rate": 3.0194973373249165e-05, "loss": 0.2959, "step": 35210, "task_loss": 0.7255433797836304 }, { "compression_loss": 0.0, "distillation_loss": 0.26296886801719666, "epoch": 12.73, "learning_rate": 3.011626185781672e-05, "loss": 0.278, "step": 35220, "task_loss": 0.3990821838378906 }, { "compression_loss": 0.0, "distillation_loss": 0.22505085170269012, "epoch": 12.73, "learning_rate": 3.0037575524580362e-05, "loss": 0.2853, "step": 35230, "task_loss": 0.31899845600128174 }, { "compression_loss": 0.0, "distillation_loss": 0.355307400226593, "epoch": 12.74, "learning_rate": 2.9958914779273314e-05, "loss": 0.2866, "step": 35240, "task_loss": 1.150566577911377 }, { "compression_loss": 0.0, "distillation_loss": 0.20547062158584595, "epoch": 12.74, "learning_rate": 2.988028002749679e-05, "loss": 0.2836, "step": 35250, "task_loss": 0.3349851667881012 }, { "epoch": 12.74, "eval_exact_match": 83.08420056764427, "eval_f1": 89.95081892512363, "step": 35250 }, { "compression_loss": 0.0, "distillation_loss": 0.31211623549461365, "epoch": 12.74, "learning_rate": 2.980167167471807e-05, "loss": 0.2894, "step": 35260, "task_loss": 0.522201657295227 }, { "compression_loss": 0.0, "distillation_loss": 0.20951145887374878, "epoch": 12.75, "learning_rate": 2.9723090126268194e-05, "loss": 0.2801, "step": 35270, "task_loss": 0.3398115634918213 }, { "compression_loss": 0.0, "distillation_loss": 0.25808507204055786, "epoch": 12.75, "learning_rate": 2.9644535787340092e-05, "loss": 0.2669, "step": 35280, "task_loss": 0.536829948425293 }, { "compression_loss": 0.0, "distillation_loss": 0.2925964295864105, "epoch": 12.75, "learning_rate": 2.9566009062986328e-05, "loss": 0.3058, "step": 35290, "task_loss": 0.49623316526412964 }, { "compression_loss": 0.0, "distillation_loss": 0.2753320336341858, "epoch": 12.76, "learning_rate": 2.9487510358117113e-05, "loss": 0.2797, "step": 35300, "task_loss": 0.265572190284729 }, { "compression_loss": 0.0, "distillation_loss": 0.193668395280838, "epoch": 12.76, "learning_rate": 2.9409040077498153e-05, "loss": 0.2522, "step": 35310, "task_loss": 0.2860652804374695 }, { "compression_loss": 0.0, "distillation_loss": 0.2943766713142395, "epoch": 12.76, "learning_rate": 2.9330598625748617e-05, "loss": 0.2603, "step": 35320, "task_loss": 0.687859296798706 }, { "compression_loss": 0.0, "distillation_loss": 0.22939196228981018, "epoch": 12.77, "learning_rate": 2.925218640733899e-05, "loss": 0.2641, "step": 35330, "task_loss": 0.977037787437439 }, { "compression_loss": 0.0, "distillation_loss": 0.262288898229599, "epoch": 12.77, "learning_rate": 2.9173803826589055e-05, "loss": 0.2829, "step": 35340, "task_loss": 0.4137219786643982 }, { "compression_loss": 0.0, "distillation_loss": 0.2971470355987549, "epoch": 12.78, "learning_rate": 2.909545128766573e-05, "loss": 0.2854, "step": 35350, "task_loss": 0.44294825196266174 }, { "compression_loss": 0.0, "distillation_loss": 0.27984118461608887, "epoch": 12.78, "learning_rate": 2.9017129194581096e-05, "loss": 0.26, "step": 35360, "task_loss": 0.3375154733657837 }, { "compression_loss": 0.0, "distillation_loss": 0.2255486100912094, "epoch": 12.78, "learning_rate": 2.893883795119015e-05, "loss": 0.2937, "step": 35370, "task_loss": 0.46902692317962646 }, { "compression_loss": 0.0, "distillation_loss": 0.299260675907135, "epoch": 12.79, "learning_rate": 2.8860577961188912e-05, "loss": 0.2452, "step": 35380, "task_loss": 0.5150102972984314 }, { "compression_loss": 0.0, "distillation_loss": 0.2901751399040222, "epoch": 12.79, "learning_rate": 2.8782349628112184e-05, "loss": 0.2674, "step": 35390, "task_loss": 1.1621662378311157 }, { "compression_loss": 0.0, "distillation_loss": 0.2164711356163025, "epoch": 12.79, "learning_rate": 2.870415335533157e-05, "loss": 0.2524, "step": 35400, "task_loss": 0.32596057653427124 }, { "compression_loss": 0.0, "distillation_loss": 0.20497137308120728, "epoch": 12.8, "learning_rate": 2.8625989546053335e-05, "loss": 0.2764, "step": 35410, "task_loss": 0.3471028506755829 }, { "compression_loss": 0.0, "distillation_loss": 0.26700571179389954, "epoch": 12.8, "learning_rate": 2.8547858603316378e-05, "loss": 0.3007, "step": 35420, "task_loss": 0.29583919048309326 }, { "compression_loss": 0.0, "distillation_loss": 0.27736860513687134, "epoch": 12.8, "learning_rate": 2.846976092999009e-05, "loss": 0.2877, "step": 35430, "task_loss": 0.562250018119812 }, { "compression_loss": 0.0, "distillation_loss": 0.23967581987380981, "epoch": 12.81, "learning_rate": 2.8391696928772366e-05, "loss": 0.2601, "step": 35440, "task_loss": 0.7210532426834106 }, { "compression_loss": 0.0, "distillation_loss": 0.25436583161354065, "epoch": 12.81, "learning_rate": 2.831366700218741e-05, "loss": 0.2864, "step": 35450, "task_loss": 0.4490795135498047 }, { "compression_loss": 0.0, "distillation_loss": 0.25366324186325073, "epoch": 12.82, "learning_rate": 2.8235671552583812e-05, "loss": 0.274, "step": 35460, "task_loss": 0.7202346324920654 }, { "compression_loss": 0.0, "distillation_loss": 0.24929600954055786, "epoch": 12.82, "learning_rate": 2.8157710982132277e-05, "loss": 0.3073, "step": 35470, "task_loss": 0.7886189222335815 }, { "compression_loss": 0.0, "distillation_loss": 0.21746213734149933, "epoch": 12.82, "learning_rate": 2.8079785692823776e-05, "loss": 0.2845, "step": 35480, "task_loss": 0.2715526819229126 }, { "compression_loss": 0.0, "distillation_loss": 0.24432429671287537, "epoch": 12.83, "learning_rate": 2.800189608646727e-05, "loss": 0.256, "step": 35490, "task_loss": 0.4983975291252136 }, { "compression_loss": 0.0, "distillation_loss": 0.28279516100883484, "epoch": 12.83, "learning_rate": 2.7924042564687786e-05, "loss": 0.2715, "step": 35500, "task_loss": 0.5286850333213806 }, { "epoch": 12.83, "eval_exact_match": 83.34910122989594, "eval_f1": 90.11317455780872, "step": 35500 }, { "compression_loss": 0.0, "distillation_loss": 0.22306914627552032, "epoch": 12.83, "learning_rate": 2.7846225528924238e-05, "loss": 0.2723, "step": 35510, "task_loss": 0.4120517373085022 }, { "compression_loss": 0.0, "distillation_loss": 0.252077579498291, "epoch": 12.84, "learning_rate": 2.7768445380427447e-05, "loss": 0.2739, "step": 35520, "task_loss": 0.3027448356151581 }, { "compression_loss": 0.0, "distillation_loss": 0.2874928116798401, "epoch": 12.84, "learning_rate": 2.7690702520258e-05, "loss": 0.2884, "step": 35530, "task_loss": 0.663945198059082 }, { "compression_loss": 0.0, "distillation_loss": 0.3042541742324829, "epoch": 12.84, "learning_rate": 2.7612997349284244e-05, "loss": 0.2923, "step": 35540, "task_loss": 0.8597861528396606 }, { "compression_loss": 0.0, "distillation_loss": 0.34958600997924805, "epoch": 12.85, "learning_rate": 2.753533026818014e-05, "loss": 0.2926, "step": 35550, "task_loss": 0.43330618739128113 }, { "compression_loss": 0.0, "distillation_loss": 0.265541136264801, "epoch": 12.85, "learning_rate": 2.7457701677423323e-05, "loss": 0.2763, "step": 35560, "task_loss": 0.4258151054382324 }, { "compression_loss": 0.0, "distillation_loss": 0.25206151604652405, "epoch": 12.86, "learning_rate": 2.7380111977292854e-05, "loss": 0.2988, "step": 35570, "task_loss": 0.5166367888450623 }, { "compression_loss": 0.0, "distillation_loss": 0.26590320467948914, "epoch": 12.86, "learning_rate": 2.7302561567867363e-05, "loss": 0.2942, "step": 35580, "task_loss": 0.9321345686912537 }, { "compression_loss": 0.0, "distillation_loss": 0.19803495705127716, "epoch": 12.86, "learning_rate": 2.722505084902282e-05, "loss": 0.2596, "step": 35590, "task_loss": 0.3814893960952759 }, { "compression_loss": 0.0, "distillation_loss": 0.21219594776630402, "epoch": 12.87, "learning_rate": 2.7147580220430556e-05, "loss": 0.2698, "step": 35600, "task_loss": 0.26240330934524536 }, { "compression_loss": 0.0, "distillation_loss": 0.274894118309021, "epoch": 12.87, "learning_rate": 2.7070150081555185e-05, "loss": 0.2635, "step": 35610, "task_loss": 0.6794742345809937 }, { "compression_loss": 0.0, "distillation_loss": 0.29060205817222595, "epoch": 12.87, "learning_rate": 2.699276083165254e-05, "loss": 0.3269, "step": 35620, "task_loss": 0.4442636966705322 }, { "compression_loss": 0.0, "distillation_loss": 0.2644478380680084, "epoch": 12.88, "learning_rate": 2.6915412869767616e-05, "loss": 0.2703, "step": 35630, "task_loss": 0.33713334798812866 }, { "compression_loss": 0.0, "distillation_loss": 0.2862852215766907, "epoch": 12.88, "learning_rate": 2.6838106594732518e-05, "loss": 0.2643, "step": 35640, "task_loss": 0.5014687180519104 }, { "compression_loss": 0.0, "distillation_loss": 0.2821693420410156, "epoch": 12.88, "learning_rate": 2.6760842405164378e-05, "loss": 0.2717, "step": 35650, "task_loss": 0.4597575068473816 }, { "compression_loss": 0.0, "distillation_loss": 0.2885741591453552, "epoch": 12.89, "learning_rate": 2.6683620699463366e-05, "loss": 0.3032, "step": 35660, "task_loss": 0.7140188217163086 }, { "compression_loss": 0.0, "distillation_loss": 0.23422183096408844, "epoch": 12.89, "learning_rate": 2.6606441875810548e-05, "loss": 0.2679, "step": 35670, "task_loss": 0.4431329369544983 }, { "compression_loss": 0.0, "distillation_loss": 0.2877368927001953, "epoch": 12.89, "learning_rate": 2.6529306332165898e-05, "loss": 0.286, "step": 35680, "task_loss": 0.5499070286750793 }, { "compression_loss": 0.0, "distillation_loss": 0.3035220503807068, "epoch": 12.9, "learning_rate": 2.645221446626621e-05, "loss": 0.291, "step": 35690, "task_loss": 0.6294026970863342 }, { "compression_loss": 0.0, "distillation_loss": 0.26873719692230225, "epoch": 12.9, "learning_rate": 2.6375166675623076e-05, "loss": 0.306, "step": 35700, "task_loss": 0.4272887706756592 }, { "compression_loss": 0.0, "distillation_loss": 0.26753750443458557, "epoch": 12.91, "learning_rate": 2.62981633575208e-05, "loss": 0.2603, "step": 35710, "task_loss": 0.4566969871520996 }, { "compression_loss": 0.0, "distillation_loss": 0.22760161757469177, "epoch": 12.91, "learning_rate": 2.62212049090144e-05, "loss": 0.2906, "step": 35720, "task_loss": 0.3315356373786926 }, { "compression_loss": 0.0, "distillation_loss": 0.22385570406913757, "epoch": 12.91, "learning_rate": 2.6144291726927492e-05, "loss": 0.2608, "step": 35730, "task_loss": 0.3490475118160248 }, { "compression_loss": 0.0, "distillation_loss": 0.22863799333572388, "epoch": 12.92, "learning_rate": 2.606742420785035e-05, "loss": 0.2873, "step": 35740, "task_loss": 0.3199641704559326 }, { "compression_loss": 0.0, "distillation_loss": 0.2786450982093811, "epoch": 12.92, "learning_rate": 2.5990602748137694e-05, "loss": 0.2993, "step": 35750, "task_loss": 0.9228396415710449 }, { "epoch": 12.92, "eval_exact_match": 83.40586565752129, "eval_f1": 90.15130214122858, "step": 35750 }, { "compression_loss": 0.0, "distillation_loss": 0.22297132015228271, "epoch": 12.92, "learning_rate": 2.5913827743906845e-05, "loss": 0.2731, "step": 35760, "task_loss": 0.8088899850845337 }, { "compression_loss": 0.0, "distillation_loss": 0.3312681317329407, "epoch": 12.93, "learning_rate": 2.5837099591035526e-05, "loss": 0.2676, "step": 35770, "task_loss": 0.6634341478347778 }, { "compression_loss": 0.0, "distillation_loss": 0.24431252479553223, "epoch": 12.93, "learning_rate": 2.57604186851599e-05, "loss": 0.2808, "step": 35780, "task_loss": 0.480110228061676 }, { "compression_loss": 0.0, "distillation_loss": 0.2923346161842346, "epoch": 12.93, "learning_rate": 2.5683785421672488e-05, "loss": 0.2755, "step": 35790, "task_loss": 0.431102454662323 }, { "compression_loss": 0.0, "distillation_loss": 0.2945399582386017, "epoch": 12.94, "learning_rate": 2.560720019572019e-05, "loss": 0.2777, "step": 35800, "task_loss": 0.495644211769104 }, { "compression_loss": 0.0, "distillation_loss": 0.36957889795303345, "epoch": 12.94, "learning_rate": 2.5530663402202158e-05, "loss": 0.2792, "step": 35810, "task_loss": 0.6453205347061157 }, { "compression_loss": 0.0, "distillation_loss": 0.327834814786911, "epoch": 12.95, "learning_rate": 2.5454175435767846e-05, "loss": 0.2772, "step": 35820, "task_loss": 0.5545930862426758 }, { "compression_loss": 0.0, "distillation_loss": 0.30460476875305176, "epoch": 12.95, "learning_rate": 2.5377736690814943e-05, "loss": 0.2708, "step": 35830, "task_loss": 0.7154297828674316 }, { "compression_loss": 0.0, "distillation_loss": 0.29110848903656006, "epoch": 12.95, "learning_rate": 2.5301347561487302e-05, "loss": 0.2833, "step": 35840, "task_loss": 0.4856996238231659 }, { "compression_loss": 0.0, "distillation_loss": 0.22446209192276, "epoch": 12.96, "learning_rate": 2.5225008441672982e-05, "loss": 0.2434, "step": 35850, "task_loss": 0.5817791223526001 }, { "compression_loss": 0.0, "distillation_loss": 0.22967638075351715, "epoch": 12.96, "learning_rate": 2.514871972500213e-05, "loss": 0.2544, "step": 35860, "task_loss": 0.41800975799560547 }, { "compression_loss": 0.0, "distillation_loss": 0.3124304413795471, "epoch": 12.96, "learning_rate": 2.507248180484505e-05, "loss": 0.289, "step": 35870, "task_loss": 0.35523521900177 }, { "compression_loss": 0.0, "distillation_loss": 0.378414511680603, "epoch": 12.97, "learning_rate": 2.4996295074310068e-05, "loss": 0.2855, "step": 35880, "task_loss": 0.6406650543212891 }, { "compression_loss": 0.0, "distillation_loss": 0.21744538843631744, "epoch": 12.97, "learning_rate": 2.492015992624161e-05, "loss": 0.2866, "step": 35890, "task_loss": 0.428682804107666 }, { "compression_loss": 0.0, "distillation_loss": 0.23970133066177368, "epoch": 12.97, "learning_rate": 2.484407675321807e-05, "loss": 0.292, "step": 35900, "task_loss": 0.4487767517566681 }, { "compression_loss": 0.0, "distillation_loss": 0.3047142028808594, "epoch": 12.98, "learning_rate": 2.4768045947549914e-05, "loss": 0.3119, "step": 35910, "task_loss": 0.6567196846008301 }, { "compression_loss": 0.0, "distillation_loss": 0.3284294009208679, "epoch": 12.98, "learning_rate": 2.46920679012775e-05, "loss": 0.2771, "step": 35920, "task_loss": 0.5150258541107178 }, { "compression_loss": 0.0, "distillation_loss": 0.23500069975852966, "epoch": 12.99, "learning_rate": 2.4616143006169216e-05, "loss": 0.27, "step": 35930, "task_loss": 0.28987038135528564 }, { "compression_loss": 0.0, "distillation_loss": 0.21058207750320435, "epoch": 12.99, "learning_rate": 2.4540271653719334e-05, "loss": 0.2711, "step": 35940, "task_loss": 0.4640544652938843 }, { "compression_loss": 0.0, "distillation_loss": 0.23709851503372192, "epoch": 12.99, "learning_rate": 2.4464454235146077e-05, "loss": 0.2738, "step": 35950, "task_loss": 0.6334000825881958 }, { "compression_loss": 0.0, "distillation_loss": 0.22366857528686523, "epoch": 13.0, "learning_rate": 2.4388691141389522e-05, "loss": 0.2613, "step": 35960, "task_loss": 0.36642172932624817 }, { "compression_loss": 0.0, "distillation_loss": 0.2464757263660431, "epoch": 13.0, "learning_rate": 2.431298276310969e-05, "loss": 0.2681, "step": 35970, "task_loss": 0.5318572521209717 }, { "compression_loss": 0.0, "distillation_loss": 0.21534934639930725, "epoch": 13.0, "learning_rate": 2.423732949068441e-05, "loss": 0.2772, "step": 35980, "task_loss": 0.5838292241096497 }, { "compression_loss": 0.0, "distillation_loss": 0.20216085016727448, "epoch": 13.01, "learning_rate": 2.4161731714207415e-05, "loss": 0.2649, "step": 35990, "task_loss": 0.29948505759239197 }, { "compression_loss": 0.0, "distillation_loss": 0.24598738551139832, "epoch": 13.01, "learning_rate": 2.4086189823486237e-05, "loss": 0.2785, "step": 36000, "task_loss": 0.5142921209335327 }, { "epoch": 13.01, "eval_exact_match": 83.43424787133397, "eval_f1": 90.10950263383882, "step": 36000 }, { "compression_loss": 0.0, "distillation_loss": 0.25713568925857544, "epoch": 13.01, "learning_rate": 2.401070420804031e-05, "loss": 0.2739, "step": 36010, "task_loss": 0.5379408597946167 }, { "compression_loss": 0.0, "distillation_loss": 0.25871193408966064, "epoch": 13.02, "learning_rate": 2.393527525709879e-05, "loss": 0.2571, "step": 36020, "task_loss": 0.6447278261184692 }, { "compression_loss": 0.0, "distillation_loss": 0.19045813381671906, "epoch": 13.02, "learning_rate": 2.3859903359598768e-05, "loss": 0.2508, "step": 36030, "task_loss": 0.358597069978714 }, { "compression_loss": 0.0, "distillation_loss": 0.2523159384727478, "epoch": 13.02, "learning_rate": 2.378458890418306e-05, "loss": 0.2541, "step": 36040, "task_loss": 0.5766875743865967 }, { "compression_loss": 0.0, "distillation_loss": 0.16886655986309052, "epoch": 13.03, "learning_rate": 2.370933227919834e-05, "loss": 0.2527, "step": 36050, "task_loss": 0.5645083785057068 }, { "compression_loss": 0.0, "distillation_loss": 0.24453911185264587, "epoch": 13.03, "learning_rate": 2.3634133872693063e-05, "loss": 0.2473, "step": 36060, "task_loss": 0.7919536828994751 }, { "compression_loss": 0.0, "distillation_loss": 0.19830533862113953, "epoch": 13.04, "learning_rate": 2.3558994072415506e-05, "loss": 0.2636, "step": 36070, "task_loss": 0.5828542709350586 }, { "compression_loss": 0.0, "distillation_loss": 0.3160579204559326, "epoch": 13.04, "learning_rate": 2.3483913265811735e-05, "loss": 0.3015, "step": 36080, "task_loss": 0.5136288404464722 }, { "compression_loss": 0.0, "distillation_loss": 0.2591038942337036, "epoch": 13.04, "learning_rate": 2.3408891840023662e-05, "loss": 0.2589, "step": 36090, "task_loss": 0.4600667953491211 }, { "compression_loss": 0.0, "distillation_loss": 0.20899376273155212, "epoch": 13.05, "learning_rate": 2.3333930181886945e-05, "loss": 0.2379, "step": 36100, "task_loss": 0.19576582312583923 }, { "compression_loss": 0.0, "distillation_loss": 0.20569036900997162, "epoch": 13.05, "learning_rate": 2.3259028677929147e-05, "loss": 0.2558, "step": 36110, "task_loss": 0.40065017342567444 }, { "compression_loss": 0.0, "distillation_loss": 0.2914265990257263, "epoch": 13.05, "learning_rate": 2.3184187714367566e-05, "loss": 0.2405, "step": 36120, "task_loss": 0.456284761428833 }, { "compression_loss": 0.0, "distillation_loss": 0.23679134249687195, "epoch": 13.06, "learning_rate": 2.3109407677107403e-05, "loss": 0.2539, "step": 36130, "task_loss": 0.49874114990234375 }, { "compression_loss": 0.0, "distillation_loss": 0.22600065171718597, "epoch": 13.06, "learning_rate": 2.303468895173967e-05, "loss": 0.2804, "step": 36140, "task_loss": 0.49588102102279663 }, { "compression_loss": 0.0, "distillation_loss": 0.36351796984672546, "epoch": 13.06, "learning_rate": 2.2960031923539245e-05, "loss": 0.2586, "step": 36150, "task_loss": 0.7948037385940552 }, { "compression_loss": 0.0, "distillation_loss": 0.284883052110672, "epoch": 13.07, "learning_rate": 2.2885436977462856e-05, "loss": 0.2464, "step": 36160, "task_loss": 0.5250895619392395 }, { "compression_loss": 0.0, "distillation_loss": 0.18825049698352814, "epoch": 13.07, "learning_rate": 2.281090449814715e-05, "loss": 0.2421, "step": 36170, "task_loss": 0.3618870973587036 }, { "compression_loss": 0.0, "distillation_loss": 0.2766856849193573, "epoch": 13.08, "learning_rate": 2.2736434869906633e-05, "loss": 0.2754, "step": 36180, "task_loss": 0.2450508177280426 }, { "compression_loss": 0.0, "distillation_loss": 0.20527908205986023, "epoch": 13.08, "learning_rate": 2.266202847673176e-05, "loss": 0.2416, "step": 36190, "task_loss": 0.37746256589889526 }, { "compression_loss": 0.0, "distillation_loss": 0.20512312650680542, "epoch": 13.08, "learning_rate": 2.2587685702286905e-05, "loss": 0.2368, "step": 36200, "task_loss": 0.5858408808708191 }, { "compression_loss": 0.0, "distillation_loss": 0.26339060068130493, "epoch": 13.09, "learning_rate": 2.2513406929908436e-05, "loss": 0.2462, "step": 36210, "task_loss": 0.5640926361083984 }, { "compression_loss": 0.0, "distillation_loss": 0.26149576902389526, "epoch": 13.09, "learning_rate": 2.2439192542602643e-05, "loss": 0.253, "step": 36220, "task_loss": 1.1567996740341187 }, { "compression_loss": 0.0, "distillation_loss": 0.22706861793994904, "epoch": 13.09, "learning_rate": 2.2365042923043904e-05, "loss": 0.2413, "step": 36230, "task_loss": 0.4056210219860077 }, { "compression_loss": 0.0, "distillation_loss": 0.22760742902755737, "epoch": 13.1, "learning_rate": 2.2290958453572563e-05, "loss": 0.2546, "step": 36240, "task_loss": 0.624477744102478 }, { "compression_loss": 0.0, "distillation_loss": 0.23840272426605225, "epoch": 13.1, "learning_rate": 2.2216939516193066e-05, "loss": 0.2624, "step": 36250, "task_loss": 0.6678675413131714 }, { "epoch": 13.1, "eval_exact_match": 83.44370860927152, "eval_f1": 90.08635960567796, "step": 36250 }, { "compression_loss": 0.0, "distillation_loss": 0.22495925426483154, "epoch": 13.1, "learning_rate": 2.2142986492571942e-05, "loss": 0.2348, "step": 36260, "task_loss": 0.35559767484664917 }, { "compression_loss": 0.0, "distillation_loss": 0.2641688287258148, "epoch": 13.11, "learning_rate": 2.2069099764035857e-05, "loss": 0.2598, "step": 36270, "task_loss": 0.4362841546535492 }, { "compression_loss": 0.0, "distillation_loss": 0.1987636238336563, "epoch": 13.11, "learning_rate": 2.1995279711569616e-05, "loss": 0.2754, "step": 36280, "task_loss": 0.5346006155014038 }, { "compression_loss": 0.0, "distillation_loss": 0.21387869119644165, "epoch": 13.12, "learning_rate": 2.192152671581425e-05, "loss": 0.262, "step": 36290, "task_loss": 0.45050013065338135 }, { "compression_loss": 0.0, "distillation_loss": 0.24132338166236877, "epoch": 13.12, "learning_rate": 2.1847841157064977e-05, "loss": 0.2405, "step": 36300, "task_loss": 0.3253955841064453 }, { "compression_loss": 0.0, "distillation_loss": 0.26122480630874634, "epoch": 13.12, "learning_rate": 2.1774223415269346e-05, "loss": 0.2751, "step": 36310, "task_loss": 0.5988302826881409 }, { "compression_loss": 0.0, "distillation_loss": 0.22946585714817047, "epoch": 13.13, "learning_rate": 2.1700673870025175e-05, "loss": 0.2816, "step": 36320, "task_loss": 0.4920123815536499 }, { "compression_loss": 0.0, "distillation_loss": 0.24815085530281067, "epoch": 13.13, "learning_rate": 2.1627192900578653e-05, "loss": 0.285, "step": 36330, "task_loss": 0.44152703881263733 }, { "compression_loss": 0.0, "distillation_loss": 0.2004362791776657, "epoch": 13.13, "learning_rate": 2.1553780885822373e-05, "loss": 0.2194, "step": 36340, "task_loss": 0.41171136498451233 }, { "compression_loss": 0.0, "distillation_loss": 0.19157975912094116, "epoch": 13.14, "learning_rate": 2.1480438204293358e-05, "loss": 0.2551, "step": 36350, "task_loss": 0.6984323263168335 }, { "compression_loss": 0.0, "distillation_loss": 0.17804864048957825, "epoch": 13.14, "learning_rate": 2.1407165234171147e-05, "loss": 0.2508, "step": 36360, "task_loss": 0.849889874458313 }, { "compression_loss": 0.0, "distillation_loss": 0.3071947693824768, "epoch": 13.14, "learning_rate": 2.1333962353275808e-05, "loss": 0.2493, "step": 36370, "task_loss": 0.6484819650650024 }, { "compression_loss": 0.0, "distillation_loss": 0.2442978024482727, "epoch": 13.15, "learning_rate": 2.1260829939066002e-05, "loss": 0.2629, "step": 36380, "task_loss": 0.4738425016403198 }, { "compression_loss": 0.0, "distillation_loss": 0.41016677021980286, "epoch": 13.15, "learning_rate": 2.118776836863708e-05, "loss": 0.2816, "step": 36390, "task_loss": 0.7150113582611084 }, { "compression_loss": 0.0, "distillation_loss": 0.2693639397621155, "epoch": 13.16, "learning_rate": 2.1114778018719025e-05, "loss": 0.2863, "step": 36400, "task_loss": 0.9608858823776245 }, { "compression_loss": 0.0, "distillation_loss": 0.22125914692878723, "epoch": 13.16, "learning_rate": 2.104185926567466e-05, "loss": 0.2354, "step": 36410, "task_loss": 0.38470879197120667 }, { "compression_loss": 0.0, "distillation_loss": 0.19842985272407532, "epoch": 13.16, "learning_rate": 2.096901248549757e-05, "loss": 0.2564, "step": 36420, "task_loss": 0.4488826394081116 }, { "compression_loss": 0.0, "distillation_loss": 0.25292330980300903, "epoch": 13.17, "learning_rate": 2.0896238053810277e-05, "loss": 0.268, "step": 36430, "task_loss": 0.5951564311981201 }, { "compression_loss": 0.0, "distillation_loss": 0.2672657370567322, "epoch": 13.17, "learning_rate": 2.082353634586219e-05, "loss": 0.2637, "step": 36440, "task_loss": 0.6640857458114624 }, { "compression_loss": 0.0, "distillation_loss": 0.2842154800891876, "epoch": 13.17, "learning_rate": 2.0750907736527796e-05, "loss": 0.2534, "step": 36450, "task_loss": 0.653637170791626 }, { "compression_loss": 0.0, "distillation_loss": 0.2182607650756836, "epoch": 13.18, "learning_rate": 2.06783526003046e-05, "loss": 0.2511, "step": 36460, "task_loss": 0.5978858470916748 }, { "compression_loss": 0.0, "distillation_loss": 0.22036978602409363, "epoch": 13.18, "learning_rate": 2.06058713113113e-05, "loss": 0.2466, "step": 36470, "task_loss": 0.27851811051368713 }, { "compression_loss": 0.0, "distillation_loss": 0.20994271337985992, "epoch": 13.18, "learning_rate": 2.0533464243285776e-05, "loss": 0.2533, "step": 36480, "task_loss": 0.39874762296676636 }, { "compression_loss": 0.0, "distillation_loss": 0.23912584781646729, "epoch": 13.19, "learning_rate": 2.046113176958325e-05, "loss": 0.252, "step": 36490, "task_loss": 0.5322191715240479 }, { "compression_loss": 0.0, "distillation_loss": 0.23245467245578766, "epoch": 13.19, "learning_rate": 2.038887426317424e-05, "loss": 0.2353, "step": 36500, "task_loss": 0.4917253851890564 }, { "epoch": 13.19, "eval_exact_match": 83.519394512772, "eval_f1": 90.19268259004296, "step": 36500 }, { "compression_loss": 0.0, "distillation_loss": 0.2391589879989624, "epoch": 13.19, "learning_rate": 2.0316692096642786e-05, "loss": 0.2526, "step": 36510, "task_loss": 0.7205773591995239 }, { "compression_loss": 0.0, "distillation_loss": 0.18963998556137085, "epoch": 13.2, "learning_rate": 2.0244585642184383e-05, "loss": 0.2644, "step": 36520, "task_loss": 0.45464566349983215 }, { "compression_loss": 0.0, "distillation_loss": 0.3253783583641052, "epoch": 13.2, "learning_rate": 2.017255527160416e-05, "loss": 0.2724, "step": 36530, "task_loss": 0.5648168921470642 }, { "compression_loss": 0.0, "distillation_loss": 0.23560191690921783, "epoch": 13.21, "learning_rate": 2.0100601356314944e-05, "loss": 0.247, "step": 36540, "task_loss": 0.2432674914598465 }, { "compression_loss": 0.0, "distillation_loss": 0.22225362062454224, "epoch": 13.21, "learning_rate": 2.0028724267335317e-05, "loss": 0.2637, "step": 36550, "task_loss": 0.35249823331832886 }, { "compression_loss": 0.0, "distillation_loss": 0.26490503549575806, "epoch": 13.21, "learning_rate": 1.9956924375287686e-05, "loss": 0.2337, "step": 36560, "task_loss": 0.7395399808883667 }, { "compression_loss": 0.0, "distillation_loss": 0.29515647888183594, "epoch": 13.22, "learning_rate": 1.9885202050396498e-05, "loss": 0.2518, "step": 36570, "task_loss": 0.35637781023979187 }, { "compression_loss": 0.0, "distillation_loss": 0.17328497767448425, "epoch": 13.22, "learning_rate": 1.9813557662486124e-05, "loss": 0.223, "step": 36580, "task_loss": 0.549172580242157 }, { "compression_loss": 0.0, "distillation_loss": 0.21613357961177826, "epoch": 13.22, "learning_rate": 1.9741991580979143e-05, "loss": 0.2387, "step": 36590, "task_loss": 0.46144798398017883 }, { "compression_loss": 0.0, "distillation_loss": 0.18344077467918396, "epoch": 13.23, "learning_rate": 1.967050417489433e-05, "loss": 0.2439, "step": 36600, "task_loss": 0.4379880428314209 }, { "compression_loss": 0.0, "distillation_loss": 0.2288159728050232, "epoch": 13.23, "learning_rate": 1.9599095812844813e-05, "loss": 0.2528, "step": 36610, "task_loss": 0.6495665311813354 }, { "compression_loss": 0.0, "distillation_loss": 0.16957059502601624, "epoch": 13.23, "learning_rate": 1.9527766863036083e-05, "loss": 0.2552, "step": 36620, "task_loss": 0.3471338748931885 }, { "compression_loss": 0.0, "distillation_loss": 0.2566898465156555, "epoch": 13.24, "learning_rate": 1.9456517693264205e-05, "loss": 0.2451, "step": 36630, "task_loss": 0.4642353057861328 }, { "compression_loss": 0.0, "distillation_loss": 0.24621951580047607, "epoch": 13.24, "learning_rate": 1.9385348670913862e-05, "loss": 0.2651, "step": 36640, "task_loss": 0.24718859791755676 }, { "compression_loss": 0.0, "distillation_loss": 0.22665154933929443, "epoch": 13.25, "learning_rate": 1.9314260162956464e-05, "loss": 0.2416, "step": 36650, "task_loss": 0.3895531892776489 }, { "compression_loss": 0.0, "distillation_loss": 0.25208577513694763, "epoch": 13.25, "learning_rate": 1.924325253594826e-05, "loss": 0.263, "step": 36660, "task_loss": 0.4521622359752655 }, { "compression_loss": 0.0, "distillation_loss": 0.28293052315711975, "epoch": 13.25, "learning_rate": 1.9172326156028467e-05, "loss": 0.2326, "step": 36670, "task_loss": 0.6860798597335815 }, { "compression_loss": 0.0, "distillation_loss": 0.2210230976343155, "epoch": 13.26, "learning_rate": 1.910148138891732e-05, "loss": 0.2396, "step": 36680, "task_loss": 0.4906100928783417 }, { "compression_loss": 0.0, "distillation_loss": 0.20675742626190186, "epoch": 13.26, "learning_rate": 1.9030718599914283e-05, "loss": 0.2274, "step": 36690, "task_loss": 0.3917931318283081 }, { "compression_loss": 0.0, "distillation_loss": 0.2826964855194092, "epoch": 13.26, "learning_rate": 1.896003815389608e-05, "loss": 0.2658, "step": 36700, "task_loss": 0.39695435762405396 }, { "compression_loss": 0.0, "distillation_loss": 0.24488209187984467, "epoch": 13.27, "learning_rate": 1.888944041531488e-05, "loss": 0.2542, "step": 36710, "task_loss": 0.6131412982940674 }, { "compression_loss": 0.0, "distillation_loss": 0.17799416184425354, "epoch": 13.27, "learning_rate": 1.8818925748196324e-05, "loss": 0.2525, "step": 36720, "task_loss": 0.38853368163108826 }, { "compression_loss": 0.0, "distillation_loss": 0.31300222873687744, "epoch": 13.27, "learning_rate": 1.874849451613779e-05, "loss": 0.2527, "step": 36730, "task_loss": 0.44057413935661316 }, { "compression_loss": 0.0, "distillation_loss": 0.22045227885246277, "epoch": 13.28, "learning_rate": 1.8678147082306366e-05, "loss": 0.259, "step": 36740, "task_loss": 0.5224936008453369 }, { "compression_loss": 0.0, "distillation_loss": 0.25457197427749634, "epoch": 13.28, "learning_rate": 1.8607883809437084e-05, "loss": 0.2566, "step": 36750, "task_loss": 0.47354400157928467 }, { "epoch": 13.28, "eval_exact_match": 83.19772942289498, "eval_f1": 89.94332125061486, "step": 36750 }, { "compression_loss": 0.0, "distillation_loss": 0.214283749461174, "epoch": 13.29, "learning_rate": 1.853770505983101e-05, "loss": 0.2223, "step": 36760, "task_loss": 0.3899107873439789 }, { "compression_loss": 0.0, "distillation_loss": 0.22692391276359558, "epoch": 13.29, "learning_rate": 1.846761119535338e-05, "loss": 0.2566, "step": 36770, "task_loss": 0.5070915222167969 }, { "compression_loss": 0.0, "distillation_loss": 0.2641848027706146, "epoch": 13.29, "learning_rate": 1.839760257743171e-05, "loss": 0.2649, "step": 36780, "task_loss": 0.8205592632293701 }, { "compression_loss": 0.0, "distillation_loss": 0.25266072154045105, "epoch": 13.3, "learning_rate": 1.832767956705399e-05, "loss": 0.2525, "step": 36790, "task_loss": 0.5840563178062439 }, { "compression_loss": 0.0, "distillation_loss": 0.27707502245903015, "epoch": 13.3, "learning_rate": 1.825784252476677e-05, "loss": 0.2384, "step": 36800, "task_loss": 0.6803827285766602 }, { "compression_loss": 0.0, "distillation_loss": 0.20863491296768188, "epoch": 13.3, "learning_rate": 1.818809181067334e-05, "loss": 0.281, "step": 36810, "task_loss": 0.24492724239826202 }, { "compression_loss": 0.0, "distillation_loss": 0.2772062122821808, "epoch": 13.31, "learning_rate": 1.8118427784431795e-05, "loss": 0.2565, "step": 36820, "task_loss": 0.2199469804763794 }, { "compression_loss": 0.0, "distillation_loss": 0.19231802225112915, "epoch": 13.31, "learning_rate": 1.8048850805253334e-05, "loss": 0.2663, "step": 36830, "task_loss": 0.6367592811584473 }, { "compression_loss": 0.0, "distillation_loss": 0.24778011441230774, "epoch": 13.31, "learning_rate": 1.7979361231900222e-05, "loss": 0.2265, "step": 36840, "task_loss": 0.48328888416290283 }, { "compression_loss": 0.0, "distillation_loss": 0.22561031579971313, "epoch": 13.32, "learning_rate": 1.7909959422684075e-05, "loss": 0.2561, "step": 36850, "task_loss": 0.49358463287353516 }, { "compression_loss": 0.0, "distillation_loss": 0.3352190852165222, "epoch": 13.32, "learning_rate": 1.7840645735463956e-05, "loss": 0.2427, "step": 36860, "task_loss": 0.6199913024902344 }, { "compression_loss": 0.0, "distillation_loss": 0.3952946662902832, "epoch": 13.32, "learning_rate": 1.7771420527644567e-05, "loss": 0.2558, "step": 36870, "task_loss": 0.5466555953025818 }, { "compression_loss": 0.0, "distillation_loss": 0.22066161036491394, "epoch": 13.33, "learning_rate": 1.770228415617432e-05, "loss": 0.2415, "step": 36880, "task_loss": 0.6449267864227295 }, { "compression_loss": 0.0, "distillation_loss": 0.22198528051376343, "epoch": 13.33, "learning_rate": 1.763323697754362e-05, "loss": 0.2611, "step": 36890, "task_loss": 0.6034165620803833 }, { "compression_loss": 0.0, "distillation_loss": 0.24511726200580597, "epoch": 13.34, "learning_rate": 1.7564279347782926e-05, "loss": 0.2325, "step": 36900, "task_loss": 0.346041738986969 }, { "compression_loss": 0.0, "distillation_loss": 0.20405146479606628, "epoch": 13.34, "learning_rate": 1.7495411622460984e-05, "loss": 0.2366, "step": 36910, "task_loss": 0.2541355788707733 }, { "compression_loss": 0.0, "distillation_loss": 0.3636261820793152, "epoch": 13.34, "learning_rate": 1.7426634156682904e-05, "loss": 0.2816, "step": 36920, "task_loss": 0.7391115427017212 }, { "compression_loss": 0.0, "distillation_loss": 0.1870489865541458, "epoch": 13.35, "learning_rate": 1.735794730508848e-05, "loss": 0.2518, "step": 36930, "task_loss": 0.22427958250045776 }, { "compression_loss": 0.0, "distillation_loss": 0.28781723976135254, "epoch": 13.35, "learning_rate": 1.728935142185018e-05, "loss": 0.24, "step": 36940, "task_loss": 0.36266857385635376 }, { "compression_loss": 0.0, "distillation_loss": 0.24645911157131195, "epoch": 13.35, "learning_rate": 1.7220846860671454e-05, "loss": 0.2527, "step": 36950, "task_loss": 0.7112646102905273 }, { "compression_loss": 0.0, "distillation_loss": 0.23091328144073486, "epoch": 13.36, "learning_rate": 1.7152433974784854e-05, "loss": 0.2435, "step": 36960, "task_loss": 0.5005269646644592 }, { "compression_loss": 0.0, "distillation_loss": 0.2623830735683441, "epoch": 13.36, "learning_rate": 1.708411311695024e-05, "loss": 0.2452, "step": 36970, "task_loss": 0.438732385635376 }, { "compression_loss": 0.0, "distillation_loss": 0.2850179970264435, "epoch": 13.36, "learning_rate": 1.70158846394529e-05, "loss": 0.2597, "step": 36980, "task_loss": 0.5956829786300659 }, { "compression_loss": 0.0, "distillation_loss": 0.2658255100250244, "epoch": 13.37, "learning_rate": 1.6947748894101824e-05, "loss": 0.2444, "step": 36990, "task_loss": 0.4191124737262726 }, { "compression_loss": 0.0, "distillation_loss": 0.21543040871620178, "epoch": 13.37, "learning_rate": 1.6879706232227825e-05, "loss": 0.2556, "step": 37000, "task_loss": 0.8701019883155823 }, { "epoch": 13.37, "eval_exact_match": 83.40586565752129, "eval_f1": 90.1025124401472, "step": 37000 }, { "compression_loss": 0.0, "distillation_loss": 0.20009112358093262, "epoch": 13.38, "learning_rate": 1.6811757004681757e-05, "loss": 0.2638, "step": 37010, "task_loss": 0.5392252802848816 }, { "compression_loss": 0.0, "distillation_loss": 0.20085132122039795, "epoch": 13.38, "learning_rate": 1.674390156183265e-05, "loss": 0.2408, "step": 37020, "task_loss": 0.40361395478248596 }, { "compression_loss": 0.0, "distillation_loss": 0.22304938733577728, "epoch": 13.38, "learning_rate": 1.6676140253566046e-05, "loss": 0.2363, "step": 37030, "task_loss": 0.27312329411506653 }, { "compression_loss": 0.0, "distillation_loss": 0.18347741663455963, "epoch": 13.39, "learning_rate": 1.6608473429281984e-05, "loss": 0.2462, "step": 37040, "task_loss": 0.39412713050842285 }, { "compression_loss": 0.0, "distillation_loss": 0.1991209089756012, "epoch": 13.39, "learning_rate": 1.6540901437893383e-05, "loss": 0.2617, "step": 37050, "task_loss": 0.39125919342041016 }, { "compression_loss": 0.0, "distillation_loss": 0.306510865688324, "epoch": 13.39, "learning_rate": 1.6473424627824154e-05, "loss": 0.2626, "step": 37060, "task_loss": 0.8488194942474365 }, { "compression_loss": 0.0, "distillation_loss": 0.26255011558532715, "epoch": 13.4, "learning_rate": 1.6406043347007433e-05, "loss": 0.2727, "step": 37070, "task_loss": 0.9364755749702454 }, { "compression_loss": 0.0, "distillation_loss": 0.21056485176086426, "epoch": 13.4, "learning_rate": 1.633875794288373e-05, "loss": 0.2605, "step": 37080, "task_loss": 0.48968979716300964 }, { "compression_loss": 0.0, "distillation_loss": 0.2805178165435791, "epoch": 13.4, "learning_rate": 1.6271568762399232e-05, "loss": 0.2535, "step": 37090, "task_loss": 0.5518473386764526 }, { "compression_loss": 0.0, "distillation_loss": 0.21690964698791504, "epoch": 13.41, "learning_rate": 1.620447615200395e-05, "loss": 0.2596, "step": 37100, "task_loss": 0.42174243927001953 }, { "compression_loss": 0.0, "distillation_loss": 0.2182358354330063, "epoch": 13.41, "learning_rate": 1.613748045764995e-05, "loss": 0.2438, "step": 37110, "task_loss": 0.3501666188240051 }, { "compression_loss": 0.0, "distillation_loss": 0.25687599182128906, "epoch": 13.42, "learning_rate": 1.607058202478953e-05, "loss": 0.2505, "step": 37120, "task_loss": 0.27550244331359863 }, { "compression_loss": 0.0, "distillation_loss": 0.23267127573490143, "epoch": 13.42, "learning_rate": 1.600378119837355e-05, "loss": 0.2358, "step": 37130, "task_loss": 0.7091718912124634 }, { "compression_loss": 0.0, "distillation_loss": 0.14930732548236847, "epoch": 13.42, "learning_rate": 1.59370783228495e-05, "loss": 0.2473, "step": 37140, "task_loss": 0.22773753106594086 }, { "compression_loss": 0.0, "distillation_loss": 0.1943025439977646, "epoch": 13.43, "learning_rate": 1.5870473742159846e-05, "loss": 0.236, "step": 37150, "task_loss": 0.4039733409881592 }, { "compression_loss": 0.0, "distillation_loss": 0.2754412591457367, "epoch": 13.43, "learning_rate": 1.5803967799740204e-05, "loss": 0.2567, "step": 37160, "task_loss": 1.006929874420166 }, { "compression_loss": 0.0, "distillation_loss": 0.20370972156524658, "epoch": 13.43, "learning_rate": 1.573756083851759e-05, "loss": 0.2439, "step": 37170, "task_loss": 0.3426205515861511 }, { "compression_loss": 0.0, "distillation_loss": 0.17631080746650696, "epoch": 13.44, "learning_rate": 1.5671253200908604e-05, "loss": 0.2271, "step": 37180, "task_loss": 0.44608959555625916 }, { "compression_loss": 0.0, "distillation_loss": 0.23708629608154297, "epoch": 13.44, "learning_rate": 1.560504522881773e-05, "loss": 0.2556, "step": 37190, "task_loss": 0.6119314432144165 }, { "compression_loss": 0.0, "distillation_loss": 0.347969651222229, "epoch": 13.44, "learning_rate": 1.5538937263635538e-05, "loss": 0.2768, "step": 37200, "task_loss": 0.7669745683670044 }, { "compression_loss": 0.0, "distillation_loss": 0.2737374007701874, "epoch": 13.45, "learning_rate": 1.5472929646236938e-05, "loss": 0.2327, "step": 37210, "task_loss": 0.8010777831077576 }, { "compression_loss": 0.0, "distillation_loss": 0.2608548700809479, "epoch": 13.45, "learning_rate": 1.5407022716979363e-05, "loss": 0.2563, "step": 37220, "task_loss": 0.33176928758621216 }, { "compression_loss": 0.0, "distillation_loss": 0.21440593898296356, "epoch": 13.46, "learning_rate": 1.534121681570116e-05, "loss": 0.255, "step": 37230, "task_loss": 0.3645903468132019 }, { "compression_loss": 0.0, "distillation_loss": 0.23056039214134216, "epoch": 13.46, "learning_rate": 1.5275512281719637e-05, "loss": 0.2443, "step": 37240, "task_loss": 0.5096204280853271 }, { "compression_loss": 0.0, "distillation_loss": 0.2819928228855133, "epoch": 13.46, "learning_rate": 1.5209909453829482e-05, "loss": 0.2712, "step": 37250, "task_loss": 0.2542950510978699 }, { "epoch": 13.46, "eval_exact_match": 83.4247871333964, "eval_f1": 90.14533748801033, "step": 37250 }, { "compression_loss": 0.0, "distillation_loss": 0.21100404858589172, "epoch": 13.47, "learning_rate": 1.5144408670300936e-05, "loss": 0.2543, "step": 37260, "task_loss": 0.6692283749580383 }, { "compression_loss": 0.0, "distillation_loss": 0.23726926743984222, "epoch": 13.47, "learning_rate": 1.5079010268878078e-05, "loss": 0.2438, "step": 37270, "task_loss": 0.6562150120735168 }, { "compression_loss": 0.0, "distillation_loss": 0.24360351264476776, "epoch": 13.47, "learning_rate": 1.5013714586777031e-05, "loss": 0.2696, "step": 37280, "task_loss": 0.3701857328414917 }, { "compression_loss": 0.0, "distillation_loss": 0.20092684030532837, "epoch": 13.48, "learning_rate": 1.4948521960684298e-05, "loss": 0.2565, "step": 37290, "task_loss": 0.5324305295944214 }, { "compression_loss": 0.0, "distillation_loss": 0.24796535074710846, "epoch": 13.48, "learning_rate": 1.4883432726754985e-05, "loss": 0.2497, "step": 37300, "task_loss": 0.6669043302536011 }, { "compression_loss": 0.0, "distillation_loss": 0.24593818187713623, "epoch": 13.48, "learning_rate": 1.4818447220611084e-05, "loss": 0.2446, "step": 37310, "task_loss": 0.5198768377304077 }, { "compression_loss": 0.0, "distillation_loss": 0.18586906790733337, "epoch": 13.49, "learning_rate": 1.475356577733968e-05, "loss": 0.2344, "step": 37320, "task_loss": 0.40673136711120605 }, { "compression_loss": 0.0, "distillation_loss": 0.24557605385780334, "epoch": 13.49, "learning_rate": 1.4688788731491351e-05, "loss": 0.2487, "step": 37330, "task_loss": 0.43017250299453735 }, { "compression_loss": 0.0, "distillation_loss": 0.22308820486068726, "epoch": 13.49, "learning_rate": 1.4624116417078305e-05, "loss": 0.2509, "step": 37340, "task_loss": 0.2053220272064209 }, { "compression_loss": 0.0, "distillation_loss": 0.1951208859682083, "epoch": 13.5, "learning_rate": 1.4559549167572745e-05, "loss": 0.2265, "step": 37350, "task_loss": 0.3179614543914795 }, { "compression_loss": 0.0, "distillation_loss": 0.2037278413772583, "epoch": 13.5, "learning_rate": 1.4495087315905124e-05, "loss": 0.2395, "step": 37360, "task_loss": 0.2637331187725067 }, { "compression_loss": 0.0, "distillation_loss": 0.2836878299713135, "epoch": 13.51, "learning_rate": 1.4430731194462441e-05, "loss": 0.2714, "step": 37370, "task_loss": 0.6579056978225708 }, { "compression_loss": 0.0, "distillation_loss": 0.32002729177474976, "epoch": 13.51, "learning_rate": 1.4366481135086468e-05, "loss": 0.2596, "step": 37380, "task_loss": 0.3903340697288513 }, { "compression_loss": 0.0, "distillation_loss": 0.21812787652015686, "epoch": 13.51, "learning_rate": 1.4302337469072133e-05, "loss": 0.2482, "step": 37390, "task_loss": 0.3896595239639282 }, { "compression_loss": 0.0, "distillation_loss": 0.24210324883460999, "epoch": 13.52, "learning_rate": 1.423830052716575e-05, "loss": 0.2454, "step": 37400, "task_loss": 0.29697751998901367 }, { "compression_loss": 0.0, "distillation_loss": 0.1834373027086258, "epoch": 13.52, "learning_rate": 1.4174370639563332e-05, "loss": 0.2527, "step": 37410, "task_loss": 0.2785026729106903 }, { "compression_loss": 0.0, "distillation_loss": 0.23967260122299194, "epoch": 13.52, "learning_rate": 1.4110548135908838e-05, "loss": 0.2604, "step": 37420, "task_loss": 0.4717600643634796 }, { "compression_loss": 0.0, "distillation_loss": 0.18912403285503387, "epoch": 13.53, "learning_rate": 1.4046833345292616e-05, "loss": 0.2177, "step": 37430, "task_loss": 0.38669267296791077 }, { "compression_loss": 0.0, "distillation_loss": 0.2931283116340637, "epoch": 13.53, "learning_rate": 1.398322659624951e-05, "loss": 0.2575, "step": 37440, "task_loss": 0.4614885449409485 }, { "compression_loss": 0.0, "distillation_loss": 0.24810287356376648, "epoch": 13.53, "learning_rate": 1.3919728216757317e-05, "loss": 0.2462, "step": 37450, "task_loss": 0.48419344425201416 }, { "compression_loss": 0.0, "distillation_loss": 0.2596108913421631, "epoch": 13.54, "learning_rate": 1.3856338534235039e-05, "loss": 0.2684, "step": 37460, "task_loss": 0.7017297148704529 }, { "compression_loss": 0.0, "distillation_loss": 0.2619935870170593, "epoch": 13.54, "learning_rate": 1.37930578755412e-05, "loss": 0.2436, "step": 37470, "task_loss": 0.4745601415634155 }, { "compression_loss": 0.0, "distillation_loss": 0.17720942199230194, "epoch": 13.55, "learning_rate": 1.3729886566972142e-05, "loss": 0.245, "step": 37480, "task_loss": 0.16972871124744415 }, { "compression_loss": 0.0, "distillation_loss": 0.2728440463542938, "epoch": 13.55, "learning_rate": 1.3666824934260375e-05, "loss": 0.2331, "step": 37490, "task_loss": 0.8913913369178772 }, { "compression_loss": 0.0, "distillation_loss": 0.2932123839855194, "epoch": 13.55, "learning_rate": 1.3603873302572893e-05, "loss": 0.2365, "step": 37500, "task_loss": 0.40376877784729004 }, { "epoch": 13.55, "eval_exact_match": 83.50993377483444, "eval_f1": 90.26178759077712, "step": 37500 }, { "compression_loss": 0.0, "distillation_loss": 0.20828589797019958, "epoch": 13.56, "learning_rate": 1.3541031996509478e-05, "loss": 0.256, "step": 37510, "task_loss": 0.26341885328292847 }, { "compression_loss": 0.0, "distillation_loss": 0.16295726597309113, "epoch": 13.56, "learning_rate": 1.347830134010101e-05, "loss": 0.2389, "step": 37520, "task_loss": 0.3301553428173065 }, { "compression_loss": 0.0, "distillation_loss": 0.25007620453834534, "epoch": 13.56, "learning_rate": 1.3415681656807893e-05, "loss": 0.2506, "step": 37530, "task_loss": 0.49313557147979736 }, { "compression_loss": 0.0, "distillation_loss": 0.24634864926338196, "epoch": 13.57, "learning_rate": 1.335317326951823e-05, "loss": 0.2445, "step": 37540, "task_loss": 0.4683687090873718 }, { "compression_loss": 0.0, "distillation_loss": 0.1813734769821167, "epoch": 13.57, "learning_rate": 1.3290776500546303e-05, "loss": 0.2337, "step": 37550, "task_loss": 0.4950636625289917 }, { "compression_loss": 0.0, "distillation_loss": 0.2741466164588928, "epoch": 13.57, "learning_rate": 1.3228491671630832e-05, "loss": 0.2403, "step": 37560, "task_loss": 0.7804323434829712 }, { "compression_loss": 0.0, "distillation_loss": 0.2687833905220032, "epoch": 13.58, "learning_rate": 1.3166319103933353e-05, "loss": 0.2493, "step": 37570, "task_loss": 0.4127853810787201 }, { "compression_loss": 0.0, "distillation_loss": 0.2195703685283661, "epoch": 13.58, "learning_rate": 1.3104259118036505e-05, "loss": 0.2727, "step": 37580, "task_loss": 0.16775909066200256 }, { "compression_loss": 0.0, "distillation_loss": 0.2721908688545227, "epoch": 13.59, "learning_rate": 1.3042312033942458e-05, "loss": 0.2317, "step": 37590, "task_loss": 0.5137249231338501 }, { "compression_loss": 0.0, "distillation_loss": 0.1676669418811798, "epoch": 13.59, "learning_rate": 1.2980478171071208e-05, "loss": 0.2515, "step": 37600, "task_loss": 0.4399087727069855 }, { "compression_loss": 0.0, "distillation_loss": 0.22237111628055573, "epoch": 13.59, "learning_rate": 1.2918757848258958e-05, "loss": 0.2403, "step": 37610, "task_loss": 0.5158141851425171 }, { "compression_loss": 0.0, "distillation_loss": 0.2540138065814972, "epoch": 13.6, "learning_rate": 1.285715138375641e-05, "loss": 0.2341, "step": 37620, "task_loss": 0.6373366117477417 }, { "compression_loss": 0.0, "distillation_loss": 0.26354658603668213, "epoch": 13.6, "learning_rate": 1.2795659095227255e-05, "loss": 0.2724, "step": 37630, "task_loss": 0.64971923828125 }, { "compression_loss": 0.0, "distillation_loss": 0.19850783050060272, "epoch": 13.6, "learning_rate": 1.2734281299746384e-05, "loss": 0.2667, "step": 37640, "task_loss": 0.29709264636039734 }, { "compression_loss": 0.0, "distillation_loss": 0.24133870005607605, "epoch": 13.61, "learning_rate": 1.2673018313798351e-05, "loss": 0.2627, "step": 37650, "task_loss": 0.3527446687221527 }, { "compression_loss": 0.0, "distillation_loss": 0.2671836018562317, "epoch": 13.61, "learning_rate": 1.261187045327572e-05, "loss": 0.2677, "step": 37660, "task_loss": 0.8679643869400024 }, { "compression_loss": 0.0, "distillation_loss": 0.2491600662469864, "epoch": 13.61, "learning_rate": 1.2550838033477418e-05, "loss": 0.2418, "step": 37670, "task_loss": 0.40323901176452637 }, { "compression_loss": 0.0, "distillation_loss": 0.21075092256069183, "epoch": 13.62, "learning_rate": 1.2489921369107137e-05, "loss": 0.2454, "step": 37680, "task_loss": 0.5236786603927612 }, { "compression_loss": 0.0, "distillation_loss": 0.30563992261886597, "epoch": 13.62, "learning_rate": 1.242912077427165e-05, "loss": 0.2314, "step": 37690, "task_loss": 0.5004376173019409 }, { "compression_loss": 0.0, "distillation_loss": 0.2149924635887146, "epoch": 13.62, "learning_rate": 1.2368436562479312e-05, "loss": 0.2544, "step": 37700, "task_loss": 0.5571039319038391 }, { "compression_loss": 0.0, "distillation_loss": 0.22124919295310974, "epoch": 13.63, "learning_rate": 1.2307869046638296e-05, "loss": 0.2382, "step": 37710, "task_loss": 0.41436147689819336 }, { "compression_loss": 0.0, "distillation_loss": 0.21285533905029297, "epoch": 13.63, "learning_rate": 1.2247418539055088e-05, "loss": 0.2447, "step": 37720, "task_loss": 0.5282078385353088 }, { "compression_loss": 0.0, "distillation_loss": 0.22626446187496185, "epoch": 13.64, "learning_rate": 1.2187085351432828e-05, "loss": 0.2537, "step": 37730, "task_loss": 0.5426703691482544 }, { "compression_loss": 0.0, "distillation_loss": 0.2785342335700989, "epoch": 13.64, "learning_rate": 1.2126869794869742e-05, "loss": 0.2479, "step": 37740, "task_loss": 0.4130212068557739 }, { "compression_loss": 0.0, "distillation_loss": 0.2820085287094116, "epoch": 13.64, "learning_rate": 1.2066772179857454e-05, "loss": 0.2727, "step": 37750, "task_loss": 0.482336163520813 }, { "epoch": 13.64, "eval_exact_match": 83.50047303689688, "eval_f1": 90.1728300271499, "step": 37750 }, { "compression_loss": 0.0, "distillation_loss": 0.2469613254070282, "epoch": 13.65, "learning_rate": 1.2006792816279496e-05, "loss": 0.2344, "step": 37760, "task_loss": 0.39518874883651733 }, { "compression_loss": 0.0, "distillation_loss": 0.17427587509155273, "epoch": 13.65, "learning_rate": 1.1946932013409631e-05, "loss": 0.2599, "step": 37770, "task_loss": 0.5201607346534729 }, { "compression_loss": 0.0, "distillation_loss": 0.29734861850738525, "epoch": 13.65, "learning_rate": 1.1887190079910299e-05, "loss": 0.2529, "step": 37780, "task_loss": 0.47351279854774475 }, { "compression_loss": 0.0, "distillation_loss": 0.1882125586271286, "epoch": 13.66, "learning_rate": 1.1827567323830968e-05, "loss": 0.2428, "step": 37790, "task_loss": 0.2635359764099121 }, { "compression_loss": 0.0, "distillation_loss": 0.2885553240776062, "epoch": 13.66, "learning_rate": 1.1768064052606658e-05, "loss": 0.2525, "step": 37800, "task_loss": 0.5406814813613892 }, { "compression_loss": 0.0, "distillation_loss": 0.19153067469596863, "epoch": 13.66, "learning_rate": 1.1708680573056214e-05, "loss": 0.2589, "step": 37810, "task_loss": 0.31528323888778687 }, { "compression_loss": 0.0, "distillation_loss": 0.24100527167320251, "epoch": 13.67, "learning_rate": 1.1649417191380835e-05, "loss": 0.2487, "step": 37820, "task_loss": 0.6156647205352783 }, { "compression_loss": 0.0, "distillation_loss": 0.17599420249462128, "epoch": 13.67, "learning_rate": 1.1590274213162445e-05, "loss": 0.2461, "step": 37830, "task_loss": 0.36050450801849365 }, { "compression_loss": 0.0, "distillation_loss": 0.2299099862575531, "epoch": 13.68, "learning_rate": 1.153125194336214e-05, "loss": 0.2828, "step": 37840, "task_loss": 0.4206226170063019 }, { "compression_loss": 0.0, "distillation_loss": 0.20498183369636536, "epoch": 13.68, "learning_rate": 1.147235068631857e-05, "loss": 0.2303, "step": 37850, "task_loss": 0.47948789596557617 }, { "compression_loss": 0.0, "distillation_loss": 0.2277766764163971, "epoch": 13.68, "learning_rate": 1.1413570745746428e-05, "loss": 0.25, "step": 37860, "task_loss": 0.5457226037979126 }, { "compression_loss": 0.0, "distillation_loss": 0.26872360706329346, "epoch": 13.69, "learning_rate": 1.135491242473486e-05, "loss": 0.2548, "step": 37870, "task_loss": 0.5940477848052979 }, { "compression_loss": 0.0, "distillation_loss": 0.16519543528556824, "epoch": 13.69, "learning_rate": 1.1296376025745887e-05, "loss": 0.2321, "step": 37880, "task_loss": 0.3459252417087555 }, { "compression_loss": 0.0, "distillation_loss": 0.26993200182914734, "epoch": 13.69, "learning_rate": 1.123796185061287e-05, "loss": 0.2517, "step": 37890, "task_loss": 0.8263578414916992 }, { "compression_loss": 0.0, "distillation_loss": 0.23767806589603424, "epoch": 13.7, "learning_rate": 1.1179670200538954e-05, "loss": 0.2378, "step": 37900, "task_loss": 0.47348955273628235 }, { "compression_loss": 0.0, "distillation_loss": 0.2323693037033081, "epoch": 13.7, "learning_rate": 1.1121501376095457e-05, "loss": 0.2371, "step": 37910, "task_loss": 0.2555083632469177 }, { "compression_loss": 0.0, "distillation_loss": 0.21224288642406464, "epoch": 13.7, "learning_rate": 1.1063455677220416e-05, "loss": 0.2315, "step": 37920, "task_loss": 0.27007701992988586 }, { "compression_loss": 0.0, "distillation_loss": 0.21018406748771667, "epoch": 13.71, "learning_rate": 1.1005533403216972e-05, "loss": 0.235, "step": 37930, "task_loss": 0.5078712105751038 }, { "compression_loss": 0.0, "distillation_loss": 0.25440073013305664, "epoch": 13.71, "learning_rate": 1.0947734852751854e-05, "loss": 0.2425, "step": 37940, "task_loss": 0.3627524673938751 }, { "compression_loss": 0.0, "distillation_loss": 0.2233213484287262, "epoch": 13.72, "learning_rate": 1.08900603238538e-05, "loss": 0.2278, "step": 37950, "task_loss": 0.5547107458114624 }, { "compression_loss": 0.0, "distillation_loss": 0.2389487624168396, "epoch": 13.72, "learning_rate": 1.083251011391211e-05, "loss": 0.2451, "step": 37960, "task_loss": 0.47202402353286743 }, { "compression_loss": 0.0, "distillation_loss": 0.2538294196128845, "epoch": 13.72, "learning_rate": 1.0775084519674985e-05, "loss": 0.2364, "step": 37970, "task_loss": 0.4768025279045105 }, { "compression_loss": 0.0, "distillation_loss": 0.2578142285346985, "epoch": 13.73, "learning_rate": 1.071778383724812e-05, "loss": 0.2361, "step": 37980, "task_loss": 0.3769608438014984 }, { "compression_loss": 0.0, "distillation_loss": 0.20831502974033356, "epoch": 13.73, "learning_rate": 1.0660608362093095e-05, "loss": 0.2343, "step": 37990, "task_loss": 0.616707444190979 }, { "compression_loss": 0.0, "distillation_loss": 0.1966831386089325, "epoch": 13.73, "learning_rate": 1.0603558389025896e-05, "loss": 0.2402, "step": 38000, "task_loss": 0.3687472343444824 }, { "epoch": 13.73, "eval_exact_match": 83.59508041627247, "eval_f1": 90.24016460930066, "step": 38000 }, { "compression_loss": 0.0, "distillation_loss": 0.2341974973678589, "epoch": 13.74, "learning_rate": 1.0546634212215348e-05, "loss": 0.2361, "step": 38010, "task_loss": 0.31787946820259094 }, { "compression_loss": 0.0, "distillation_loss": 0.18781892955303192, "epoch": 13.74, "learning_rate": 1.0489836125181665e-05, "loss": 0.2314, "step": 38020, "task_loss": 0.30610185861587524 }, { "compression_loss": 0.0, "distillation_loss": 0.32764458656311035, "epoch": 13.74, "learning_rate": 1.0433164420794874e-05, "loss": 0.2738, "step": 38030, "task_loss": 0.8996621370315552 }, { "compression_loss": 0.0, "distillation_loss": 0.19176249206066132, "epoch": 13.75, "learning_rate": 1.0376619391273353e-05, "loss": 0.2427, "step": 38040, "task_loss": 0.2879910469055176 }, { "compression_loss": 0.0, "distillation_loss": 0.22764107584953308, "epoch": 13.75, "learning_rate": 1.0320201328182257e-05, "loss": 0.2905, "step": 38050, "task_loss": 0.39458274841308594 }, { "compression_loss": 0.0, "distillation_loss": 0.23433086276054382, "epoch": 13.75, "learning_rate": 1.0263910522432138e-05, "loss": 0.2425, "step": 38060, "task_loss": 0.8387042284011841 }, { "compression_loss": 0.0, "distillation_loss": 0.2206321656703949, "epoch": 13.76, "learning_rate": 1.020774726427728e-05, "loss": 0.25, "step": 38070, "task_loss": 0.3945035934448242 }, { "compression_loss": 0.0, "distillation_loss": 0.19005993008613586, "epoch": 13.76, "learning_rate": 1.0151711843314342e-05, "loss": 0.2536, "step": 38080, "task_loss": 0.741138219833374 }, { "compression_loss": 0.0, "distillation_loss": 0.30447882413864136, "epoch": 13.77, "learning_rate": 1.0095804548480798e-05, "loss": 0.2431, "step": 38090, "task_loss": 0.5937259197235107 }, { "compression_loss": 0.0, "distillation_loss": 0.24016176164150238, "epoch": 13.77, "learning_rate": 1.004002566805347e-05, "loss": 0.2601, "step": 38100, "task_loss": 0.4847050905227661 }, { "compression_loss": 0.0, "distillation_loss": 0.21476280689239502, "epoch": 13.77, "learning_rate": 9.984375489647e-06, "loss": 0.236, "step": 38110, "task_loss": 0.262297123670578 }, { "compression_loss": 0.0, "distillation_loss": 0.23751616477966309, "epoch": 13.78, "learning_rate": 9.92885430021243e-06, "loss": 0.2438, "step": 38120, "task_loss": 0.323070228099823 }, { "compression_loss": 0.0, "distillation_loss": 0.1588648557662964, "epoch": 13.78, "learning_rate": 9.87346238603569e-06, "loss": 0.2267, "step": 38130, "task_loss": 0.26954102516174316 }, { "compression_loss": 0.0, "distillation_loss": 0.227637380361557, "epoch": 13.78, "learning_rate": 9.81820003273612e-06, "loss": 0.241, "step": 38140, "task_loss": 0.6161648631095886 }, { "compression_loss": 0.0, "distillation_loss": 0.17858195304870605, "epoch": 13.79, "learning_rate": 9.763067525264964e-06, "loss": 0.2348, "step": 38150, "task_loss": 0.34298527240753174 }, { "compression_loss": 0.0, "distillation_loss": 0.1912699043750763, "epoch": 13.79, "learning_rate": 9.708065147904013e-06, "loss": 0.2339, "step": 38160, "task_loss": 0.35443249344825745 }, { "compression_loss": 0.0, "distillation_loss": 0.2191619575023651, "epoch": 13.79, "learning_rate": 9.653193184263991e-06, "loss": 0.2252, "step": 38170, "task_loss": 0.5474572777748108 }, { "compression_loss": 0.0, "distillation_loss": 0.23868389427661896, "epoch": 13.8, "learning_rate": 9.598451917283206e-06, "loss": 0.2393, "step": 38180, "task_loss": 0.30752283334732056 }, { "compression_loss": 0.0, "distillation_loss": 0.2384638637304306, "epoch": 13.8, "learning_rate": 9.543841629226033e-06, "loss": 0.2517, "step": 38190, "task_loss": 0.5427389144897461 }, { "compression_loss": 0.0, "distillation_loss": 0.2237512767314911, "epoch": 13.81, "learning_rate": 9.489362601681498e-06, "loss": 0.2463, "step": 38200, "task_loss": 0.23124629259109497 }, { "compression_loss": 0.0, "distillation_loss": 0.23062996566295624, "epoch": 13.81, "learning_rate": 9.435015115561758e-06, "loss": 0.2568, "step": 38210, "task_loss": 0.16488313674926758 }, { "compression_loss": 0.0, "distillation_loss": 0.21428482234477997, "epoch": 13.81, "learning_rate": 9.380799451100741e-06, "loss": 0.2449, "step": 38220, "task_loss": 0.6556562185287476 }, { "compression_loss": 0.0, "distillation_loss": 0.22192302346229553, "epoch": 13.82, "learning_rate": 9.326715887852645e-06, "loss": 0.2381, "step": 38230, "task_loss": 0.637799859046936 }, { "compression_loss": 0.0, "distillation_loss": 0.24508652091026306, "epoch": 13.82, "learning_rate": 9.272764704690518e-06, "loss": 0.2477, "step": 38240, "task_loss": 0.5043495893478394 }, { "compression_loss": 0.0, "distillation_loss": 0.20607826113700867, "epoch": 13.82, "learning_rate": 9.218946179804766e-06, "loss": 0.2682, "step": 38250, "task_loss": 0.5137103796005249 }, { "epoch": 13.82, "eval_exact_match": 83.41532639545885, "eval_f1": 90.16543784803585, "step": 38250 }, { "compression_loss": 0.0, "distillation_loss": 0.2131115049123764, "epoch": 13.83, "learning_rate": 9.16526059070185e-06, "loss": 0.2364, "step": 38260, "task_loss": 0.7902463674545288 }, { "compression_loss": 0.0, "distillation_loss": 0.21433007717132568, "epoch": 13.83, "learning_rate": 9.11170821420269e-06, "loss": 0.2245, "step": 38270, "task_loss": 0.7640265226364136 }, { "compression_loss": 0.0, "distillation_loss": 0.2725834250450134, "epoch": 13.83, "learning_rate": 9.058289326441349e-06, "loss": 0.2431, "step": 38280, "task_loss": 0.3009580969810486 }, { "compression_loss": 0.0, "distillation_loss": 0.22229403257369995, "epoch": 13.84, "learning_rate": 9.005004202863581e-06, "loss": 0.2366, "step": 38290, "task_loss": 0.2828536927700043 }, { "compression_loss": 0.0, "distillation_loss": 0.20776572823524475, "epoch": 13.84, "learning_rate": 8.951853118225413e-06, "loss": 0.2613, "step": 38300, "task_loss": 0.49406182765960693 }, { "compression_loss": 0.0, "distillation_loss": 0.1888459324836731, "epoch": 13.85, "learning_rate": 8.898836346591686e-06, "loss": 0.2452, "step": 38310, "task_loss": 0.24637502431869507 }, { "compression_loss": 0.0, "distillation_loss": 0.1972496658563614, "epoch": 13.85, "learning_rate": 8.845954161334712e-06, "loss": 0.2501, "step": 38320, "task_loss": 0.23441748321056366 }, { "compression_loss": 0.0, "distillation_loss": 0.25202658772468567, "epoch": 13.85, "learning_rate": 8.793206835132823e-06, "loss": 0.2476, "step": 38330, "task_loss": 0.30959269404411316 }, { "compression_loss": 0.0, "distillation_loss": 0.18595066666603088, "epoch": 13.86, "learning_rate": 8.74059463996898e-06, "loss": 0.2561, "step": 38340, "task_loss": 0.45879602432250977 }, { "compression_loss": 0.0, "distillation_loss": 0.22673186659812927, "epoch": 13.86, "learning_rate": 8.688117847129323e-06, "loss": 0.264, "step": 38350, "task_loss": 0.5123938322067261 }, { "compression_loss": 0.0, "distillation_loss": 0.24467988312244415, "epoch": 13.86, "learning_rate": 8.635776727201879e-06, "loss": 0.2449, "step": 38360, "task_loss": 0.344679057598114 }, { "compression_loss": 0.0, "distillation_loss": 0.17541030049324036, "epoch": 13.87, "learning_rate": 8.583571550075038e-06, "loss": 0.2452, "step": 38370, "task_loss": 0.3095884919166565 }, { "compression_loss": 0.0, "distillation_loss": 0.18970157206058502, "epoch": 13.87, "learning_rate": 8.531502584936257e-06, "loss": 0.2627, "step": 38380, "task_loss": 0.2229578197002411 }, { "compression_loss": 0.0, "distillation_loss": 0.19921694695949554, "epoch": 13.87, "learning_rate": 8.479570100270628e-06, "loss": 0.2512, "step": 38390, "task_loss": 0.7031322717666626 }, { "compression_loss": 0.0, "distillation_loss": 0.19549816846847534, "epoch": 13.88, "learning_rate": 8.427774363859511e-06, "loss": 0.2552, "step": 38400, "task_loss": 0.4025685787200928 }, { "compression_loss": 0.0, "distillation_loss": 0.29793527722358704, "epoch": 13.88, "learning_rate": 8.37611564277913e-06, "loss": 0.2567, "step": 38410, "task_loss": 0.4364060163497925 }, { "compression_loss": 0.0, "distillation_loss": 0.27970990538597107, "epoch": 13.89, "learning_rate": 8.324594203399223e-06, "loss": 0.2352, "step": 38420, "task_loss": 0.619552731513977 }, { "compression_loss": 0.0, "distillation_loss": 0.20510753989219666, "epoch": 13.89, "learning_rate": 8.273210311381659e-06, "loss": 0.2461, "step": 38430, "task_loss": 0.30276721715927124 }, { "compression_loss": 0.0, "distillation_loss": 0.22325491905212402, "epoch": 13.89, "learning_rate": 8.221964231679075e-06, "loss": 0.2481, "step": 38440, "task_loss": 0.5327010750770569 }, { "compression_loss": 0.0, "distillation_loss": 0.1987735629081726, "epoch": 13.9, "learning_rate": 8.170856228533461e-06, "loss": 0.2206, "step": 38450, "task_loss": 0.49047574400901794 }, { "compression_loss": 0.0, "distillation_loss": 0.21051108837127686, "epoch": 13.9, "learning_rate": 8.119886565474906e-06, "loss": 0.233, "step": 38460, "task_loss": 0.3335927724838257 }, { "compression_loss": 0.0, "distillation_loss": 0.19253046810626984, "epoch": 13.9, "learning_rate": 8.069055505320102e-06, "loss": 0.2326, "step": 38470, "task_loss": 0.495675653219223 }, { "compression_loss": 0.0, "distillation_loss": 0.23543712496757507, "epoch": 13.91, "learning_rate": 8.018363310171097e-06, "loss": 0.2497, "step": 38480, "task_loss": 0.45676204562187195 }, { "compression_loss": 0.0, "distillation_loss": 0.2220216989517212, "epoch": 13.91, "learning_rate": 7.9678102414139e-06, "loss": 0.2253, "step": 38490, "task_loss": 0.6177787780761719 }, { "compression_loss": 0.0, "distillation_loss": 0.2542474865913391, "epoch": 13.91, "learning_rate": 7.917396559717134e-06, "loss": 0.2341, "step": 38500, "task_loss": 0.5604841113090515 }, { "epoch": 13.91, "eval_exact_match": 83.73699148533585, "eval_f1": 90.33564700005304, "step": 38500 }, { "compression_loss": 0.0, "distillation_loss": 0.2272586077451706, "epoch": 13.92, "learning_rate": 7.867122525030685e-06, "loss": 0.2341, "step": 38510, "task_loss": 0.45142441987991333 }, { "compression_loss": 0.0, "distillation_loss": 0.19398754835128784, "epoch": 13.92, "learning_rate": 7.816988396584382e-06, "loss": 0.2454, "step": 38520, "task_loss": 0.5788732767105103 }, { "compression_loss": 0.0, "distillation_loss": 0.21237385272979736, "epoch": 13.92, "learning_rate": 7.766994432886658e-06, "loss": 0.2452, "step": 38530, "task_loss": 0.3880312442779541 }, { "compression_loss": 0.0, "distillation_loss": 0.21336814761161804, "epoch": 13.93, "learning_rate": 7.7171408917232e-06, "loss": 0.2534, "step": 38540, "task_loss": 0.5350017547607422 }, { "compression_loss": 0.0, "distillation_loss": 0.24495704472064972, "epoch": 13.93, "learning_rate": 7.667428030155615e-06, "loss": 0.2345, "step": 38550, "task_loss": 0.38775384426116943 }, { "compression_loss": 0.0, "distillation_loss": 0.2679603695869446, "epoch": 13.94, "learning_rate": 7.61785610452016e-06, "loss": 0.2355, "step": 38560, "task_loss": 0.4116300046443939 }, { "compression_loss": 0.0, "distillation_loss": 0.15576696395874023, "epoch": 13.94, "learning_rate": 7.568425370426332e-06, "loss": 0.2455, "step": 38570, "task_loss": 0.24840381741523743 }, { "compression_loss": 0.0, "distillation_loss": 0.25543665885925293, "epoch": 13.94, "learning_rate": 7.5191360827556245e-06, "loss": 0.2583, "step": 38580, "task_loss": 0.5544813871383667 }, { "compression_loss": 0.0, "distillation_loss": 0.2729243040084839, "epoch": 13.95, "learning_rate": 7.4699884956601825e-06, "loss": 0.2514, "step": 38590, "task_loss": 0.45332229137420654 }, { "compression_loss": 0.0, "distillation_loss": 0.27071091532707214, "epoch": 13.95, "learning_rate": 7.420982862561493e-06, "loss": 0.2566, "step": 38600, "task_loss": 0.4697257876396179 }, { "compression_loss": 0.0, "distillation_loss": 0.26173168420791626, "epoch": 13.95, "learning_rate": 7.372119436149068e-06, "loss": 0.224, "step": 38610, "task_loss": 0.1902489811182022 }, { "compression_loss": 0.0, "distillation_loss": 0.23809251189231873, "epoch": 13.96, "learning_rate": 7.323398468379176e-06, "loss": 0.2403, "step": 38620, "task_loss": 0.5104325413703918 }, { "compression_loss": 0.0, "distillation_loss": 0.3130069673061371, "epoch": 13.96, "learning_rate": 7.2748202104735e-06, "loss": 0.2752, "step": 38630, "task_loss": 1.0560616254806519 }, { "compression_loss": 0.0, "distillation_loss": 0.19314908981323242, "epoch": 13.96, "learning_rate": 7.226384912917893e-06, "loss": 0.2569, "step": 38640, "task_loss": 0.5038644075393677 }, { "compression_loss": 0.0, "distillation_loss": 0.2326485961675644, "epoch": 13.97, "learning_rate": 7.178092825461004e-06, "loss": 0.2354, "step": 38650, "task_loss": 0.5128936171531677 }, { "compression_loss": 0.0, "distillation_loss": 0.1915554702281952, "epoch": 13.97, "learning_rate": 7.129944197113112e-06, "loss": 0.248, "step": 38660, "task_loss": 0.3978360891342163 }, { "compression_loss": 0.0, "distillation_loss": 0.1979401707649231, "epoch": 13.98, "learning_rate": 7.081939276144708e-06, "loss": 0.2195, "step": 38670, "task_loss": 0.4705093801021576 }, { "compression_loss": 0.0, "distillation_loss": 0.23233872652053833, "epoch": 13.98, "learning_rate": 7.034078310085315e-06, "loss": 0.23, "step": 38680, "task_loss": 0.6098400354385376 }, { "compression_loss": 0.0, "distillation_loss": 0.2868359088897705, "epoch": 13.98, "learning_rate": 6.986361545722173e-06, "loss": 0.257, "step": 38690, "task_loss": 0.7146321535110474 }, { "compression_loss": 0.0, "distillation_loss": 0.2127329409122467, "epoch": 13.99, "learning_rate": 6.938789229098967e-06, "loss": 0.2325, "step": 38700, "task_loss": 0.2952830195426941 }, { "compression_loss": 0.0, "distillation_loss": 0.23154892027378082, "epoch": 13.99, "learning_rate": 6.8913616055145456e-06, "loss": 0.2365, "step": 38710, "task_loss": 0.494489848613739 }, { "compression_loss": 0.0, "distillation_loss": 0.22295670211315155, "epoch": 13.99, "learning_rate": 6.8440789195216915e-06, "loss": 0.2562, "step": 38720, "task_loss": 0.3631395101547241 }, { "compression_loss": 0.0, "distillation_loss": 0.2322646975517273, "epoch": 14.0, "learning_rate": 6.796941414925831e-06, "loss": 0.2093, "step": 38730, "task_loss": 0.459478497505188 }, { "compression_loss": 0.0, "distillation_loss": 0.2091185450553894, "epoch": 14.0, "learning_rate": 6.7499493347838e-06, "loss": 0.2628, "step": 38740, "task_loss": 0.42174839973449707 }, { "compression_loss": 0.0, "distillation_loss": 0.16904908418655396, "epoch": 14.0, "learning_rate": 6.703102921402538e-06, "loss": 0.2227, "step": 38750, "task_loss": 0.30236685276031494 }, { "epoch": 14.0, "eval_exact_match": 83.50993377483444, "eval_f1": 90.17927551421984, "step": 38750 }, { "compression_loss": 0.0, "distillation_loss": 0.16442182660102844, "epoch": 14.01, "learning_rate": 6.656402416337944e-06, "loss": 0.2263, "step": 38760, "task_loss": 0.2523331344127655 }, { "compression_loss": 0.0, "distillation_loss": 0.25732508301734924, "epoch": 14.01, "learning_rate": 6.609848060393494e-06, "loss": 0.2272, "step": 38770, "task_loss": 0.5731315612792969 }, { "compression_loss": 0.0, "distillation_loss": 0.21109287440776825, "epoch": 14.02, "learning_rate": 6.563440093619115e-06, "loss": 0.2242, "step": 38780, "task_loss": 0.5353798866271973 }, { "compression_loss": 0.0, "distillation_loss": 0.18337416648864746, "epoch": 14.02, "learning_rate": 6.5171787553098855e-06, "loss": 0.228, "step": 38790, "task_loss": 0.34574657678604126 }, { "compression_loss": 0.0, "distillation_loss": 0.23161965608596802, "epoch": 14.02, "learning_rate": 6.471064284004835e-06, "loss": 0.2217, "step": 38800, "task_loss": 0.3575626015663147 }, { "compression_loss": 0.0, "distillation_loss": 0.18752753734588623, "epoch": 14.03, "learning_rate": 6.4250969174856635e-06, "loss": 0.2242, "step": 38810, "task_loss": 0.2804476022720337 }, { "compression_loss": 0.0, "distillation_loss": 0.164092019200325, "epoch": 14.03, "learning_rate": 6.379276892775579e-06, "loss": 0.2367, "step": 38820, "task_loss": 0.18792039155960083 }, { "compression_loss": 0.0, "distillation_loss": 0.23862093687057495, "epoch": 14.03, "learning_rate": 6.3336044461380385e-06, "loss": 0.2302, "step": 38830, "task_loss": 0.6268182992935181 }, { "compression_loss": 0.0, "distillation_loss": 0.2213876098394394, "epoch": 14.04, "learning_rate": 6.288079813075543e-06, "loss": 0.2261, "step": 38840, "task_loss": 0.4968934655189514 }, { "compression_loss": 0.0, "distillation_loss": 0.23776161670684814, "epoch": 14.04, "learning_rate": 6.242703228328384e-06, "loss": 0.2257, "step": 38850, "task_loss": 0.5843216180801392 }, { "compression_loss": 0.0, "distillation_loss": 0.18511147797107697, "epoch": 14.04, "learning_rate": 6.197474925873525e-06, "loss": 0.2269, "step": 38860, "task_loss": 0.2638187110424042 }, { "compression_loss": 0.0, "distillation_loss": 0.1563921570777893, "epoch": 14.05, "learning_rate": 6.1523951389232715e-06, "loss": 0.22, "step": 38870, "task_loss": 0.43536078929901123 }, { "compression_loss": 0.0, "distillation_loss": 0.18232986330986023, "epoch": 14.05, "learning_rate": 6.107464099924179e-06, "loss": 0.2148, "step": 38880, "task_loss": 0.36110395193099976 }, { "compression_loss": 0.0, "distillation_loss": 0.2118646204471588, "epoch": 14.05, "learning_rate": 6.062682040555794e-06, "loss": 0.2408, "step": 38890, "task_loss": 0.8396679162979126 }, { "compression_loss": 0.0, "distillation_loss": 0.17698946595191956, "epoch": 14.06, "learning_rate": 6.018049191729475e-06, "loss": 0.2141, "step": 38900, "task_loss": 0.2846156060695648 }, { "compression_loss": 0.0, "distillation_loss": 0.22236953675746918, "epoch": 14.06, "learning_rate": 5.9735657835871894e-06, "loss": 0.2393, "step": 38910, "task_loss": 0.6163221001625061 }, { "compression_loss": 0.0, "distillation_loss": 0.21342119574546814, "epoch": 14.07, "learning_rate": 5.929232045500351e-06, "loss": 0.2244, "step": 38920, "task_loss": 0.48823344707489014 }, { "compression_loss": 0.0, "distillation_loss": 0.2061123251914978, "epoch": 14.07, "learning_rate": 5.885048206068627e-06, "loss": 0.2401, "step": 38930, "task_loss": 0.4645582437515259 }, { "compression_loss": 0.0, "distillation_loss": 0.22864103317260742, "epoch": 14.07, "learning_rate": 5.841014493118753e-06, "loss": 0.2497, "step": 38940, "task_loss": 0.8002902269363403 }, { "compression_loss": 0.0, "distillation_loss": 0.1768096685409546, "epoch": 14.08, "learning_rate": 5.797131133703335e-06, "loss": 0.2108, "step": 38950, "task_loss": 0.3029367923736572 }, { "compression_loss": 0.0, "distillation_loss": 0.1883765161037445, "epoch": 14.08, "learning_rate": 5.753398354099773e-06, "loss": 0.2286, "step": 38960, "task_loss": 0.6003336906433105 }, { "compression_loss": 0.0, "distillation_loss": 0.16090941429138184, "epoch": 14.08, "learning_rate": 5.7098163798089505e-06, "loss": 0.2301, "step": 38970, "task_loss": 1.0162906646728516 }, { "compression_loss": 0.0, "distillation_loss": 0.21309834718704224, "epoch": 14.09, "learning_rate": 5.666385435554192e-06, "loss": 0.2746, "step": 38980, "task_loss": 0.6796290874481201 }, { "compression_loss": 0.0, "distillation_loss": 0.20901435613632202, "epoch": 14.09, "learning_rate": 5.623105745280054e-06, "loss": 0.2181, "step": 38990, "task_loss": 0.4081113934516907 }, { "compression_loss": 0.0, "distillation_loss": 0.2516651749610901, "epoch": 14.09, "learning_rate": 5.5799775321511795e-06, "loss": 0.2362, "step": 39000, "task_loss": 0.3016287684440613 }, { "epoch": 14.09, "eval_exact_match": 83.56669820245979, "eval_f1": 90.22665241010446, "step": 39000 }, { "compression_loss": 0.0, "distillation_loss": 0.21509164571762085, "epoch": 14.1, "learning_rate": 5.53700101855112e-06, "loss": 0.2353, "step": 39010, "task_loss": 0.5538349747657776 }, { "compression_loss": 0.0, "distillation_loss": 0.24842707812786102, "epoch": 14.1, "learning_rate": 5.494176426081241e-06, "loss": 0.2412, "step": 39020, "task_loss": 0.48312896490097046 }, { "compression_loss": 0.0, "distillation_loss": 0.25581663846969604, "epoch": 14.11, "learning_rate": 5.45150397555954e-06, "loss": 0.2317, "step": 39030, "task_loss": 0.7214528918266296 }, { "compression_loss": 0.0, "distillation_loss": 0.22178195416927338, "epoch": 14.11, "learning_rate": 5.4089838870195325e-06, "loss": 0.2353, "step": 39040, "task_loss": 0.23669970035552979 }, { "compression_loss": 0.0, "distillation_loss": 0.2143976390361786, "epoch": 14.11, "learning_rate": 5.366616379709066e-06, "loss": 0.2262, "step": 39050, "task_loss": 0.7643693685531616 }, { "compression_loss": 0.0, "distillation_loss": 0.23287276923656464, "epoch": 14.12, "learning_rate": 5.324401672089289e-06, "loss": 0.2219, "step": 39060, "task_loss": 0.35092318058013916 }, { "compression_loss": 0.0, "distillation_loss": 0.19542303681373596, "epoch": 14.12, "learning_rate": 5.282339981833406e-06, "loss": 0.2282, "step": 39070, "task_loss": 0.43859049677848816 }, { "compression_loss": 0.0, "distillation_loss": 0.19342200458049774, "epoch": 14.12, "learning_rate": 5.240431525825641e-06, "loss": 0.238, "step": 39080, "task_loss": 0.4503197968006134 }, { "compression_loss": 0.0, "distillation_loss": 0.2664431035518646, "epoch": 14.13, "learning_rate": 5.198676520160087e-06, "loss": 0.2364, "step": 39090, "task_loss": 0.7081995010375977 }, { "compression_loss": 0.0, "distillation_loss": 0.2214469313621521, "epoch": 14.13, "learning_rate": 5.157075180139602e-06, "loss": 0.2593, "step": 39100, "task_loss": 0.6777288913726807 }, { "compression_loss": 0.0, "distillation_loss": 0.19831112027168274, "epoch": 14.13, "learning_rate": 5.1156277202746694e-06, "loss": 0.2219, "step": 39110, "task_loss": 0.3114470839500427 }, { "compression_loss": 0.0, "distillation_loss": 0.2304876744747162, "epoch": 14.14, "learning_rate": 5.074334354282335e-06, "loss": 0.2282, "step": 39120, "task_loss": 0.4329706132411957 }, { "compression_loss": 0.0, "distillation_loss": 0.2472088485956192, "epoch": 14.14, "learning_rate": 5.033195295085081e-06, "loss": 0.2383, "step": 39130, "task_loss": 0.4525589048862457 }, { "compression_loss": 0.0, "distillation_loss": 0.19407299160957336, "epoch": 14.15, "learning_rate": 4.992210754809733e-06, "loss": 0.2222, "step": 39140, "task_loss": 0.46681374311447144 }, { "compression_loss": 0.0, "distillation_loss": 0.21169891953468323, "epoch": 14.15, "learning_rate": 4.951380944786361e-06, "loss": 0.2294, "step": 39150, "task_loss": 0.44812092185020447 }, { "compression_loss": 0.0, "distillation_loss": 0.18998125195503235, "epoch": 14.15, "learning_rate": 4.910706075547198e-06, "loss": 0.203, "step": 39160, "task_loss": 0.23759853839874268 }, { "compression_loss": 0.0, "distillation_loss": 0.23228806257247925, "epoch": 14.16, "learning_rate": 4.8701863568255366e-06, "loss": 0.2103, "step": 39170, "task_loss": 0.4678202271461487 }, { "compression_loss": 0.0, "distillation_loss": 0.22845202684402466, "epoch": 14.16, "learning_rate": 4.829821997554683e-06, "loss": 0.2306, "step": 39180, "task_loss": 0.26507970690727234 }, { "compression_loss": 0.0, "distillation_loss": 0.22435474395751953, "epoch": 14.16, "learning_rate": 4.789613205866839e-06, "loss": 0.2266, "step": 39190, "task_loss": 0.41273343563079834 }, { "compression_loss": 0.0, "distillation_loss": 0.20098178088665009, "epoch": 14.17, "learning_rate": 4.749560189092066e-06, "loss": 0.2104, "step": 39200, "task_loss": 0.5112037658691406 }, { "compression_loss": 0.0, "distillation_loss": 0.15663176774978638, "epoch": 14.17, "learning_rate": 4.709663153757165e-06, "loss": 0.2332, "step": 39210, "task_loss": 0.496734082698822 }, { "compression_loss": 0.0, "distillation_loss": 0.21804989874362946, "epoch": 14.17, "learning_rate": 4.669922305584701e-06, "loss": 0.2141, "step": 39220, "task_loss": 0.4116891026496887 }, { "compression_loss": 0.0, "distillation_loss": 0.15763679146766663, "epoch": 14.18, "learning_rate": 4.630337849491818e-06, "loss": 0.2189, "step": 39230, "task_loss": 0.22102904319763184 }, { "compression_loss": 0.0, "distillation_loss": 0.2064918875694275, "epoch": 14.18, "learning_rate": 4.5909099895892995e-06, "loss": 0.227, "step": 39240, "task_loss": 0.31075674295425415 }, { "compression_loss": 0.0, "distillation_loss": 0.21560850739479065, "epoch": 14.19, "learning_rate": 4.551638929180444e-06, "loss": 0.221, "step": 39250, "task_loss": 0.34478896856307983 }, { "epoch": 14.19, "eval_exact_match": 83.55723746452223, "eval_f1": 90.20349881769643, "step": 39250 }, { "compression_loss": 0.0, "distillation_loss": 0.1906319260597229, "epoch": 14.19, "learning_rate": 4.512524870760054e-06, "loss": 0.2139, "step": 39260, "task_loss": 0.35769015550613403 }, { "compression_loss": 0.0, "distillation_loss": 0.25359436869621277, "epoch": 14.19, "learning_rate": 4.473568016013349e-06, "loss": 0.2418, "step": 39270, "task_loss": 0.5408420562744141 }, { "compression_loss": 0.0, "distillation_loss": 0.1744784563779831, "epoch": 14.2, "learning_rate": 4.4347685658149885e-06, "loss": 0.2174, "step": 39280, "task_loss": 0.6951953768730164 }, { "compression_loss": 0.0, "distillation_loss": 0.24684374034404755, "epoch": 14.2, "learning_rate": 4.396126720227975e-06, "loss": 0.2248, "step": 39290, "task_loss": 0.6458311080932617 }, { "compression_loss": 0.0, "distillation_loss": 0.15859639644622803, "epoch": 14.2, "learning_rate": 4.357642678502669e-06, "loss": 0.2302, "step": 39300, "task_loss": 0.617123007774353 }, { "compression_loss": 0.0, "distillation_loss": 0.19645214080810547, "epoch": 14.21, "learning_rate": 4.319316639075711e-06, "loss": 0.2244, "step": 39310, "task_loss": 0.3386276066303253 }, { "compression_loss": 0.0, "distillation_loss": 0.23448264598846436, "epoch": 14.21, "learning_rate": 4.281148799569073e-06, "loss": 0.2319, "step": 39320, "task_loss": 0.33614152669906616 }, { "compression_loss": 0.0, "distillation_loss": 0.1803082823753357, "epoch": 14.21, "learning_rate": 4.243139356788951e-06, "loss": 0.2391, "step": 39330, "task_loss": 0.28528082370758057 }, { "compression_loss": 0.0, "distillation_loss": 0.19339770078659058, "epoch": 14.22, "learning_rate": 4.205288506724823e-06, "loss": 0.2206, "step": 39340, "task_loss": 0.5189188718795776 }, { "compression_loss": 0.0, "distillation_loss": 0.20033058524131775, "epoch": 14.22, "learning_rate": 4.167596444548396e-06, "loss": 0.2131, "step": 39350, "task_loss": 0.5193696022033691 }, { "compression_loss": 0.0, "distillation_loss": 0.19192567467689514, "epoch": 14.22, "learning_rate": 4.130063364612621e-06, "loss": 0.2354, "step": 39360, "task_loss": 0.2575511336326599 }, { "compression_loss": 0.0, "distillation_loss": 0.16454698145389557, "epoch": 14.23, "learning_rate": 4.0926894604506725e-06, "loss": 0.2141, "step": 39370, "task_loss": 0.344363272190094 }, { "compression_loss": 0.0, "distillation_loss": 0.24127215147018433, "epoch": 14.23, "learning_rate": 4.055474924774975e-06, "loss": 0.2321, "step": 39380, "task_loss": 0.5481629371643066 }, { "compression_loss": 0.0, "distillation_loss": 0.2271345555782318, "epoch": 14.24, "learning_rate": 4.01841994947618e-06, "loss": 0.228, "step": 39390, "task_loss": 0.5968689918518066 }, { "compression_loss": 0.0, "distillation_loss": 0.23536965250968933, "epoch": 14.24, "learning_rate": 3.981524725622215e-06, "loss": 0.22, "step": 39400, "task_loss": 0.4662066102027893 }, { "compression_loss": 0.0, "distillation_loss": 0.22390343248844147, "epoch": 14.24, "learning_rate": 3.944789443457237e-06, "loss": 0.2461, "step": 39410, "task_loss": 0.687394917011261 }, { "compression_loss": 0.0, "distillation_loss": 0.23100188374519348, "epoch": 14.25, "learning_rate": 3.908214292400743e-06, "loss": 0.2261, "step": 39420, "task_loss": 1.0438381433486938 }, { "compression_loss": 0.0, "distillation_loss": 0.20339612662792206, "epoch": 14.25, "learning_rate": 3.871799461046491e-06, "loss": 0.2311, "step": 39430, "task_loss": 0.3669714629650116 }, { "compression_loss": 0.0, "distillation_loss": 0.1861114203929901, "epoch": 14.25, "learning_rate": 3.8355451371616e-06, "loss": 0.2234, "step": 39440, "task_loss": 0.5127718448638916 }, { "compression_loss": 0.0, "distillation_loss": 0.17946402728557587, "epoch": 14.26, "learning_rate": 3.799451507685557e-06, "loss": 0.2174, "step": 39450, "task_loss": 0.5029610991477966 }, { "compression_loss": 0.0, "distillation_loss": 0.2114979326725006, "epoch": 14.26, "learning_rate": 3.7635187587292616e-06, "loss": 0.2317, "step": 39460, "task_loss": 0.777336597442627 }, { "compression_loss": 0.0, "distillation_loss": 0.24398508667945862, "epoch": 14.26, "learning_rate": 3.727747075574033e-06, "loss": 0.2516, "step": 39470, "task_loss": 0.46529749035835266 }, { "compression_loss": 0.0, "distillation_loss": 0.23544840514659882, "epoch": 14.27, "learning_rate": 3.6921366426707064e-06, "loss": 0.2317, "step": 39480, "task_loss": 0.6463897228240967 }, { "compression_loss": 0.0, "distillation_loss": 0.23673115670681, "epoch": 14.27, "learning_rate": 3.656687643638646e-06, "loss": 0.2184, "step": 39490, "task_loss": 0.5956941843032837 }, { "compression_loss": 0.0, "distillation_loss": 0.21421056985855103, "epoch": 14.28, "learning_rate": 3.6214002612648187e-06, "loss": 0.2351, "step": 39500, "task_loss": 0.561495304107666 }, { "epoch": 14.28, "eval_exact_match": 83.55723746452223, "eval_f1": 90.22059674354702, "step": 39500 }, { "compression_loss": 0.0, "distillation_loss": 0.21173754334449768, "epoch": 14.28, "learning_rate": 3.5862746775028274e-06, "loss": 0.2371, "step": 39510, "task_loss": 0.5109820365905762 }, { "compression_loss": 0.0, "distillation_loss": 0.2444034218788147, "epoch": 14.28, "learning_rate": 3.5513110734719947e-06, "loss": 0.2274, "step": 39520, "task_loss": 0.3416980504989624 }, { "compression_loss": 0.0, "distillation_loss": 0.22895483672618866, "epoch": 14.29, "learning_rate": 3.5165096294564294e-06, "loss": 0.2376, "step": 39530, "task_loss": 0.4999857246875763 }, { "compression_loss": 0.0, "distillation_loss": 0.19510109722614288, "epoch": 14.29, "learning_rate": 3.481870524904063e-06, "loss": 0.2148, "step": 39540, "task_loss": 0.38726818561553955 }, { "compression_loss": 0.0, "distillation_loss": 0.18519558012485504, "epoch": 14.29, "learning_rate": 3.4473939384257713e-06, "loss": 0.2241, "step": 39550, "task_loss": 0.305391401052475 }, { "compression_loss": 0.0, "distillation_loss": 0.2279016077518463, "epoch": 14.3, "learning_rate": 3.4130800477944353e-06, "loss": 0.2413, "step": 39560, "task_loss": 0.4362182021141052 }, { "compression_loss": 0.0, "distillation_loss": 0.19982075691223145, "epoch": 14.3, "learning_rate": 3.378929029944014e-06, "loss": 0.2107, "step": 39570, "task_loss": 0.4617334008216858 }, { "compression_loss": 0.0, "distillation_loss": 0.17380177974700928, "epoch": 14.3, "learning_rate": 3.3449410609686222e-06, "loss": 0.225, "step": 39580, "task_loss": 0.5341837406158447 }, { "compression_loss": 0.0, "distillation_loss": 0.21998389065265656, "epoch": 14.31, "learning_rate": 3.311116316121681e-06, "loss": 0.2425, "step": 39590, "task_loss": 0.48477378487586975 }, { "compression_loss": 0.0, "distillation_loss": 0.2229992151260376, "epoch": 14.31, "learning_rate": 3.27745496981493e-06, "loss": 0.2219, "step": 39600, "task_loss": 0.3780628442764282 }, { "compression_loss": 0.0, "distillation_loss": 0.280860960483551, "epoch": 14.32, "learning_rate": 3.2439571956175955e-06, "loss": 0.236, "step": 39610, "task_loss": 0.7838178873062134 }, { "compression_loss": 0.0, "distillation_loss": 0.16504058241844177, "epoch": 14.32, "learning_rate": 3.2106231662554687e-06, "loss": 0.2116, "step": 39620, "task_loss": 0.38846850395202637 }, { "compression_loss": 0.0, "distillation_loss": 0.18678365647792816, "epoch": 14.32, "learning_rate": 3.17745305361002e-06, "loss": 0.2178, "step": 39630, "task_loss": 0.3168988525867462 }, { "compression_loss": 0.0, "distillation_loss": 0.24787138402462006, "epoch": 14.33, "learning_rate": 3.144447028717492e-06, "loss": 0.2207, "step": 39640, "task_loss": 0.3311496376991272 }, { "compression_loss": 0.0, "distillation_loss": 0.20401303470134735, "epoch": 14.33, "learning_rate": 3.1116052617680475e-06, "loss": 0.2311, "step": 39650, "task_loss": 0.2993205189704895 }, { "compression_loss": 0.0, "distillation_loss": 0.2255595624446869, "epoch": 14.33, "learning_rate": 3.078927922104888e-06, "loss": 0.2449, "step": 39660, "task_loss": 0.6406166553497314 }, { "compression_loss": 0.0, "distillation_loss": 0.26740413904190063, "epoch": 14.34, "learning_rate": 3.04641517822337e-06, "loss": 0.2378, "step": 39670, "task_loss": 0.8494973182678223 }, { "compression_loss": 0.0, "distillation_loss": 0.190795436501503, "epoch": 14.34, "learning_rate": 3.0140671977701126e-06, "loss": 0.2478, "step": 39680, "task_loss": 0.43718254566192627 }, { "compression_loss": 0.0, "distillation_loss": 0.24205990135669708, "epoch": 14.34, "learning_rate": 2.9818841475422074e-06, "loss": 0.2441, "step": 39690, "task_loss": 0.549262285232544 }, { "compression_loss": 0.0, "distillation_loss": 0.24730156362056732, "epoch": 14.35, "learning_rate": 2.9498661934862625e-06, "loss": 0.2497, "step": 39700, "task_loss": 0.48019862174987793 }, { "compression_loss": 0.0, "distillation_loss": 0.20119762420654297, "epoch": 14.35, "learning_rate": 2.9180135006976253e-06, "loss": 0.2274, "step": 39710, "task_loss": 0.28718143701553345 }, { "compression_loss": 0.0, "distillation_loss": 0.22375862300395966, "epoch": 14.35, "learning_rate": 2.8863262334194892e-06, "loss": 0.2073, "step": 39720, "task_loss": 0.5566632747650146 }, { "compression_loss": 0.0, "distillation_loss": 0.2002100944519043, "epoch": 14.36, "learning_rate": 2.854804555042066e-06, "loss": 0.2189, "step": 39730, "task_loss": 0.43000146746635437 }, { "compression_loss": 0.0, "distillation_loss": 0.19941216707229614, "epoch": 14.36, "learning_rate": 2.823448628101714e-06, "loss": 0.219, "step": 39740, "task_loss": 0.43014416098594666 }, { "compression_loss": 0.0, "distillation_loss": 0.20073619484901428, "epoch": 14.37, "learning_rate": 2.792258614280147e-06, "loss": 0.2455, "step": 39750, "task_loss": 0.4905133843421936 }, { "epoch": 14.37, "eval_exact_match": 83.61400189214758, "eval_f1": 90.27336271124364, "step": 39750 }, { "compression_loss": 0.0, "distillation_loss": 0.18684911727905273, "epoch": 14.37, "learning_rate": 2.761234674403565e-06, "loss": 0.225, "step": 39760, "task_loss": 0.3729088306427002 }, { "compression_loss": 0.0, "distillation_loss": 0.2193862795829773, "epoch": 14.37, "learning_rate": 2.730376968441837e-06, "loss": 0.2194, "step": 39770, "task_loss": 0.48270559310913086 }, { "compression_loss": 0.0, "distillation_loss": 0.19356009364128113, "epoch": 14.38, "learning_rate": 2.699685655507652e-06, "loss": 0.2208, "step": 39780, "task_loss": 0.43202751874923706 }, { "compression_loss": 0.0, "distillation_loss": 0.2428002506494522, "epoch": 14.38, "learning_rate": 2.6691608938557673e-06, "loss": 0.2174, "step": 39790, "task_loss": 0.57557213306427 }, { "compression_loss": 0.0, "distillation_loss": 0.18401771783828735, "epoch": 14.38, "learning_rate": 2.638802840882099e-06, "loss": 0.2166, "step": 39800, "task_loss": 0.38599061965942383 }, { "compression_loss": 0.0, "distillation_loss": 0.25447237491607666, "epoch": 14.39, "learning_rate": 2.608611653122982e-06, "loss": 0.2425, "step": 39810, "task_loss": 0.964231014251709 }, { "compression_loss": 0.0, "distillation_loss": 0.2148197889328003, "epoch": 14.39, "learning_rate": 2.5785874862543364e-06, "loss": 0.2342, "step": 39820, "task_loss": 0.5468176603317261 }, { "compression_loss": 0.0, "distillation_loss": 0.275215744972229, "epoch": 14.39, "learning_rate": 2.548730495090867e-06, "loss": 0.2414, "step": 39830, "task_loss": 0.6998399496078491 }, { "compression_loss": 0.0, "distillation_loss": 0.20677562057971954, "epoch": 14.4, "learning_rate": 2.5190408335852423e-06, "loss": 0.2275, "step": 39840, "task_loss": 0.8021308183670044 }, { "compression_loss": 0.0, "distillation_loss": 0.2932314872741699, "epoch": 14.4, "learning_rate": 2.4895186548273553e-06, "loss": 0.2277, "step": 39850, "task_loss": 0.35569363832473755 }, { "compression_loss": 0.0, "distillation_loss": 0.16546630859375, "epoch": 14.41, "learning_rate": 2.4601641110434842e-06, "loss": 0.2139, "step": 39860, "task_loss": 0.3520973324775696 }, { "compression_loss": 0.0, "distillation_loss": 0.20243391394615173, "epoch": 14.41, "learning_rate": 2.430977353595531e-06, "loss": 0.2541, "step": 39870, "task_loss": 0.41768544912338257 }, { "compression_loss": 0.0, "distillation_loss": 0.21307632327079773, "epoch": 14.41, "learning_rate": 2.401958532980213e-06, "loss": 0.2169, "step": 39880, "task_loss": 0.7245582342147827 }, { "compression_loss": 0.0, "distillation_loss": 0.22703363001346588, "epoch": 14.42, "learning_rate": 2.3731077988283558e-06, "loss": 0.2314, "step": 39890, "task_loss": 0.3949422240257263 }, { "compression_loss": 0.0, "distillation_loss": 0.28492501378059387, "epoch": 14.42, "learning_rate": 2.34442529990403e-06, "loss": 0.2391, "step": 39900, "task_loss": 0.8998227715492249 }, { "compression_loss": 0.0, "distillation_loss": 0.22178995609283447, "epoch": 14.42, "learning_rate": 2.315911184103842e-06, "loss": 0.2465, "step": 39910, "task_loss": 0.5418390035629272 }, { "compression_loss": 0.0, "distillation_loss": 0.18975073099136353, "epoch": 14.43, "learning_rate": 2.287565598456169e-06, "loss": 0.2399, "step": 39920, "task_loss": 0.41155844926834106 }, { "compression_loss": 0.0, "distillation_loss": 0.16683003306388855, "epoch": 14.43, "learning_rate": 2.259388689120386e-06, "loss": 0.225, "step": 39930, "task_loss": 0.4172460436820984 }, { "compression_loss": 0.0, "distillation_loss": 0.19753775000572205, "epoch": 14.43, "learning_rate": 2.2313806013861007e-06, "loss": 0.223, "step": 39940, "task_loss": 0.580410361289978 }, { "compression_loss": 0.0, "distillation_loss": 0.15868158638477325, "epoch": 14.44, "learning_rate": 2.2035414796724365e-06, "loss": 0.2266, "step": 39950, "task_loss": 0.234841987490654 }, { "compression_loss": 0.0, "distillation_loss": 0.1994379162788391, "epoch": 14.44, "learning_rate": 2.1758714675272663e-06, "loss": 0.2301, "step": 39960, "task_loss": 0.28453922271728516 }, { "compression_loss": 0.0, "distillation_loss": 0.22968558967113495, "epoch": 14.45, "learning_rate": 2.1483707076264827e-06, "loss": 0.2385, "step": 39970, "task_loss": 0.34896284341812134 }, { "compression_loss": 0.0, "distillation_loss": 0.2558647394180298, "epoch": 14.45, "learning_rate": 2.121039341773235e-06, "loss": 0.2324, "step": 39980, "task_loss": 0.5354092121124268 }, { "compression_loss": 0.0, "distillation_loss": 0.1978209763765335, "epoch": 14.45, "learning_rate": 2.0938775108972628e-06, "loss": 0.2397, "step": 39990, "task_loss": 0.3964638411998749 }, { "compression_loss": 0.0, "distillation_loss": 0.21577095985412598, "epoch": 14.46, "learning_rate": 2.0668853550540775e-06, "loss": 0.2277, "step": 40000, "task_loss": 0.24615420401096344 }, { "epoch": 14.46, "eval_exact_match": 83.37748344370861, "eval_f1": 90.24861808055506, "step": 40000 }, { "compression_loss": 0.0, "distillation_loss": 0.2140657901763916, "epoch": 14.46, "learning_rate": 2.0400630134243162e-06, "loss": 0.2277, "step": 40010, "task_loss": 0.3943939805030823 }, { "compression_loss": 0.0, "distillation_loss": 0.28893810510635376, "epoch": 14.46, "learning_rate": 2.013410624312997e-06, "loss": 0.2403, "step": 40020, "task_loss": 0.6239956617355347 }, { "compression_loss": 0.0, "distillation_loss": 0.21812288463115692, "epoch": 14.47, "learning_rate": 1.9869283251488022e-06, "loss": 0.2467, "step": 40030, "task_loss": 0.9183149337768555 }, { "compression_loss": 0.0, "distillation_loss": 0.2515392601490021, "epoch": 14.47, "learning_rate": 1.9606162524833615e-06, "loss": 0.241, "step": 40040, "task_loss": 0.2775781750679016 }, { "compression_loss": 0.0, "distillation_loss": 0.20658659934997559, "epoch": 14.47, "learning_rate": 1.9344745419905726e-06, "loss": 0.2436, "step": 40050, "task_loss": 0.43849068880081177 }, { "compression_loss": 0.0, "distillation_loss": 0.18757006525993347, "epoch": 14.48, "learning_rate": 1.9085033284658917e-06, "loss": 0.2378, "step": 40060, "task_loss": 0.36795151233673096 }, { "compression_loss": 0.0, "distillation_loss": 0.2144419550895691, "epoch": 14.48, "learning_rate": 1.8827027458256239e-06, "loss": 0.2404, "step": 40070, "task_loss": 0.2985188961029053 }, { "compression_loss": 0.0, "distillation_loss": 0.22410696744918823, "epoch": 14.49, "learning_rate": 1.857072927106244e-06, "loss": 0.2361, "step": 40080, "task_loss": 0.7707335948944092 }, { "compression_loss": 0.0, "distillation_loss": 0.2690439820289612, "epoch": 14.49, "learning_rate": 1.831614004463733e-06, "loss": 0.2433, "step": 40090, "task_loss": 0.44754502177238464 }, { "compression_loss": 0.0, "distillation_loss": 0.2019033432006836, "epoch": 14.49, "learning_rate": 1.806326109172838e-06, "loss": 0.2378, "step": 40100, "task_loss": 0.4766843914985657 }, { "compression_loss": 0.0, "distillation_loss": 0.2467237263917923, "epoch": 14.5, "learning_rate": 1.781209371626461e-06, "loss": 0.2516, "step": 40110, "task_loss": 0.5889577865600586 }, { "compression_loss": 0.0, "distillation_loss": 0.20924049615859985, "epoch": 14.5, "learning_rate": 1.75626392133494e-06, "loss": 0.2475, "step": 40120, "task_loss": 0.3500496745109558 }, { "compression_loss": 0.0, "distillation_loss": 0.23063108325004578, "epoch": 14.5, "learning_rate": 1.7314898869254157e-06, "loss": 0.2097, "step": 40130, "task_loss": 0.8268368244171143 }, { "compression_loss": 0.0, "distillation_loss": 0.19039425253868103, "epoch": 14.51, "learning_rate": 1.7068873961411294e-06, "loss": 0.2395, "step": 40140, "task_loss": 0.4907156527042389 }, { "compression_loss": 0.0, "distillation_loss": 0.1660790741443634, "epoch": 14.51, "learning_rate": 1.6824565758408027e-06, "loss": 0.2432, "step": 40150, "task_loss": 0.385088711977005 }, { "compression_loss": 0.0, "distillation_loss": 0.2549957036972046, "epoch": 14.51, "learning_rate": 1.6581975519979562e-06, "loss": 0.2305, "step": 40160, "task_loss": 0.37437817454338074 }, { "compression_loss": 0.0, "distillation_loss": 0.22340743243694305, "epoch": 14.52, "learning_rate": 1.6341104497002816e-06, "loss": 0.2425, "step": 40170, "task_loss": 0.4963937997817993 }, { "compression_loss": 0.0, "distillation_loss": 0.23421865701675415, "epoch": 14.52, "learning_rate": 1.6101953931489643e-06, "loss": 0.2153, "step": 40180, "task_loss": 0.5987656116485596 }, { "compression_loss": 0.0, "distillation_loss": 0.20578370988368988, "epoch": 14.52, "learning_rate": 1.5864525056580967e-06, "loss": 0.2197, "step": 40190, "task_loss": 0.49276524782180786 }, { "compression_loss": 0.0, "distillation_loss": 0.21253164112567902, "epoch": 14.53, "learning_rate": 1.5628819096539764e-06, "loss": 0.2373, "step": 40200, "task_loss": 0.39236021041870117 }, { "compression_loss": 0.0, "distillation_loss": 0.25942492485046387, "epoch": 14.53, "learning_rate": 1.5394837266745246e-06, "loss": 0.2421, "step": 40210, "task_loss": 0.8538446426391602 }, { "compression_loss": 0.0, "distillation_loss": 0.197534441947937, "epoch": 14.54, "learning_rate": 1.5162580773686438e-06, "loss": 0.2345, "step": 40220, "task_loss": 0.390971302986145 }, { "compression_loss": 0.0, "distillation_loss": 0.2024911344051361, "epoch": 14.54, "learning_rate": 1.4932050814955942e-06, "loss": 0.2328, "step": 40230, "task_loss": 0.24561244249343872 }, { "compression_loss": 0.0, "distillation_loss": 0.23663444817066193, "epoch": 14.54, "learning_rate": 1.470324857924365e-06, "loss": 0.2561, "step": 40240, "task_loss": 0.2838215231895447 }, { "compression_loss": 0.0, "distillation_loss": 0.17373546957969666, "epoch": 14.55, "learning_rate": 1.4476175246330906e-06, "loss": 0.2219, "step": 40250, "task_loss": 0.3289911150932312 }, { "epoch": 14.55, "eval_exact_match": 83.72753074739829, "eval_f1": 90.29893044950633, "step": 40250 }, { "compression_loss": 0.0, "distillation_loss": 0.19693242013454437, "epoch": 14.55, "learning_rate": 1.4250831987084155e-06, "loss": 0.232, "step": 40260, "task_loss": 0.5237109661102295 }, { "compression_loss": 0.0, "distillation_loss": 0.25203973054885864, "epoch": 14.55, "learning_rate": 1.4027219963449087e-06, "loss": 0.2456, "step": 40270, "task_loss": 0.6143960356712341 }, { "compression_loss": 0.0, "distillation_loss": 0.19381505250930786, "epoch": 14.56, "learning_rate": 1.3805340328444287e-06, "loss": 0.2183, "step": 40280, "task_loss": 0.33310258388519287 }, { "compression_loss": 0.0, "distillation_loss": 0.18629619479179382, "epoch": 14.56, "learning_rate": 1.358519422615601e-06, "loss": 0.2311, "step": 40290, "task_loss": 0.2998196482658386 }, { "compression_loss": 0.0, "distillation_loss": 0.23643043637275696, "epoch": 14.56, "learning_rate": 1.3366782791731396e-06, "loss": 0.2306, "step": 40300, "task_loss": 0.6539130210876465 }, { "compression_loss": 0.0, "distillation_loss": 0.17593146860599518, "epoch": 14.57, "learning_rate": 1.315010715137333e-06, "loss": 0.2361, "step": 40310, "task_loss": 0.36985522508621216 }, { "compression_loss": 0.0, "distillation_loss": 0.24259349703788757, "epoch": 14.57, "learning_rate": 1.2935168422334307e-06, "loss": 0.226, "step": 40320, "task_loss": 0.6864180564880371 }, { "compression_loss": 0.0, "distillation_loss": 0.2658407688140869, "epoch": 14.58, "learning_rate": 1.2721967712910648e-06, "loss": 0.2228, "step": 40330, "task_loss": 0.5224426984786987 }, { "compression_loss": 0.0, "distillation_loss": 0.2879309058189392, "epoch": 14.58, "learning_rate": 1.2510506122436886e-06, "loss": 0.2457, "step": 40340, "task_loss": 0.31292012333869934 }, { "compression_loss": 0.0, "distillation_loss": 0.20499932765960693, "epoch": 14.58, "learning_rate": 1.2300784741280078e-06, "loss": 0.241, "step": 40350, "task_loss": 0.533927857875824 }, { "compression_loss": 0.0, "distillation_loss": 0.21396301686763763, "epoch": 14.59, "learning_rate": 1.209280465083427e-06, "loss": 0.2439, "step": 40360, "task_loss": 0.6710692048072815 }, { "compression_loss": 0.0, "distillation_loss": 0.22768867015838623, "epoch": 14.59, "learning_rate": 1.1886566923514634e-06, "loss": 0.2508, "step": 40370, "task_loss": 0.5868658423423767 }, { "compression_loss": 0.0, "distillation_loss": 0.16053186357021332, "epoch": 14.59, "learning_rate": 1.1682072622752342e-06, "loss": 0.2185, "step": 40380, "task_loss": 0.35523098707199097 }, { "compression_loss": 0.0, "distillation_loss": 0.17393621802330017, "epoch": 14.6, "learning_rate": 1.147932280298877e-06, "loss": 0.2215, "step": 40390, "task_loss": 0.25129181146621704 }, { "compression_loss": 0.0, "distillation_loss": 0.18951040506362915, "epoch": 14.6, "learning_rate": 1.127831850967007e-06, "loss": 0.2484, "step": 40400, "task_loss": 0.7178854942321777 }, { "compression_loss": 0.0, "distillation_loss": 0.31038203835487366, "epoch": 14.6, "learning_rate": 1.1079060779242022e-06, "loss": 0.2424, "step": 40410, "task_loss": 0.8680423498153687 }, { "compression_loss": 0.0, "distillation_loss": 0.19648787379264832, "epoch": 14.61, "learning_rate": 1.0881550639144531e-06, "loss": 0.2384, "step": 40420, "task_loss": 0.40625467896461487 }, { "compression_loss": 0.0, "distillation_loss": 0.23141324520111084, "epoch": 14.61, "learning_rate": 1.0685789107806264e-06, "loss": 0.2243, "step": 40430, "task_loss": 0.5935995578765869 }, { "compression_loss": 0.0, "distillation_loss": 0.24341002106666565, "epoch": 14.62, "learning_rate": 1.0491777194639433e-06, "loss": 0.2549, "step": 40440, "task_loss": 0.7823227643966675 }, { "compression_loss": 0.0, "distillation_loss": 0.20551814138889313, "epoch": 14.62, "learning_rate": 1.029951590003487e-06, "loss": 0.2144, "step": 40450, "task_loss": 0.535841166973114 }, { "compression_loss": 0.0, "distillation_loss": 0.19820314645767212, "epoch": 14.62, "learning_rate": 1.0109006215356314e-06, "loss": 0.2259, "step": 40460, "task_loss": 0.4061843454837799 }, { "compression_loss": 0.0, "distillation_loss": 0.20060187578201294, "epoch": 14.63, "learning_rate": 9.920249122935775e-07, "loss": 0.2362, "step": 40470, "task_loss": 0.34745022654533386 }, { "compression_loss": 0.0, "distillation_loss": 0.19940611720085144, "epoch": 14.63, "learning_rate": 9.733245596068346e-07, "loss": 0.2439, "step": 40480, "task_loss": 0.47528618574142456 }, { "compression_loss": 0.0, "distillation_loss": 0.16198787093162537, "epoch": 14.63, "learning_rate": 9.547996599007135e-07, "loss": 0.2246, "step": 40490, "task_loss": 0.47620028257369995 }, { "compression_loss": 0.0, "distillation_loss": 0.265262633562088, "epoch": 14.64, "learning_rate": 9.364503086958147e-07, "loss": 0.2355, "step": 40500, "task_loss": 0.40639764070510864 }, { "epoch": 14.64, "eval_exact_match": 83.68968779564806, "eval_f1": 90.2170241386278, "step": 40500 }, { "compression_loss": 0.0, "distillation_loss": 0.22631597518920898, "epoch": 14.64, "learning_rate": 9.182766006075692e-07, "loss": 0.2346, "step": 40510, "task_loss": 0.6658477187156677 }, { "compression_loss": 0.0, "distillation_loss": 0.27636265754699707, "epoch": 14.64, "learning_rate": 9.002786293457259e-07, "loss": 0.2253, "step": 40520, "task_loss": 0.48663219809532166 }, { "compression_loss": 0.0, "distillation_loss": 0.231238454580307, "epoch": 14.65, "learning_rate": 8.824564877138851e-07, "loss": 0.2379, "step": 40530, "task_loss": 0.5162807703018188 }, { "compression_loss": 0.0, "distillation_loss": 0.23019526898860931, "epoch": 14.65, "learning_rate": 8.648102676089857e-07, "loss": 0.2215, "step": 40540, "task_loss": 0.7763354778289795 }, { "compression_loss": 0.0, "distillation_loss": 0.20232398808002472, "epoch": 14.65, "learning_rate": 8.473400600208857e-07, "loss": 0.22, "step": 40550, "task_loss": 0.3750525712966919 }, { "compression_loss": 0.0, "distillation_loss": 0.21048077940940857, "epoch": 14.66, "learning_rate": 8.30045955031845e-07, "loss": 0.236, "step": 40560, "task_loss": 0.27062106132507324 }, { "compression_loss": 0.0, "distillation_loss": 0.22069762647151947, "epoch": 14.66, "learning_rate": 8.129280418160867e-07, "loss": 0.2234, "step": 40570, "task_loss": 0.44164735078811646 }, { "compression_loss": 0.0, "distillation_loss": 0.20091667771339417, "epoch": 14.67, "learning_rate": 7.959864086393225e-07, "loss": 0.2289, "step": 40580, "task_loss": 0.5987957715988159 }, { "compression_loss": 0.0, "distillation_loss": 0.21585312485694885, "epoch": 14.67, "learning_rate": 7.792211428583184e-07, "loss": 0.2481, "step": 40590, "task_loss": 0.4686650037765503 }, { "compression_loss": 0.0, "distillation_loss": 0.19087418913841248, "epoch": 14.67, "learning_rate": 7.626323309204157e-07, "loss": 0.2306, "step": 40600, "task_loss": 0.6071935892105103 }, { "compression_loss": 0.0, "distillation_loss": 0.2141316682100296, "epoch": 14.68, "learning_rate": 7.462200583631045e-07, "loss": 0.2395, "step": 40610, "task_loss": 0.5493015050888062 }, { "compression_loss": 0.0, "distillation_loss": 0.24774694442749023, "epoch": 14.68, "learning_rate": 7.299844098135915e-07, "loss": 0.2227, "step": 40620, "task_loss": 0.5905795097351074 }, { "compression_loss": 0.0, "distillation_loss": 0.19672465324401855, "epoch": 14.68, "learning_rate": 7.139254689883305e-07, "loss": 0.2171, "step": 40630, "task_loss": 0.7069876194000244 }, { "compression_loss": 0.0, "distillation_loss": 0.20999035239219666, "epoch": 14.69, "learning_rate": 6.980433186926221e-07, "loss": 0.2293, "step": 40640, "task_loss": 0.24222975969314575 }, { "compression_loss": 0.0, "distillation_loss": 0.23579585552215576, "epoch": 14.69, "learning_rate": 6.823380408201817e-07, "loss": 0.243, "step": 40650, "task_loss": 0.5637131929397583 }, { "compression_loss": 0.0, "distillation_loss": 0.21097102761268616, "epoch": 14.69, "learning_rate": 6.668097163526936e-07, "loss": 0.2402, "step": 40660, "task_loss": 0.7529999017715454 }, { "compression_loss": 0.0, "distillation_loss": 0.2499391883611679, "epoch": 14.7, "learning_rate": 6.514584253594218e-07, "loss": 0.2128, "step": 40670, "task_loss": 0.42130064964294434 }, { "compression_loss": 0.0, "distillation_loss": 0.1964101791381836, "epoch": 14.7, "learning_rate": 6.362842469967905e-07, "loss": 0.238, "step": 40680, "task_loss": 0.38533705472946167 }, { "compression_loss": 0.0, "distillation_loss": 0.1837211400270462, "epoch": 14.71, "learning_rate": 6.212872595079643e-07, "loss": 0.2251, "step": 40690, "task_loss": 0.638512372970581 }, { "compression_loss": 0.0, "distillation_loss": 0.19221729040145874, "epoch": 14.71, "learning_rate": 6.064675402224444e-07, "loss": 0.2334, "step": 40700, "task_loss": 0.5338441133499146 }, { "compression_loss": 0.0, "distillation_loss": 0.23053930699825287, "epoch": 14.71, "learning_rate": 5.918251655556994e-07, "loss": 0.2217, "step": 40710, "task_loss": 0.5507243275642395 }, { "compression_loss": 0.0, "distillation_loss": 0.2665104269981384, "epoch": 14.72, "learning_rate": 5.773602110087295e-07, "loss": 0.2192, "step": 40720, "task_loss": 0.5469443202018738 }, { "compression_loss": 0.0, "distillation_loss": 0.20541295409202576, "epoch": 14.72, "learning_rate": 5.630727511677097e-07, "loss": 0.2415, "step": 40730, "task_loss": 0.3799017667770386 }, { "compression_loss": 0.0, "distillation_loss": 0.186172217130661, "epoch": 14.72, "learning_rate": 5.489628597035817e-07, "loss": 0.2563, "step": 40740, "task_loss": 0.44071048498153687 }, { "compression_loss": 0.0, "distillation_loss": 0.2783205509185791, "epoch": 14.73, "learning_rate": 5.350306093716961e-07, "loss": 0.2394, "step": 40750, "task_loss": 0.34275901317596436 }, { "epoch": 14.73, "eval_exact_match": 83.60454115421003, "eval_f1": 90.33495064433302, "step": 40750 }, { "compression_loss": 0.0, "distillation_loss": 0.2505665123462677, "epoch": 14.73, "learning_rate": 5.212760720114123e-07, "loss": 0.2319, "step": 40760, "task_loss": 0.45389094948768616 }, { "compression_loss": 0.0, "distillation_loss": 0.24596086144447327, "epoch": 14.73, "learning_rate": 5.076993185457568e-07, "loss": 0.2276, "step": 40770, "task_loss": 0.7891265153884888 }, { "compression_loss": 0.0, "distillation_loss": 0.2095947414636612, "epoch": 14.74, "learning_rate": 4.943004189810379e-07, "loss": 0.2466, "step": 40780, "task_loss": 0.546643853187561 }, { "compression_loss": 0.0, "distillation_loss": 0.2007887363433838, "epoch": 14.74, "learning_rate": 4.810794424064813e-07, "loss": 0.2357, "step": 40790, "task_loss": 0.5920835733413696 }, { "compression_loss": 0.0, "distillation_loss": 0.2600013315677643, "epoch": 14.75, "learning_rate": 4.6803645699389105e-07, "loss": 0.2341, "step": 40800, "task_loss": 0.507832407951355 }, { "compression_loss": 0.0, "distillation_loss": 0.20566104352474213, "epoch": 14.75, "learning_rate": 4.551715299972852e-07, "loss": 0.2377, "step": 40810, "task_loss": 0.4447738528251648 }, { "compression_loss": 0.0, "distillation_loss": 0.17498670518398285, "epoch": 14.75, "learning_rate": 4.424847277525534e-07, "loss": 0.2181, "step": 40820, "task_loss": 0.2954162657260895 }, { "compression_loss": 0.0, "distillation_loss": 0.23140192031860352, "epoch": 14.76, "learning_rate": 4.299761156771148e-07, "loss": 0.2491, "step": 40830, "task_loss": 0.5240877866744995 }, { "compression_loss": 0.0, "distillation_loss": 0.237901508808136, "epoch": 14.76, "learning_rate": 4.1764575826957634e-07, "loss": 0.2321, "step": 40840, "task_loss": 0.5805107355117798 }, { "compression_loss": 0.0, "distillation_loss": 0.2360258400440216, "epoch": 14.76, "learning_rate": 4.054937191094143e-07, "loss": 0.226, "step": 40850, "task_loss": 0.4690331816673279 }, { "compression_loss": 0.0, "distillation_loss": 0.198323056101799, "epoch": 14.77, "learning_rate": 3.935200608566241e-07, "loss": 0.2309, "step": 40860, "task_loss": 0.4538593292236328 }, { "compression_loss": 0.0, "distillation_loss": 0.18780264258384705, "epoch": 14.77, "learning_rate": 3.817248452514138e-07, "loss": 0.2387, "step": 40870, "task_loss": 0.43486571311950684 }, { "compression_loss": 0.0, "distillation_loss": 0.2032957375049591, "epoch": 14.77, "learning_rate": 3.701081331138772e-07, "loss": 0.2278, "step": 40880, "task_loss": 0.46985048055648804 }, { "compression_loss": 0.0, "distillation_loss": 0.21978767216205597, "epoch": 14.78, "learning_rate": 3.586699843436991e-07, "loss": 0.2466, "step": 40890, "task_loss": 0.5232217311859131 }, { "compression_loss": 0.0, "distillation_loss": 0.2598414421081543, "epoch": 14.78, "learning_rate": 3.474104579198128e-07, "loss": 0.2476, "step": 40900, "task_loss": 0.3403189480304718 }, { "compression_loss": 0.0, "distillation_loss": 0.1761493980884552, "epoch": 14.78, "learning_rate": 3.363296119001246e-07, "loss": 0.2419, "step": 40910, "task_loss": 0.2819960117340088 }, { "compression_loss": 0.0, "distillation_loss": 0.21891748905181885, "epoch": 14.79, "learning_rate": 3.254275034212028e-07, "loss": 0.2313, "step": 40920, "task_loss": 0.6238331198692322 }, { "compression_loss": 0.0, "distillation_loss": 0.19511379301548004, "epoch": 14.79, "learning_rate": 3.1470418869799007e-07, "loss": 0.2242, "step": 40930, "task_loss": 0.3012145161628723 }, { "compression_loss": 0.0, "distillation_loss": 0.21911239624023438, "epoch": 14.8, "learning_rate": 3.041597230234888e-07, "loss": 0.2193, "step": 40940, "task_loss": 0.4811599850654602 }, { "compression_loss": 0.0, "distillation_loss": 0.3620220422744751, "epoch": 14.8, "learning_rate": 2.9379416076852405e-07, "loss": 0.2397, "step": 40950, "task_loss": 0.6693317890167236 }, { "compression_loss": 0.0, "distillation_loss": 0.2984312176704407, "epoch": 14.8, "learning_rate": 2.836075553814171e-07, "loss": 0.2383, "step": 40960, "task_loss": 0.8876379728317261 }, { "compression_loss": 0.0, "distillation_loss": 0.20343519747257233, "epoch": 14.81, "learning_rate": 2.735999593877253e-07, "loss": 0.21, "step": 40970, "task_loss": 0.48944130539894104 }, { "compression_loss": 0.0, "distillation_loss": 0.1964031308889389, "epoch": 14.81, "learning_rate": 2.6377142438998134e-07, "loss": 0.2216, "step": 40980, "task_loss": 0.39222803711891174 }, { "compression_loss": 0.0, "distillation_loss": 0.24534821510314941, "epoch": 14.81, "learning_rate": 2.5412200106742166e-07, "loss": 0.2391, "step": 40990, "task_loss": 0.7029440402984619 }, { "compression_loss": 0.0, "distillation_loss": 0.2845875322818756, "epoch": 14.82, "learning_rate": 2.4465173917571023e-07, "loss": 0.2691, "step": 41000, "task_loss": 0.5345797538757324 }, { "epoch": 14.82, "eval_exact_match": 83.4247871333964, "eval_f1": 90.21943083788699, "step": 41000 }, { "compression_loss": 0.0, "distillation_loss": 0.20766814053058624, "epoch": 14.82, "learning_rate": 2.3536068754670568e-07, "loss": 0.2328, "step": 41010, "task_loss": 0.43465369939804077 }, { "compression_loss": 0.0, "distillation_loss": 0.22689062356948853, "epoch": 14.82, "learning_rate": 2.262488940881968e-07, "loss": 0.2223, "step": 41020, "task_loss": 0.578322172164917 }, { "compression_loss": 0.0, "distillation_loss": 0.16850513219833374, "epoch": 14.83, "learning_rate": 2.1731640578365016e-07, "loss": 0.2158, "step": 41030, "task_loss": 0.36379289627075195 }, { "compression_loss": 0.0, "distillation_loss": 0.3516436517238617, "epoch": 14.83, "learning_rate": 2.0856326869198082e-07, "loss": 0.25, "step": 41040, "task_loss": 0.681757390499115 }, { "compression_loss": 0.0, "distillation_loss": 0.20471343398094177, "epoch": 14.84, "learning_rate": 1.999895279473074e-07, "loss": 0.2079, "step": 41050, "task_loss": 0.4940853714942932 }, { "compression_loss": 0.0, "distillation_loss": 0.24832800030708313, "epoch": 14.84, "learning_rate": 1.9159522775871906e-07, "loss": 0.2473, "step": 41060, "task_loss": 0.5383921265602112 }, { "compression_loss": 0.0, "distillation_loss": 0.1713516116142273, "epoch": 14.84, "learning_rate": 1.83380411410054e-07, "loss": 0.2302, "step": 41070, "task_loss": 0.4822900593280792 }, { "compression_loss": 0.0, "distillation_loss": 0.2330705225467682, "epoch": 14.85, "learning_rate": 1.7534512125966237e-07, "loss": 0.2271, "step": 41080, "task_loss": 0.46958887577056885 }, { "compression_loss": 0.0, "distillation_loss": 0.1865522861480713, "epoch": 14.85, "learning_rate": 1.6748939874020818e-07, "loss": 0.2261, "step": 41090, "task_loss": 0.3367389142513275 }, { "compression_loss": 0.0, "distillation_loss": 0.22266581654548645, "epoch": 14.85, "learning_rate": 1.598132843584321e-07, "loss": 0.2236, "step": 41100, "task_loss": 0.2846643328666687 }, { "compression_loss": 0.0, "distillation_loss": 0.2285633385181427, "epoch": 14.86, "learning_rate": 1.5231681769496517e-07, "loss": 0.2501, "step": 41110, "task_loss": 0.41894304752349854 }, { "compression_loss": 0.0, "distillation_loss": 0.18297666311264038, "epoch": 14.86, "learning_rate": 1.4500003740410715e-07, "loss": 0.2222, "step": 41120, "task_loss": 0.27748382091522217 }, { "compression_loss": 0.0, "distillation_loss": 0.27647221088409424, "epoch": 14.86, "learning_rate": 1.3786298121364392e-07, "loss": 0.2177, "step": 41130, "task_loss": 0.48041605949401855 }, { "compression_loss": 0.0, "distillation_loss": 0.2500835955142975, "epoch": 14.87, "learning_rate": 1.3090568592462603e-07, "loss": 0.2289, "step": 41140, "task_loss": 0.9432734251022339 }, { "compression_loss": 0.0, "distillation_loss": 0.25542983412742615, "epoch": 14.87, "learning_rate": 1.24128187411221e-07, "loss": 0.233, "step": 41150, "task_loss": 0.25100335478782654 }, { "compression_loss": 0.0, "distillation_loss": 0.198891282081604, "epoch": 14.88, "learning_rate": 1.1753052062048018e-07, "loss": 0.2451, "step": 41160, "task_loss": 0.2514464557170868 }, { "compression_loss": 0.0, "distillation_loss": 0.21026000380516052, "epoch": 14.88, "learning_rate": 1.1111271957219503e-07, "loss": 0.2173, "step": 41170, "task_loss": 0.7069941759109497 }, { "compression_loss": 0.0, "distillation_loss": 0.19374729692935944, "epoch": 14.88, "learning_rate": 1.0487481735870662e-07, "loss": 0.2266, "step": 41180, "task_loss": 0.5104742646217346 }, { "compression_loss": 0.0, "distillation_loss": 0.24391800165176392, "epoch": 14.89, "learning_rate": 9.881684614473861e-08, "loss": 0.23, "step": 41190, "task_loss": 0.6051548719406128 }, { "compression_loss": 0.0, "distillation_loss": 0.21831145882606506, "epoch": 14.89, "learning_rate": 9.29388371672224e-08, "loss": 0.2269, "step": 41200, "task_loss": 0.44926178455352783 }, { "compression_loss": 0.0, "distillation_loss": 0.18416348099708557, "epoch": 14.89, "learning_rate": 8.724082073515326e-08, "loss": 0.2154, "step": 41210, "task_loss": 0.4953691065311432 }, { "compression_loss": 0.0, "distillation_loss": 0.20730862021446228, "epoch": 14.9, "learning_rate": 8.172282622941562e-08, "loss": 0.2198, "step": 41220, "task_loss": 0.3815509080886841 }, { "compression_loss": 0.0, "distillation_loss": 0.21018816530704498, "epoch": 14.9, "learning_rate": 7.638488210265081e-08, "loss": 0.2203, "step": 41230, "task_loss": 0.5777589678764343 }, { "compression_loss": 0.0, "distillation_loss": 0.1664731502532959, "epoch": 14.9, "learning_rate": 7.122701587908619e-08, "loss": 0.226, "step": 41240, "task_loss": 0.28747373819351196 }, { "compression_loss": 0.0, "distillation_loss": 0.18536382913589478, "epoch": 14.91, "learning_rate": 6.62492541544224e-08, "loss": 0.2528, "step": 41250, "task_loss": 0.2921023368835449 }, { "epoch": 14.91, "eval_exact_match": 83.59508041627247, "eval_f1": 90.3394868872591, "step": 41250 }, { "compression_loss": 0.0, "distillation_loss": 0.23831024765968323, "epoch": 14.91, "learning_rate": 6.145162259566628e-08, "loss": 0.2462, "step": 41260, "task_loss": 0.546221137046814 }, { "compression_loss": 0.0, "distillation_loss": 0.1984724998474121, "epoch": 14.92, "learning_rate": 5.683414594102209e-08, "loss": 0.2253, "step": 41270, "task_loss": 0.4562731683254242 }, { "compression_loss": 0.0, "distillation_loss": 0.20618247985839844, "epoch": 14.92, "learning_rate": 5.2396847999751635e-08, "loss": 0.2312, "step": 41280, "task_loss": 0.5624033212661743 }, { "compression_loss": 0.0, "distillation_loss": 0.2884097099304199, "epoch": 14.92, "learning_rate": 4.813975165205763e-08, "loss": 0.2308, "step": 41290, "task_loss": 0.6284477114677429 }, { "compression_loss": 0.0, "distillation_loss": 0.20076477527618408, "epoch": 14.93, "learning_rate": 4.406287884895943e-08, "loss": 0.2219, "step": 41300, "task_loss": 0.31397148966789246 }, { "compression_loss": 0.0, "distillation_loss": 0.19199396669864655, "epoch": 14.93, "learning_rate": 4.016625061218026e-08, "loss": 0.2335, "step": 41310, "task_loss": 0.2553127408027649 }, { "compression_loss": 0.0, "distillation_loss": 0.19816777110099792, "epoch": 14.93, "learning_rate": 3.644988703405016e-08, "loss": 0.2214, "step": 41320, "task_loss": 0.3272134065628052 }, { "compression_loss": 0.0, "distillation_loss": 0.21667525172233582, "epoch": 14.94, "learning_rate": 3.2913807277385464e-08, "loss": 0.2311, "step": 41330, "task_loss": 0.6902732849121094 }, { "compression_loss": 0.0, "distillation_loss": 0.2011464536190033, "epoch": 14.94, "learning_rate": 2.9558029575407204e-08, "loss": 0.2092, "step": 41340, "task_loss": 0.38788002729415894 }, { "compression_loss": 0.0, "distillation_loss": 0.19852641224861145, "epoch": 14.94, "learning_rate": 2.6382571231628456e-08, "loss": 0.2389, "step": 41350, "task_loss": 0.48264265060424805 }, { "compression_loss": 0.0, "distillation_loss": 0.18369928002357483, "epoch": 14.95, "learning_rate": 2.3387448619784367e-08, "loss": 0.2286, "step": 41360, "task_loss": 0.4098126292228699 }, { "compression_loss": 0.0, "distillation_loss": 0.258108526468277, "epoch": 14.95, "learning_rate": 2.0572677183731122e-08, "loss": 0.2408, "step": 41370, "task_loss": 0.47995632886886597 }, { "compression_loss": 0.0, "distillation_loss": 0.20995879173278809, "epoch": 14.95, "learning_rate": 1.7938271437379913e-08, "loss": 0.2241, "step": 41380, "task_loss": 0.8705676198005676 }, { "compression_loss": 0.0, "distillation_loss": 0.1764739751815796, "epoch": 14.96, "learning_rate": 1.5484244964611427e-08, "loss": 0.2331, "step": 41390, "task_loss": 0.258797287940979 }, { "compression_loss": 0.0, "distillation_loss": 0.20735256373882294, "epoch": 14.96, "learning_rate": 1.3210610419213673e-08, "loss": 0.2153, "step": 41400, "task_loss": 0.7630883455276489 }, { "compression_loss": 0.0, "distillation_loss": 0.22570012509822845, "epoch": 14.97, "learning_rate": 1.1117379524804293e-08, "loss": 0.2368, "step": 41410, "task_loss": 0.5106080174446106 }, { "compression_loss": 0.0, "distillation_loss": 0.19130569696426392, "epoch": 14.97, "learning_rate": 9.204563074791671e-09, "loss": 0.2303, "step": 41420, "task_loss": 0.5977288484573364 }, { "compression_loss": 0.0, "distillation_loss": 0.30169206857681274, "epoch": 14.97, "learning_rate": 7.47217093229724e-09, "loss": 0.2319, "step": 41430, "task_loss": 0.6631218194961548 }, { "compression_loss": 0.0, "distillation_loss": 0.22515720129013062, "epoch": 14.98, "learning_rate": 5.9202120301204966e-09, "loss": 0.2271, "step": 41440, "task_loss": 0.4315108358860016 }, { "compression_loss": 0.0, "distillation_loss": 0.2272672951221466, "epoch": 14.98, "learning_rate": 4.548694370680728e-09, "loss": 0.2382, "step": 41450, "task_loss": 0.8516280651092529 }, { "compression_loss": 0.0, "distillation_loss": 0.18287137150764465, "epoch": 14.98, "learning_rate": 3.3576250259859106e-09, "loss": 0.2305, "step": 41460, "task_loss": 0.19684766232967377 }, { "compression_loss": 0.0, "distillation_loss": 0.22013694047927856, "epoch": 14.99, "learning_rate": 2.3470101375860917e-09, "loss": 0.228, "step": 41470, "task_loss": 0.7428228855133057 }, { "compression_loss": 0.0, "distillation_loss": 0.27389436960220337, "epoch": 14.99, "learning_rate": 1.5168549165578415e-09, "loss": 0.2314, "step": 41480, "task_loss": 0.6218991279602051 }, { "compression_loss": 0.0, "distillation_loss": 0.17496860027313232, "epoch": 14.99, "learning_rate": 8.671636434576246e-10, "loss": 0.2295, "step": 41490, "task_loss": 0.33579161763191223 }, { "compression_loss": 0.0, "distillation_loss": 0.22415469586849213, "epoch": 15.0, "learning_rate": 4.3674096117052304e-10, "loss": 0.2327, "step": 41500, "task_loss": 0.2707256078720093 }, { "epoch": 15.0, "eval_exact_match": 83.68968779564806, "eval_f1": 90.37926101768134, "step": 41500 } ], "max_steps": 41505, "num_train_epochs": 15, "total_flos": 3.16732419781632e+16, "trial_name": null, "trial_params": null }