{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 27665, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.2878532409667969, "learning_rate": 4.3371403787769264e-09, "loss": 0.3044, "step": 1, "teacher_loss": 0.30623510479927063 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.49937474727630615, "learning_rate": 8.674280757553853e-09, "loss": 0.3654, "step": 2, "teacher_loss": 0.35050299763679504 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.7952067852020264, "learning_rate": 1.301142113633078e-08, "loss": 0.4253, "step": 3, "teacher_loss": 0.38419413566589355 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.752353310585022, "learning_rate": 1.7348561515107706e-08, "loss": 0.4879, "step": 4, "teacher_loss": 0.4584733247756958 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.6486436724662781, "learning_rate": 2.1685701893884633e-08, "loss": 0.3889, "step": 5, "teacher_loss": 0.3600001633167267 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.6064157485961914, "learning_rate": 2.602284227266156e-08, "loss": 0.3314, "step": 6, "teacher_loss": 0.3008692264556885 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.603389322757721, "learning_rate": 3.0359982651438484e-08, "loss": 0.426, "step": 7, "teacher_loss": 0.40631771087646484 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.47560954093933105, "learning_rate": 3.469712303021541e-08, "loss": 0.8501, "step": 8, "teacher_loss": 0.8917432427406311 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.35841572284698486, "learning_rate": 3.903426340899234e-08, "loss": 0.3169, "step": 9, "teacher_loss": 0.3123272657394409 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.4316421151161194, "learning_rate": 4.3371403787769266e-08, "loss": 0.5676, "step": 10, "teacher_loss": 0.5826940536499023 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.5786064863204956, "learning_rate": 4.770854416654619e-08, "loss": 0.3257, "step": 11, "teacher_loss": 0.29761967062950134 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.18219050765037537, "learning_rate": 5.204568454532312e-08, "loss": 0.2459, "step": 12, "teacher_loss": 0.25292351841926575 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.1468430459499359, "learning_rate": 5.638282492410005e-08, "loss": 0.4292, "step": 13, "teacher_loss": 0.4606162905693054 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.2759707570075989, "learning_rate": 6.071996530287697e-08, "loss": 0.2951, "step": 14, "teacher_loss": 0.29724395275115967 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.615320086479187, "learning_rate": 6.505710568165389e-08, "loss": 0.4713, "step": 15, "teacher_loss": 0.4552942216396332 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.6560722589492798, "learning_rate": 6.939424606043082e-08, "loss": 0.5152, "step": 16, "teacher_loss": 0.49955758452415466 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.7656878232955933, "learning_rate": 7.373138643920774e-08, "loss": 0.4166, "step": 17, "teacher_loss": 0.3777827024459839 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.3205189108848572, "learning_rate": 7.806852681798468e-08, "loss": 0.4887, "step": 18, "teacher_loss": 0.5074136853218079 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.5319740772247314, "learning_rate": 8.24056671967616e-08, "loss": 0.4459, "step": 19, "teacher_loss": 0.43633657693862915 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.7975113987922668, "learning_rate": 8.674280757553853e-08, "loss": 0.3598, "step": 20, "teacher_loss": 0.31118321418762207 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.5031394362449646, "learning_rate": 9.107994795431545e-08, "loss": 0.2944, "step": 21, "teacher_loss": 0.2712216377258301 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.327789843082428, "learning_rate": 9.541708833309239e-08, "loss": 0.2686, "step": 22, "teacher_loss": 0.26207101345062256 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.7447530627250671, "learning_rate": 9.975422871186931e-08, "loss": 0.3677, "step": 23, "teacher_loss": 0.3258216381072998 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.5696777105331421, "learning_rate": 1.0409136909064624e-07, "loss": 0.432, "step": 24, "teacher_loss": 0.41666150093078613 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.31212449073791504, "learning_rate": 1.0842850946942316e-07, "loss": 0.6133, "step": 25, "teacher_loss": 0.646796703338623 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.21366344392299652, "learning_rate": 1.127656498482001e-07, "loss": 0.4887, "step": 26, "teacher_loss": 0.5192395448684692 }, { "compression_loss": 0.0, "epoch": 0.0, "label_loss": 0.5291810035705566, "learning_rate": 1.1710279022697702e-07, "loss": 0.4335, "step": 27, "teacher_loss": 0.4228705167770386 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5960581302642822, "learning_rate": 1.2143993060575394e-07, "loss": 0.3965, "step": 28, "teacher_loss": 0.37433257699012756 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.2765660583972931, "learning_rate": 1.2577707098453087e-07, "loss": 0.3472, "step": 29, "teacher_loss": 0.35501280426979065 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3112162947654724, "learning_rate": 1.3011421136330778e-07, "loss": 0.2953, "step": 30, "teacher_loss": 0.2934904396533966 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5781139135360718, "learning_rate": 1.3445135174208474e-07, "loss": 0.6268, "step": 31, "teacher_loss": 0.6322437524795532 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.551255464553833, "learning_rate": 1.3878849212086165e-07, "loss": 0.3947, "step": 32, "teacher_loss": 0.3773103356361389 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.6234651803970337, "learning_rate": 1.4312563249963858e-07, "loss": 0.4112, "step": 33, "teacher_loss": 0.38762766122817993 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.9360308647155762, "learning_rate": 1.4746277287841549e-07, "loss": 0.4008, "step": 34, "teacher_loss": 0.34132856130599976 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.47657620906829834, "learning_rate": 1.5179991325719245e-07, "loss": 0.5855, "step": 35, "teacher_loss": 0.5975528359413147 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.40065234899520874, "learning_rate": 1.5613705363596935e-07, "loss": 0.3151, "step": 36, "teacher_loss": 0.30564457178115845 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.2311306595802307, "learning_rate": 1.604741940147463e-07, "loss": 0.2869, "step": 37, "teacher_loss": 0.2930516302585602 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3395083546638489, "learning_rate": 1.648113343935232e-07, "loss": 0.3718, "step": 38, "teacher_loss": 0.37534695863723755 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.167999267578125, "learning_rate": 1.6914847477230013e-07, "loss": 0.2363, "step": 39, "teacher_loss": 0.24388030171394348 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.6877390742301941, "learning_rate": 1.7348561515107706e-07, "loss": 0.3837, "step": 40, "teacher_loss": 0.34989410638809204 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.33083808422088623, "learning_rate": 1.7782275552985397e-07, "loss": 0.3266, "step": 41, "teacher_loss": 0.3260805606842041 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.32144296169281006, "learning_rate": 1.821598959086309e-07, "loss": 0.704, "step": 42, "teacher_loss": 0.7464689016342163 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.6025416851043701, "learning_rate": 1.8649703628740784e-07, "loss": 0.4745, "step": 43, "teacher_loss": 0.460231751203537 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5173925161361694, "learning_rate": 1.9083417666618477e-07, "loss": 0.4872, "step": 44, "teacher_loss": 0.4838038682937622 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3744485378265381, "learning_rate": 1.9517131704496168e-07, "loss": 0.3537, "step": 45, "teacher_loss": 0.35138148069381714 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.21013106405735016, "learning_rate": 1.9950845742373861e-07, "loss": 0.2938, "step": 46, "teacher_loss": 0.3030865490436554 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.540357768535614, "learning_rate": 2.0384559780251555e-07, "loss": 0.3767, "step": 47, "teacher_loss": 0.3585663437843323 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4985116422176361, "learning_rate": 2.0818273818129248e-07, "loss": 0.3204, "step": 48, "teacher_loss": 0.30065175890922546 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4947139024734497, "learning_rate": 2.125198785600694e-07, "loss": 0.6806, "step": 49, "teacher_loss": 0.7013063430786133 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3116215467453003, "learning_rate": 2.1685701893884632e-07, "loss": 0.3689, "step": 50, "teacher_loss": 0.37528449296951294 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.40373694896698, "learning_rate": 2.2119415931762326e-07, "loss": 0.5921, "step": 51, "teacher_loss": 0.6130533814430237 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5308322906494141, "learning_rate": 2.255312996964002e-07, "loss": 0.4236, "step": 52, "teacher_loss": 0.41172850131988525 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.42343878746032715, "learning_rate": 2.298684400751771e-07, "loss": 0.3904, "step": 53, "teacher_loss": 0.3866812586784363 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.36457860469818115, "learning_rate": 2.3420558045395403e-07, "loss": 0.342, "step": 54, "teacher_loss": 0.33951157331466675 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.33712244033813477, "learning_rate": 2.38542720832731e-07, "loss": 0.259, "step": 55, "teacher_loss": 0.25035154819488525 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.42537349462509155, "learning_rate": 2.4287986121150787e-07, "loss": 0.3422, "step": 56, "teacher_loss": 0.3330062925815582 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.7673524618148804, "learning_rate": 2.472170015902848e-07, "loss": 0.3707, "step": 57, "teacher_loss": 0.326668918132782 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.43040502071380615, "learning_rate": 2.5155414196906174e-07, "loss": 0.542, "step": 58, "teacher_loss": 0.5543990731239319 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.41028231382369995, "learning_rate": 2.558912823478386e-07, "loss": 0.3497, "step": 59, "teacher_loss": 0.3429994285106659 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 1.0286916494369507, "learning_rate": 2.6022842272661556e-07, "loss": 0.4371, "step": 60, "teacher_loss": 0.3713400959968567 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5336390137672424, "learning_rate": 2.6456556310539254e-07, "loss": 0.343, "step": 61, "teacher_loss": 0.32187139987945557 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.19562755525112152, "learning_rate": 2.689027034841695e-07, "loss": 0.2846, "step": 62, "teacher_loss": 0.2945200204849243 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5079911351203918, "learning_rate": 2.7323984386294636e-07, "loss": 0.3185, "step": 63, "teacher_loss": 0.29742881655693054 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3633435368537903, "learning_rate": 2.775769842417233e-07, "loss": 0.3915, "step": 64, "teacher_loss": 0.3946162462234497 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4795653522014618, "learning_rate": 2.819141246205002e-07, "loss": 0.3785, "step": 65, "teacher_loss": 0.3672950863838196 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5867092609405518, "learning_rate": 2.8625126499927716e-07, "loss": 0.526, "step": 66, "teacher_loss": 0.5192420482635498 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3838410973548889, "learning_rate": 2.9058840537805404e-07, "loss": 0.3074, "step": 67, "teacher_loss": 0.29894983768463135 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.2822248339653015, "learning_rate": 2.9492554575683097e-07, "loss": 0.3579, "step": 68, "teacher_loss": 0.366265207529068 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4573609232902527, "learning_rate": 2.9926268613560796e-07, "loss": 0.3274, "step": 69, "teacher_loss": 0.31290900707244873 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.17300641536712646, "learning_rate": 3.035998265143849e-07, "loss": 0.4856, "step": 70, "teacher_loss": 0.5203573703765869 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.36407849192619324, "learning_rate": 3.079369668931618e-07, "loss": 0.3696, "step": 71, "teacher_loss": 0.3702436089515686 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4354317784309387, "learning_rate": 3.122741072719387e-07, "loss": 0.4809, "step": 72, "teacher_loss": 0.4859127402305603 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.3354983329772949, "learning_rate": 3.1661124765071564e-07, "loss": 0.3284, "step": 73, "teacher_loss": 0.32761678099632263 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.4563247561454773, "learning_rate": 3.209483880294926e-07, "loss": 0.4131, "step": 74, "teacher_loss": 0.4083379805088043 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.34043627977371216, "learning_rate": 3.2528552840826946e-07, "loss": 0.2446, "step": 75, "teacher_loss": 0.23390792310237885 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5458319187164307, "learning_rate": 3.296226687870464e-07, "loss": 0.4366, "step": 76, "teacher_loss": 0.42451536655426025 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.6412752866744995, "learning_rate": 3.339598091658234e-07, "loss": 0.4724, "step": 77, "teacher_loss": 0.4536617398262024 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.5172377228736877, "learning_rate": 3.3829694954460026e-07, "loss": 0.4266, "step": 78, "teacher_loss": 0.4165247082710266 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.39089351892471313, "learning_rate": 3.426340899233772e-07, "loss": 0.3328, "step": 79, "teacher_loss": 0.3263006806373596 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.43537408113479614, "learning_rate": 3.4697123030215413e-07, "loss": 0.3144, "step": 80, "teacher_loss": 0.3009788990020752 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 0.6011801958084106, "learning_rate": 3.5130837068093106e-07, "loss": 0.4713, "step": 81, "teacher_loss": 0.45690327882766724 }, { "compression_loss": 0.0, "epoch": 0.01, "label_loss": 1.076367735862732, "learning_rate": 3.5564551105970794e-07, "loss": 0.5857, "step": 82, "teacher_loss": 0.5311605930328369 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.37369078397750854, "learning_rate": 3.599826514384849e-07, "loss": 0.35, "step": 83, "teacher_loss": 0.34740591049194336 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3828126788139343, "learning_rate": 3.643197918172618e-07, "loss": 0.5781, "step": 84, "teacher_loss": 0.5998474359512329 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.5417852997779846, "learning_rate": 3.686569321960388e-07, "loss": 0.2808, "step": 85, "teacher_loss": 0.2518404424190521 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3188953101634979, "learning_rate": 3.729940725748157e-07, "loss": 0.4551, "step": 86, "teacher_loss": 0.47025638818740845 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.27739542722702026, "learning_rate": 3.773312129535926e-07, "loss": 0.4346, "step": 87, "teacher_loss": 0.45206791162490845 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.8092740774154663, "learning_rate": 3.8166835333236954e-07, "loss": 0.5619, "step": 88, "teacher_loss": 0.5344049334526062 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.5842029452323914, "learning_rate": 3.860054937111465e-07, "loss": 0.4171, "step": 89, "teacher_loss": 0.39858120679855347 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.37819138169288635, "learning_rate": 3.9034263408992336e-07, "loss": 0.2797, "step": 90, "teacher_loss": 0.2687514126300812 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6116352081298828, "learning_rate": 3.946797744687003e-07, "loss": 0.4149, "step": 91, "teacher_loss": 0.393058180809021 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.24421319365501404, "learning_rate": 3.9901691484747723e-07, "loss": 0.2807, "step": 92, "teacher_loss": 0.28480595350265503 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.19853071868419647, "learning_rate": 4.033540552262542e-07, "loss": 0.225, "step": 93, "teacher_loss": 0.2279570996761322 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.8776455521583557, "learning_rate": 4.076911956050311e-07, "loss": 0.3779, "step": 94, "teacher_loss": 0.32237839698791504 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3355174958705902, "learning_rate": 4.1202833598380803e-07, "loss": 0.3764, "step": 95, "teacher_loss": 0.38089990615844727 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.7735798358917236, "learning_rate": 4.1636547636258496e-07, "loss": 0.44, "step": 96, "teacher_loss": 0.40291130542755127 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.33850619196891785, "learning_rate": 4.2070261674136184e-07, "loss": 0.2829, "step": 97, "teacher_loss": 0.27667152881622314 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.31478065252304077, "learning_rate": 4.250397571201388e-07, "loss": 0.2783, "step": 98, "teacher_loss": 0.27422410249710083 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3673118054866791, "learning_rate": 4.293768974989157e-07, "loss": 0.2723, "step": 99, "teacher_loss": 0.2617168724536896 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.37465184926986694, "learning_rate": 4.3371403787769265e-07, "loss": 0.2707, "step": 100, "teacher_loss": 0.25916990637779236 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3754885196685791, "learning_rate": 4.380511782564696e-07, "loss": 0.2643, "step": 101, "teacher_loss": 0.2519530951976776 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.4311053454875946, "learning_rate": 4.423883186352465e-07, "loss": 0.3775, "step": 102, "teacher_loss": 0.37158575654029846 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.35080692172050476, "learning_rate": 4.4672545901402345e-07, "loss": 0.3724, "step": 103, "teacher_loss": 0.3748095631599426 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.24182593822479248, "learning_rate": 4.510625993928004e-07, "loss": 0.2525, "step": 104, "teacher_loss": 0.25370627641677856 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.34194818139076233, "learning_rate": 4.5539973977157726e-07, "loss": 0.23, "step": 105, "teacher_loss": 0.21761192381381989 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.24426543712615967, "learning_rate": 4.597368801503542e-07, "loss": 0.4005, "step": 106, "teacher_loss": 0.4178938865661621 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.27553167939186096, "learning_rate": 4.6407402052913113e-07, "loss": 0.2505, "step": 107, "teacher_loss": 0.24769991636276245 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 1.2734107971191406, "learning_rate": 4.6841116090790806e-07, "loss": 0.4416, "step": 108, "teacher_loss": 0.34919288754463196 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.37502509355545044, "learning_rate": 4.72748301286685e-07, "loss": 0.3259, "step": 109, "teacher_loss": 0.32042086124420166 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6901859045028687, "learning_rate": 4.77085441665462e-07, "loss": 0.3745, "step": 110, "teacher_loss": 0.33943992853164673 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6412104964256287, "learning_rate": 4.814225820442389e-07, "loss": 0.4131, "step": 111, "teacher_loss": 0.38770169019699097 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.2801129221916199, "learning_rate": 4.857597224230157e-07, "loss": 0.3256, "step": 112, "teacher_loss": 0.33063459396362305 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.1641349494457245, "learning_rate": 4.900968628017927e-07, "loss": 0.2595, "step": 113, "teacher_loss": 0.27009546756744385 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.25451192259788513, "learning_rate": 4.944340031805696e-07, "loss": 0.2682, "step": 114, "teacher_loss": 0.26969805359840393 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.8173410892486572, "learning_rate": 4.987711435593465e-07, "loss": 0.4288, "step": 115, "teacher_loss": 0.3856676518917084 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.30786746740341187, "learning_rate": 5.031082839381235e-07, "loss": 0.1893, "step": 116, "teacher_loss": 0.17607331275939941 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.23670032620429993, "learning_rate": 5.074454243169004e-07, "loss": 0.364, "step": 117, "teacher_loss": 0.37815040349960327 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.5879061222076416, "learning_rate": 5.117825646956772e-07, "loss": 0.687, "step": 118, "teacher_loss": 0.6979761719703674 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3606983721256256, "learning_rate": 5.161197050744542e-07, "loss": 0.3899, "step": 119, "teacher_loss": 0.3930942416191101 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.27582406997680664, "learning_rate": 5.204568454532311e-07, "loss": 0.2923, "step": 120, "teacher_loss": 0.2941736578941345 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.2806050181388855, "learning_rate": 5.247939858320082e-07, "loss": 0.2927, "step": 121, "teacher_loss": 0.2940514087677002 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.40702688694000244, "learning_rate": 5.291311262107851e-07, "loss": 0.3119, "step": 122, "teacher_loss": 0.30137813091278076 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3387115001678467, "learning_rate": 5.33468266589562e-07, "loss": 0.27, "step": 123, "teacher_loss": 0.262349396944046 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3801390826702118, "learning_rate": 5.37805406968339e-07, "loss": 0.4172, "step": 124, "teacher_loss": 0.4213544428348541 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6031049489974976, "learning_rate": 5.421425473471158e-07, "loss": 0.366, "step": 125, "teacher_loss": 0.33967217803001404 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.26995834708213806, "learning_rate": 5.464796877258927e-07, "loss": 0.3053, "step": 126, "teacher_loss": 0.3091879189014435 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.2996470332145691, "learning_rate": 5.508168281046697e-07, "loss": 0.3781, "step": 127, "teacher_loss": 0.3867761194705963 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6594407558441162, "learning_rate": 5.551539684834466e-07, "loss": 0.2807, "step": 128, "teacher_loss": 0.23862676322460175 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.3946491479873657, "learning_rate": 5.594911088622236e-07, "loss": 0.3163, "step": 129, "teacher_loss": 0.30759650468826294 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.23987199366092682, "learning_rate": 5.638282492410004e-07, "loss": 0.1924, "step": 130, "teacher_loss": 0.18717840313911438 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.32025086879730225, "learning_rate": 5.681653896197773e-07, "loss": 0.3402, "step": 131, "teacher_loss": 0.34237128496170044 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.7483360767364502, "learning_rate": 5.725025299985543e-07, "loss": 0.6402, "step": 132, "teacher_loss": 0.6281335353851318 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6435651779174805, "learning_rate": 5.768396703773312e-07, "loss": 0.6577, "step": 133, "teacher_loss": 0.6593012809753418 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.2137606143951416, "learning_rate": 5.811768107561081e-07, "loss": 0.4271, "step": 134, "teacher_loss": 0.45077282190322876 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.38604655861854553, "learning_rate": 5.855139511348851e-07, "loss": 0.5313, "step": 135, "teacher_loss": 0.5474059581756592 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.5229781270027161, "learning_rate": 5.898510915136619e-07, "loss": 0.4228, "step": 136, "teacher_loss": 0.4116794168949127 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.6987260580062866, "learning_rate": 5.941882318924389e-07, "loss": 0.4832, "step": 137, "teacher_loss": 0.4592827558517456 }, { "compression_loss": 0.0, "epoch": 0.02, "label_loss": 0.47588491439819336, "learning_rate": 5.985253722712159e-07, "loss": 0.4062, "step": 138, "teacher_loss": 0.39845460653305054 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.2496635913848877, "learning_rate": 6.028625126499928e-07, "loss": 0.2436, "step": 139, "teacher_loss": 0.24296367168426514 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.44500523805618286, "learning_rate": 6.071996530287698e-07, "loss": 0.3509, "step": 140, "teacher_loss": 0.34049880504608154 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.7203101515769958, "learning_rate": 6.115367934075467e-07, "loss": 0.5543, "step": 141, "teacher_loss": 0.5358462333679199 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.7729004621505737, "learning_rate": 6.158739337863235e-07, "loss": 0.4685, "step": 142, "teacher_loss": 0.43462610244750977 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5708900094032288, "learning_rate": 6.202110741651005e-07, "loss": 0.2883, "step": 143, "teacher_loss": 0.25688982009887695 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.46314001083374023, "learning_rate": 6.245482145438774e-07, "loss": 0.3364, "step": 144, "teacher_loss": 0.32227838039398193 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5948927998542786, "learning_rate": 6.288853549226543e-07, "loss": 0.3916, "step": 145, "teacher_loss": 0.36899033188819885 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.31737908720970154, "learning_rate": 6.332224953014313e-07, "loss": 0.2703, "step": 146, "teacher_loss": 0.2650870382785797 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4784914255142212, "learning_rate": 6.375596356802082e-07, "loss": 0.4235, "step": 147, "teacher_loss": 0.41739553213119507 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.2840213179588318, "learning_rate": 6.418967760589852e-07, "loss": 0.3489, "step": 148, "teacher_loss": 0.3561570644378662 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6581293344497681, "learning_rate": 6.46233916437762e-07, "loss": 0.4675, "step": 149, "teacher_loss": 0.4463242292404175 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.31044745445251465, "learning_rate": 6.505710568165389e-07, "loss": 0.3008, "step": 150, "teacher_loss": 0.29976487159729004 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.39795494079589844, "learning_rate": 6.549081971953159e-07, "loss": 0.2962, "step": 151, "teacher_loss": 0.2849386930465698 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.18419547379016876, "learning_rate": 6.592453375740928e-07, "loss": 0.1948, "step": 152, "teacher_loss": 0.19600102305412292 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4073745012283325, "learning_rate": 6.635824779528698e-07, "loss": 0.332, "step": 153, "teacher_loss": 0.3236081600189209 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.23148149251937866, "learning_rate": 6.679196183316468e-07, "loss": 0.2851, "step": 154, "teacher_loss": 0.29110246896743774 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4480724334716797, "learning_rate": 6.722567587104236e-07, "loss": 0.3282, "step": 155, "teacher_loss": 0.31482595205307007 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.3575660288333893, "learning_rate": 6.765938990892005e-07, "loss": 0.2659, "step": 156, "teacher_loss": 0.25568193197250366 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.45173072814941406, "learning_rate": 6.809310394679775e-07, "loss": 0.2062, "step": 157, "teacher_loss": 0.17894917726516724 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6072745323181152, "learning_rate": 6.852681798467544e-07, "loss": 0.4497, "step": 158, "teacher_loss": 0.43217384815216064 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.35858872532844543, "learning_rate": 6.896053202255314e-07, "loss": 0.3444, "step": 159, "teacher_loss": 0.34277233481407166 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6806064248085022, "learning_rate": 6.939424606043083e-07, "loss": 0.3469, "step": 160, "teacher_loss": 0.30976685881614685 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.46310800313949585, "learning_rate": 6.982796009830851e-07, "loss": 0.4423, "step": 161, "teacher_loss": 0.43999171257019043 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5555820465087891, "learning_rate": 7.026167413618621e-07, "loss": 0.2843, "step": 162, "teacher_loss": 0.25418922305107117 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.3326151967048645, "learning_rate": 7.06953881740639e-07, "loss": 0.3325, "step": 163, "teacher_loss": 0.3324737250804901 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.1965804100036621, "learning_rate": 7.112910221194159e-07, "loss": 0.2458, "step": 164, "teacher_loss": 0.2512645423412323 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.289686381816864, "learning_rate": 7.156281624981929e-07, "loss": 0.2338, "step": 165, "teacher_loss": 0.22755871713161469 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6694118976593018, "learning_rate": 7.199653028769698e-07, "loss": 0.3307, "step": 166, "teacher_loss": 0.29301539063453674 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5715506672859192, "learning_rate": 7.243024432557467e-07, "loss": 0.449, "step": 167, "teacher_loss": 0.43539315462112427 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.49966850876808167, "learning_rate": 7.286395836345236e-07, "loss": 0.3159, "step": 168, "teacher_loss": 0.29553017020225525 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4972558617591858, "learning_rate": 7.329767240133006e-07, "loss": 0.3065, "step": 169, "teacher_loss": 0.28530749678611755 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5740405321121216, "learning_rate": 7.373138643920776e-07, "loss": 0.2995, "step": 170, "teacher_loss": 0.26899081468582153 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5747886300086975, "learning_rate": 7.416510047708545e-07, "loss": 0.2857, "step": 171, "teacher_loss": 0.2536314129829407 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6649643182754517, "learning_rate": 7.459881451496314e-07, "loss": 0.3111, "step": 172, "teacher_loss": 0.27174609899520874 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.3147515058517456, "learning_rate": 7.503252855284083e-07, "loss": 0.3875, "step": 173, "teacher_loss": 0.395530641078949 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.1841243952512741, "learning_rate": 7.546624259071852e-07, "loss": 0.3245, "step": 174, "teacher_loss": 0.3400971293449402 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6122177839279175, "learning_rate": 7.589995662859621e-07, "loss": 0.4994, "step": 175, "teacher_loss": 0.4869192838668823 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.2748439610004425, "learning_rate": 7.633367066647391e-07, "loss": 0.2897, "step": 176, "teacher_loss": 0.2912960648536682 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.804057240486145, "learning_rate": 7.67673847043516e-07, "loss": 0.4528, "step": 177, "teacher_loss": 0.4138133227825165 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6062012910842896, "learning_rate": 7.72010987422293e-07, "loss": 0.4437, "step": 178, "teacher_loss": 0.4256832003593445 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.3464525640010834, "learning_rate": 7.763481278010698e-07, "loss": 0.3721, "step": 179, "teacher_loss": 0.3749992251396179 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.9590786695480347, "learning_rate": 7.806852681798467e-07, "loss": 0.3323, "step": 180, "teacher_loss": 0.2627091407775879 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.7242481708526611, "learning_rate": 7.850224085586237e-07, "loss": 0.355, "step": 181, "teacher_loss": 0.3139296770095825 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.7277974486351013, "learning_rate": 7.893595489374006e-07, "loss": 0.3634, "step": 182, "teacher_loss": 0.32291775941848755 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.29894939064979553, "learning_rate": 7.936966893161775e-07, "loss": 0.2436, "step": 183, "teacher_loss": 0.2374768853187561 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.31897616386413574, "learning_rate": 7.980338296949545e-07, "loss": 0.2987, "step": 184, "teacher_loss": 0.2964322566986084 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4672470986843109, "learning_rate": 8.023709700737314e-07, "loss": 0.3647, "step": 185, "teacher_loss": 0.3532944321632385 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.6552913188934326, "learning_rate": 8.067081104525084e-07, "loss": 0.4423, "step": 186, "teacher_loss": 0.4186667203903198 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4703465700149536, "learning_rate": 8.110452508312853e-07, "loss": 0.2905, "step": 187, "teacher_loss": 0.2704945504665375 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.36512064933776855, "learning_rate": 8.153823912100622e-07, "loss": 0.2582, "step": 188, "teacher_loss": 0.24634620547294617 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.5008628368377686, "learning_rate": 8.197195315888392e-07, "loss": 0.287, "step": 189, "teacher_loss": 0.2632533311843872 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.2761531472206116, "learning_rate": 8.240566719676161e-07, "loss": 0.3818, "step": 190, "teacher_loss": 0.39357393980026245 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.4236161410808563, "learning_rate": 8.283938123463929e-07, "loss": 0.3699, "step": 191, "teacher_loss": 0.36391544342041016 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.38792508840560913, "learning_rate": 8.327309527251699e-07, "loss": 0.3698, "step": 192, "teacher_loss": 0.3677966594696045 }, { "compression_loss": 0.0, "epoch": 0.03, "label_loss": 0.7151434421539307, "learning_rate": 8.370680931039468e-07, "loss": 0.3354, "step": 193, "teacher_loss": 0.2932465076446533 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.29336994886398315, "learning_rate": 8.414052334827237e-07, "loss": 0.3233, "step": 194, "teacher_loss": 0.3265939950942993 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.39612430334091187, "learning_rate": 8.457423738615007e-07, "loss": 0.3076, "step": 195, "teacher_loss": 0.29779279232025146 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3977528512477875, "learning_rate": 8.500795142402776e-07, "loss": 0.253, "step": 196, "teacher_loss": 0.23690246045589447 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3796065151691437, "learning_rate": 8.544166546190545e-07, "loss": 0.3437, "step": 197, "teacher_loss": 0.33966565132141113 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.717301607131958, "learning_rate": 8.587537949978314e-07, "loss": 0.5056, "step": 198, "teacher_loss": 0.48209643363952637 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6161582469940186, "learning_rate": 8.630909353766083e-07, "loss": 0.4483, "step": 199, "teacher_loss": 0.4296002984046936 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6727553606033325, "learning_rate": 8.674280757553853e-07, "loss": 0.3356, "step": 200, "teacher_loss": 0.2981756329536438 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.5859873294830322, "learning_rate": 8.717652161341623e-07, "loss": 0.3323, "step": 201, "teacher_loss": 0.30413758754730225 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6775317788124084, "learning_rate": 8.761023565129392e-07, "loss": 0.3207, "step": 202, "teacher_loss": 0.28099697828292847 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.687187910079956, "learning_rate": 8.804394968917161e-07, "loss": 0.3559, "step": 203, "teacher_loss": 0.31906983256340027 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.5126361846923828, "learning_rate": 8.84776637270493e-07, "loss": 0.2829, "step": 204, "teacher_loss": 0.2573986053466797 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.23460006713867188, "learning_rate": 8.8911377764927e-07, "loss": 0.2505, "step": 205, "teacher_loss": 0.2523079514503479 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.4875229597091675, "learning_rate": 8.934509180280469e-07, "loss": 0.5581, "step": 206, "teacher_loss": 0.5659719109535217 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3942033648490906, "learning_rate": 8.977880584068238e-07, "loss": 0.3104, "step": 207, "teacher_loss": 0.30104804039001465 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.4788144826889038, "learning_rate": 9.021251987856008e-07, "loss": 0.2747, "step": 208, "teacher_loss": 0.2520306408405304 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.5390474200248718, "learning_rate": 9.064623391643776e-07, "loss": 0.2919, "step": 209, "teacher_loss": 0.2644907236099243 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.47657227516174316, "learning_rate": 9.107994795431545e-07, "loss": 0.284, "step": 210, "teacher_loss": 0.26265081763267517 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.17647041380405426, "learning_rate": 9.151366199219315e-07, "loss": 0.3022, "step": 211, "teacher_loss": 0.3162067234516144 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.4461628794670105, "learning_rate": 9.194737603007084e-07, "loss": 0.5094, "step": 212, "teacher_loss": 0.516472339630127 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.21757963299751282, "learning_rate": 9.238109006794853e-07, "loss": 0.3133, "step": 213, "teacher_loss": 0.32392221689224243 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.1596691757440567, "learning_rate": 9.281480410582623e-07, "loss": 0.2204, "step": 214, "teacher_loss": 0.22716379165649414 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3218694031238556, "learning_rate": 9.324851814370391e-07, "loss": 0.2393, "step": 215, "teacher_loss": 0.23013153672218323 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3074747920036316, "learning_rate": 9.368223218158161e-07, "loss": 0.2936, "step": 216, "teacher_loss": 0.292053759098053 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.2528434991836548, "learning_rate": 9.411594621945931e-07, "loss": 0.3194, "step": 217, "teacher_loss": 0.32680851221084595 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6210227608680725, "learning_rate": 9.4549660257337e-07, "loss": 0.2997, "step": 218, "teacher_loss": 0.2639586925506592 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.5951622724533081, "learning_rate": 9.498337429521469e-07, "loss": 0.3087, "step": 219, "teacher_loss": 0.2769015431404114 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3769147992134094, "learning_rate": 9.54170883330924e-07, "loss": 0.4781, "step": 220, "teacher_loss": 0.4893210828304291 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6473550200462341, "learning_rate": 9.585080237097007e-07, "loss": 0.3045, "step": 221, "teacher_loss": 0.2664129436016083 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.32416659593582153, "learning_rate": 9.628451640884777e-07, "loss": 0.3289, "step": 222, "teacher_loss": 0.3293718695640564 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.9988908767700195, "learning_rate": 9.671823044672545e-07, "loss": 0.3651, "step": 223, "teacher_loss": 0.2947273850440979 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.39788341522216797, "learning_rate": 9.715194448460315e-07, "loss": 0.2987, "step": 224, "teacher_loss": 0.2877279818058014 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.9215387105941772, "learning_rate": 9.758565852248085e-07, "loss": 0.4385, "step": 225, "teacher_loss": 0.38485661149024963 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.4114275574684143, "learning_rate": 9.801937256035855e-07, "loss": 0.4904, "step": 226, "teacher_loss": 0.49916720390319824 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.6249231696128845, "learning_rate": 9.845308659823625e-07, "loss": 0.1951, "step": 227, "teacher_loss": 0.14731940627098083 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.4257274568080902, "learning_rate": 9.888680063611392e-07, "loss": 0.2733, "step": 228, "teacher_loss": 0.2563665211200714 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3597075939178467, "learning_rate": 9.932051467399162e-07, "loss": 0.6172, "step": 229, "teacher_loss": 0.6458524465560913 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.18291598558425903, "learning_rate": 9.97542287118693e-07, "loss": 0.2182, "step": 230, "teacher_loss": 0.2221117615699768 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.23957034945487976, "learning_rate": 1.0018794274974702e-06, "loss": 0.4058, "step": 231, "teacher_loss": 0.42427605390548706 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.34674763679504395, "learning_rate": 1.006216567876247e-06, "loss": 0.3248, "step": 232, "teacher_loss": 0.3223832845687866 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.392194926738739, "learning_rate": 1.010553708255024e-06, "loss": 0.2606, "step": 233, "teacher_loss": 0.24595263600349426 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3266582489013672, "learning_rate": 1.0148908486338007e-06, "loss": 0.2618, "step": 234, "teacher_loss": 0.2546347677707672 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.23488172888755798, "learning_rate": 1.0192279890125777e-06, "loss": 0.2398, "step": 235, "teacher_loss": 0.24030104279518127 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.31779786944389343, "learning_rate": 1.0235651293913545e-06, "loss": 0.2895, "step": 236, "teacher_loss": 0.28630515933036804 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.7872130870819092, "learning_rate": 1.0279022697701317e-06, "loss": 0.3855, "step": 237, "teacher_loss": 0.3409123420715332 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.7485690116882324, "learning_rate": 1.0322394101489085e-06, "loss": 0.3265, "step": 238, "teacher_loss": 0.2796207368373871 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3279935121536255, "learning_rate": 1.0365765505276854e-06, "loss": 0.2793, "step": 239, "teacher_loss": 0.2738860845565796 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.3581310510635376, "learning_rate": 1.0409136909064622e-06, "loss": 0.2538, "step": 240, "teacher_loss": 0.24225902557373047 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.43405529856681824, "learning_rate": 1.0452508312852392e-06, "loss": 0.319, "step": 241, "teacher_loss": 0.30625149607658386 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.47915583848953247, "learning_rate": 1.0495879716640164e-06, "loss": 0.3991, "step": 242, "teacher_loss": 0.3902336359024048 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.1838829219341278, "learning_rate": 1.0539251120427932e-06, "loss": 0.2737, "step": 243, "teacher_loss": 0.28363755345344543 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.08305107057094574, "learning_rate": 1.0582622524215702e-06, "loss": 0.2508, "step": 244, "teacher_loss": 0.26947861909866333 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.273042231798172, "learning_rate": 1.062599392800347e-06, "loss": 0.3815, "step": 245, "teacher_loss": 0.39350050687789917 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.46982133388519287, "learning_rate": 1.066936533179124e-06, "loss": 0.2989, "step": 246, "teacher_loss": 0.279954731464386 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.7464994192123413, "learning_rate": 1.0712736735579007e-06, "loss": 0.3518, "step": 247, "teacher_loss": 0.30791768431663513 }, { "compression_loss": 0.0, "epoch": 0.04, "label_loss": 0.1897316426038742, "learning_rate": 1.075610813936678e-06, "loss": 0.2672, "step": 248, "teacher_loss": 0.27575311064720154 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.35607877373695374, "learning_rate": 1.0799479543154547e-06, "loss": 0.2801, "step": 249, "teacher_loss": 0.2716768980026245 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.33084583282470703, "learning_rate": 1.0842850946942317e-06, "loss": 0.2449, "step": 250, "teacher_loss": 0.23533692955970764 }, { "epoch": 0.05, "eval_exact_match": 79.57426679280984, "eval_f1": 87.09971198800199, "step": 250 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3847602307796478, "learning_rate": 1.0886222350730084e-06, "loss": 0.3639, "step": 251, "teacher_loss": 0.36158716678619385 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3344457745552063, "learning_rate": 1.0929593754517854e-06, "loss": 0.1738, "step": 252, "teacher_loss": 0.15597465634346008 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.19527855515480042, "learning_rate": 1.0972965158305624e-06, "loss": 0.2675, "step": 253, "teacher_loss": 0.27551722526550293 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4267202317714691, "learning_rate": 1.1016336562093394e-06, "loss": 0.258, "step": 254, "teacher_loss": 0.23925891518592834 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 1.2726855278015137, "learning_rate": 1.1059707965881162e-06, "loss": 0.5782, "step": 255, "teacher_loss": 0.5010079145431519 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.38246625661849976, "learning_rate": 1.1103079369668932e-06, "loss": 0.3419, "step": 256, "teacher_loss": 0.33744117617607117 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.46521979570388794, "learning_rate": 1.1146450773456701e-06, "loss": 0.2633, "step": 257, "teacher_loss": 0.2408798187971115 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3050382733345032, "learning_rate": 1.1189822177244471e-06, "loss": 0.3395, "step": 258, "teacher_loss": 0.34336796402931213 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.45095938444137573, "learning_rate": 1.1233193581032241e-06, "loss": 0.3171, "step": 259, "teacher_loss": 0.3022615313529968 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4104680120944977, "learning_rate": 1.127656498482001e-06, "loss": 0.3668, "step": 260, "teacher_loss": 0.36197105050086975 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5469045042991638, "learning_rate": 1.1319936388607779e-06, "loss": 0.3028, "step": 261, "teacher_loss": 0.2756377160549164 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3915804922580719, "learning_rate": 1.1363307792395547e-06, "loss": 0.2776, "step": 262, "teacher_loss": 0.2649552524089813 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.22109833359718323, "learning_rate": 1.1406679196183316e-06, "loss": 0.2476, "step": 263, "teacher_loss": 0.2505549192428589 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4426504671573639, "learning_rate": 1.1450050599971086e-06, "loss": 0.4039, "step": 264, "teacher_loss": 0.3996136784553528 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5688486099243164, "learning_rate": 1.1493422003758856e-06, "loss": 0.3441, "step": 265, "teacher_loss": 0.3190777003765106 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5123586058616638, "learning_rate": 1.1536793407546624e-06, "loss": 0.3118, "step": 266, "teacher_loss": 0.2895471155643463 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.6764093637466431, "learning_rate": 1.1580164811334394e-06, "loss": 0.4376, "step": 267, "teacher_loss": 0.41103094816207886 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.578113317489624, "learning_rate": 1.1623536215122162e-06, "loss": 0.5465, "step": 268, "teacher_loss": 0.5429891347885132 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.9408819079399109, "learning_rate": 1.1666907618909934e-06, "loss": 0.3645, "step": 269, "teacher_loss": 0.3004879653453827 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3857704997062683, "learning_rate": 1.1710279022697701e-06, "loss": 0.1969, "step": 270, "teacher_loss": 0.17585934698581696 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5578367710113525, "learning_rate": 1.1753650426485471e-06, "loss": 0.288, "step": 271, "teacher_loss": 0.25804078578948975 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3610136806964874, "learning_rate": 1.1797021830273239e-06, "loss": 0.2635, "step": 272, "teacher_loss": 0.25263839960098267 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4490371644496918, "learning_rate": 1.1840393234061009e-06, "loss": 0.2865, "step": 273, "teacher_loss": 0.26838675141334534 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.18405231833457947, "learning_rate": 1.1883764637848779e-06, "loss": 0.188, "step": 274, "teacher_loss": 0.18840843439102173 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3971933126449585, "learning_rate": 1.1927136041636549e-06, "loss": 0.3303, "step": 275, "teacher_loss": 0.3228839337825775 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.43243539333343506, "learning_rate": 1.1970507445424318e-06, "loss": 0.2265, "step": 276, "teacher_loss": 0.20363551378250122 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.18577873706817627, "learning_rate": 1.2013878849212086e-06, "loss": 0.1833, "step": 277, "teacher_loss": 0.18302816152572632 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.48472264409065247, "learning_rate": 1.2057250252999856e-06, "loss": 0.2076, "step": 278, "teacher_loss": 0.1768307089805603 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.8165194988250732, "learning_rate": 1.2100621656787624e-06, "loss": 0.498, "step": 279, "teacher_loss": 0.46261414885520935 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5283606052398682, "learning_rate": 1.2143993060575396e-06, "loss": 0.321, "step": 280, "teacher_loss": 0.29796096682548523 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.43578922748565674, "learning_rate": 1.2187364464363164e-06, "loss": 0.2935, "step": 281, "teacher_loss": 0.27770906686782837 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.21357658505439758, "learning_rate": 1.2230735868150933e-06, "loss": 0.225, "step": 282, "teacher_loss": 0.22626854479312897 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.38164523243904114, "learning_rate": 1.2274107271938701e-06, "loss": 0.3308, "step": 283, "teacher_loss": 0.32512742280960083 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4603731036186218, "learning_rate": 1.231747867572647e-06, "loss": 0.3014, "step": 284, "teacher_loss": 0.2837747037410736 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4667670726776123, "learning_rate": 1.2360850079514239e-06, "loss": 0.2557, "step": 285, "teacher_loss": 0.23228047788143158 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.654017448425293, "learning_rate": 1.240422148330201e-06, "loss": 0.7453, "step": 286, "teacher_loss": 0.7554295063018799 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.6986708641052246, "learning_rate": 1.2447592887089778e-06, "loss": 0.2876, "step": 287, "teacher_loss": 0.24195826053619385 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3751755356788635, "learning_rate": 1.2490964290877548e-06, "loss": 0.2488, "step": 288, "teacher_loss": 0.23473919928073883 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4674665629863739, "learning_rate": 1.2534335694665318e-06, "loss": 0.3623, "step": 289, "teacher_loss": 0.3506540060043335 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.9339957237243652, "learning_rate": 1.2577707098453086e-06, "loss": 0.447, "step": 290, "teacher_loss": 0.39291030168533325 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.32550048828125, "learning_rate": 1.2621078502240858e-06, "loss": 0.2291, "step": 291, "teacher_loss": 0.21833573281764984 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.2830648720264435, "learning_rate": 1.2664449906028626e-06, "loss": 0.26, "step": 292, "teacher_loss": 0.257387638092041 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5135868191719055, "learning_rate": 1.2707821309816396e-06, "loss": 0.311, "step": 293, "teacher_loss": 0.28852254152297974 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.6661826968193054, "learning_rate": 1.2751192713604163e-06, "loss": 0.3246, "step": 294, "teacher_loss": 0.28665751218795776 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.10539337992668152, "learning_rate": 1.2794564117391933e-06, "loss": 0.2193, "step": 295, "teacher_loss": 0.23196059465408325 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.4741356372833252, "learning_rate": 1.2837935521179703e-06, "loss": 0.3949, "step": 296, "teacher_loss": 0.386050820350647 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.19788937270641327, "learning_rate": 1.2881306924967473e-06, "loss": 0.2118, "step": 297, "teacher_loss": 0.21335843205451965 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.2720310688018799, "learning_rate": 1.292467832875524e-06, "loss": 0.3443, "step": 298, "teacher_loss": 0.35228031873703003 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.5976897478103638, "learning_rate": 1.296804973254301e-06, "loss": 0.5105, "step": 299, "teacher_loss": 0.5007622241973877 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.29341644048690796, "learning_rate": 1.3011421136330778e-06, "loss": 0.2938, "step": 300, "teacher_loss": 0.2938333749771118 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.3075222373008728, "learning_rate": 1.3054792540118548e-06, "loss": 0.2691, "step": 301, "teacher_loss": 0.2647859454154968 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.49865618348121643, "learning_rate": 1.3098163943906318e-06, "loss": 0.2604, "step": 302, "teacher_loss": 0.23394174873828888 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.48242807388305664, "learning_rate": 1.3141535347694088e-06, "loss": 0.3184, "step": 303, "teacher_loss": 0.300139844417572 }, { "compression_loss": 0.0, "epoch": 0.05, "label_loss": 0.15943261981010437, "learning_rate": 1.3184906751481856e-06, "loss": 0.2188, "step": 304, "teacher_loss": 0.22538039088249207 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6063594818115234, "learning_rate": 1.3228278155269626e-06, "loss": 0.3055, "step": 305, "teacher_loss": 0.2721177339553833 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.36577171087265015, "learning_rate": 1.3271649559057395e-06, "loss": 0.2231, "step": 306, "teacher_loss": 0.20729850232601166 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6283776164054871, "learning_rate": 1.3315020962845165e-06, "loss": 0.3843, "step": 307, "teacher_loss": 0.35716527700424194 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5405994653701782, "learning_rate": 1.3358392366632935e-06, "loss": 0.3138, "step": 308, "teacher_loss": 0.2886366844177246 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5965635776519775, "learning_rate": 1.3401763770420703e-06, "loss": 0.2536, "step": 309, "teacher_loss": 0.2154558002948761 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5021859407424927, "learning_rate": 1.3445135174208473e-06, "loss": 0.3334, "step": 310, "teacher_loss": 0.31462663412094116 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.4530951976776123, "learning_rate": 1.348850657799624e-06, "loss": 0.3532, "step": 311, "teacher_loss": 0.3420509397983551 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6128342151641846, "learning_rate": 1.353187798178401e-06, "loss": 0.3002, "step": 312, "teacher_loss": 0.26549673080444336 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6090314388275146, "learning_rate": 1.357524938557178e-06, "loss": 0.3407, "step": 313, "teacher_loss": 0.31087803840637207 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.22318555414676666, "learning_rate": 1.361862078935955e-06, "loss": 0.2475, "step": 314, "teacher_loss": 0.25018370151519775 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.24272391200065613, "learning_rate": 1.3661992193147318e-06, "loss": 0.241, "step": 315, "teacher_loss": 0.2408469021320343 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6072691679000854, "learning_rate": 1.3705363596935088e-06, "loss": 0.3729, "step": 316, "teacher_loss": 0.3468794524669647 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.179803729057312, "learning_rate": 1.3748735000722855e-06, "loss": 0.1928, "step": 317, "teacher_loss": 0.19420017302036285 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.8709033727645874, "learning_rate": 1.3792106404510627e-06, "loss": 0.3194, "step": 318, "teacher_loss": 0.2581326961517334 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.24580241739749908, "learning_rate": 1.3835477808298395e-06, "loss": 0.2602, "step": 319, "teacher_loss": 0.2618195712566376 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.42237046360969543, "learning_rate": 1.3878849212086165e-06, "loss": 0.6006, "step": 320, "teacher_loss": 0.6204584836959839 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.7018339037895203, "learning_rate": 1.3922220615873935e-06, "loss": 0.321, "step": 321, "teacher_loss": 0.2787395119667053 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2072555273771286, "learning_rate": 1.3965592019661703e-06, "loss": 0.2243, "step": 322, "teacher_loss": 0.2262054681777954 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.49878421425819397, "learning_rate": 1.4008963423449473e-06, "loss": 0.3428, "step": 323, "teacher_loss": 0.32551002502441406 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.49750423431396484, "learning_rate": 1.4052334827237242e-06, "loss": 0.2733, "step": 324, "teacher_loss": 0.2483411729335785 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.47724393010139465, "learning_rate": 1.4095706231025012e-06, "loss": 0.3038, "step": 325, "teacher_loss": 0.2844970226287842 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2629703879356384, "learning_rate": 1.413907763481278e-06, "loss": 0.2195, "step": 326, "teacher_loss": 0.21464210748672485 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.4069712162017822, "learning_rate": 1.418244903860055e-06, "loss": 0.2487, "step": 327, "teacher_loss": 0.231143057346344 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3577013611793518, "learning_rate": 1.4225820442388318e-06, "loss": 0.2773, "step": 328, "teacher_loss": 0.26837706565856934 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2630511522293091, "learning_rate": 1.426919184617609e-06, "loss": 0.3246, "step": 329, "teacher_loss": 0.3313871920108795 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3583536446094513, "learning_rate": 1.4312563249963857e-06, "loss": 0.2763, "step": 330, "teacher_loss": 0.26717931032180786 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6040447354316711, "learning_rate": 1.4355934653751627e-06, "loss": 0.3525, "step": 331, "teacher_loss": 0.324535071849823 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3912610411643982, "learning_rate": 1.4399306057539395e-06, "loss": 0.3091, "step": 332, "teacher_loss": 0.2999173402786255 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.217939555644989, "learning_rate": 1.4442677461327165e-06, "loss": 0.2451, "step": 333, "teacher_loss": 0.24812567234039307 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5532275438308716, "learning_rate": 1.4486048865114935e-06, "loss": 0.4276, "step": 334, "teacher_loss": 0.41367918252944946 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5887179374694824, "learning_rate": 1.4529420268902705e-06, "loss": 0.3989, "step": 335, "teacher_loss": 0.3777720034122467 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5182253122329712, "learning_rate": 1.4572791672690472e-06, "loss": 0.2488, "step": 336, "teacher_loss": 0.21885734796524048 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5497493743896484, "learning_rate": 1.4616163076478242e-06, "loss": 0.2501, "step": 337, "teacher_loss": 0.2168208807706833 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.6350235939025879, "learning_rate": 1.4659534480266012e-06, "loss": 0.3647, "step": 338, "teacher_loss": 0.3346971273422241 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.4286232888698578, "learning_rate": 1.470290588405378e-06, "loss": 0.2719, "step": 339, "teacher_loss": 0.25453823804855347 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.597405195236206, "learning_rate": 1.4746277287841552e-06, "loss": 0.2909, "step": 340, "teacher_loss": 0.2568548321723938 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5068219304084778, "learning_rate": 1.478964869162932e-06, "loss": 0.2928, "step": 341, "teacher_loss": 0.2690381705760956 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3326978087425232, "learning_rate": 1.483302009541709e-06, "loss": 0.2937, "step": 342, "teacher_loss": 0.2893740236759186 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3101786971092224, "learning_rate": 1.4876391499204857e-06, "loss": 0.3049, "step": 343, "teacher_loss": 0.3043462038040161 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.653206467628479, "learning_rate": 1.4919762902992627e-06, "loss": 0.311, "step": 344, "teacher_loss": 0.27302148938179016 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.37953609228134155, "learning_rate": 1.4963134306780397e-06, "loss": 0.2532, "step": 345, "teacher_loss": 0.23919625580310822 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2678660452365875, "learning_rate": 1.5006505710568167e-06, "loss": 0.2476, "step": 346, "teacher_loss": 0.245295912027359 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.4508707821369171, "learning_rate": 1.5049877114355935e-06, "loss": 0.2403, "step": 347, "teacher_loss": 0.2168768048286438 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.47383183240890503, "learning_rate": 1.5093248518143704e-06, "loss": 0.2995, "step": 348, "teacher_loss": 0.28011664748191833 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3042389452457428, "learning_rate": 1.5136619921931472e-06, "loss": 0.3169, "step": 349, "teacher_loss": 0.31831949949264526 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.12408284842967987, "learning_rate": 1.5179991325719242e-06, "loss": 0.5567, "step": 350, "teacher_loss": 0.6047139167785645 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5269701480865479, "learning_rate": 1.5223362729507012e-06, "loss": 0.2722, "step": 351, "teacher_loss": 0.2439274787902832 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.5357444882392883, "learning_rate": 1.5266734133294782e-06, "loss": 0.327, "step": 352, "teacher_loss": 0.30385881662368774 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.34682193398475647, "learning_rate": 1.531010553708255e-06, "loss": 0.3136, "step": 353, "teacher_loss": 0.30985987186431885 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.3019476532936096, "learning_rate": 1.535347694087032e-06, "loss": 0.2591, "step": 354, "teacher_loss": 0.25438013672828674 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.1920035183429718, "learning_rate": 1.539684834465809e-06, "loss": 0.2727, "step": 355, "teacher_loss": 0.2816593050956726 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2970641255378723, "learning_rate": 1.544021974844586e-06, "loss": 0.2986, "step": 356, "teacher_loss": 0.2987341284751892 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.4337921738624573, "learning_rate": 1.548359115223363e-06, "loss": 0.4537, "step": 357, "teacher_loss": 0.4558786451816559 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2025834023952484, "learning_rate": 1.5526962556021397e-06, "loss": 0.2338, "step": 358, "teacher_loss": 0.23727522790431976 }, { "compression_loss": 0.0, "epoch": 0.06, "label_loss": 0.2037421315908432, "learning_rate": 1.5570333959809167e-06, "loss": 0.2828, "step": 359, "teacher_loss": 0.29158878326416016 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.6193740367889404, "learning_rate": 1.5613705363596934e-06, "loss": 0.3379, "step": 360, "teacher_loss": 0.3066261410713196 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.721538782119751, "learning_rate": 1.5657076767384704e-06, "loss": 0.4034, "step": 361, "teacher_loss": 0.36805254220962524 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.7160407304763794, "learning_rate": 1.5700448171172474e-06, "loss": 0.29, "step": 362, "teacher_loss": 0.24262619018554688 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.37309473752975464, "learning_rate": 1.5743819574960244e-06, "loss": 0.2789, "step": 363, "teacher_loss": 0.2683855891227722 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.42594701051712036, "learning_rate": 1.5787190978748012e-06, "loss": 0.3065, "step": 364, "teacher_loss": 0.293210506439209 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.40625637769699097, "learning_rate": 1.5830562382535782e-06, "loss": 0.2443, "step": 365, "teacher_loss": 0.22632664442062378 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.24801987409591675, "learning_rate": 1.587393378632355e-06, "loss": 0.2364, "step": 366, "teacher_loss": 0.23507001996040344 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3892616033554077, "learning_rate": 1.5917305190111321e-06, "loss": 0.2747, "step": 367, "teacher_loss": 0.2619546949863434 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.2886156141757965, "learning_rate": 1.596067659389909e-06, "loss": 0.2641, "step": 368, "teacher_loss": 0.26142188906669617 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5245214700698853, "learning_rate": 1.600404799768686e-06, "loss": 0.2673, "step": 369, "teacher_loss": 0.2386949360370636 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.38306134939193726, "learning_rate": 1.6047419401474629e-06, "loss": 0.4083, "step": 370, "teacher_loss": 0.4110714793205261 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.4856521785259247, "learning_rate": 1.6090790805262397e-06, "loss": 0.2535, "step": 371, "teacher_loss": 0.2276982069015503 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3180727958679199, "learning_rate": 1.6134162209050169e-06, "loss": 0.2837, "step": 372, "teacher_loss": 0.2799244523048401 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3749374747276306, "learning_rate": 1.6177533612837936e-06, "loss": 0.2494, "step": 373, "teacher_loss": 0.2354069948196411 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.4214995801448822, "learning_rate": 1.6220905016625706e-06, "loss": 0.2711, "step": 374, "teacher_loss": 0.2543748617172241 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.40043288469314575, "learning_rate": 1.6264276420413474e-06, "loss": 0.2743, "step": 375, "teacher_loss": 0.26031285524368286 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 1.037966251373291, "learning_rate": 1.6307647824201244e-06, "loss": 0.3784, "step": 376, "teacher_loss": 0.30508655309677124 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.33204352855682373, "learning_rate": 1.6351019227989012e-06, "loss": 0.4159, "step": 377, "teacher_loss": 0.4251980185508728 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.19326746463775635, "learning_rate": 1.6394390631776784e-06, "loss": 0.2566, "step": 378, "teacher_loss": 0.26364821195602417 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.20111048221588135, "learning_rate": 1.6437762035564551e-06, "loss": 0.196, "step": 379, "teacher_loss": 0.19547748565673828 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.15460160374641418, "learning_rate": 1.6481133439352321e-06, "loss": 0.2378, "step": 380, "teacher_loss": 0.24707093834877014 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5643184185028076, "learning_rate": 1.6524504843140089e-06, "loss": 0.5085, "step": 381, "teacher_loss": 0.5023406744003296 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.46036678552627563, "learning_rate": 1.6567876246927859e-06, "loss": 0.2234, "step": 382, "teacher_loss": 0.1970236748456955 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.393317848443985, "learning_rate": 1.6611247650715629e-06, "loss": 0.3106, "step": 383, "teacher_loss": 0.3013565242290497 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3466640114784241, "learning_rate": 1.6654619054503399e-06, "loss": 0.4081, "step": 384, "teacher_loss": 0.41491347551345825 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.42049169540405273, "learning_rate": 1.6697990458291166e-06, "loss": 0.2571, "step": 385, "teacher_loss": 0.23896369338035583 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.4995405375957489, "learning_rate": 1.6741361862078936e-06, "loss": 0.3218, "step": 386, "teacher_loss": 0.3020709156990051 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3334900736808777, "learning_rate": 1.6784733265866706e-06, "loss": 0.2438, "step": 387, "teacher_loss": 0.23387368023395538 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.41797807812690735, "learning_rate": 1.6828104669654474e-06, "loss": 0.394, "step": 388, "teacher_loss": 0.39138734340667725 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.38038504123687744, "learning_rate": 1.6871476073442246e-06, "loss": 0.2249, "step": 389, "teacher_loss": 0.20758700370788574 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5758942365646362, "learning_rate": 1.6914847477230013e-06, "loss": 0.6417, "step": 390, "teacher_loss": 0.6490577459335327 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5764553546905518, "learning_rate": 1.6958218881017783e-06, "loss": 0.3365, "step": 391, "teacher_loss": 0.30982881784439087 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.408348023891449, "learning_rate": 1.7001590284805551e-06, "loss": 0.2754, "step": 392, "teacher_loss": 0.2605964243412018 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.6151769161224365, "learning_rate": 1.704496168859332e-06, "loss": 0.3425, "step": 393, "teacher_loss": 0.31217846274375916 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.23972713947296143, "learning_rate": 1.708833309238109e-06, "loss": 0.2535, "step": 394, "teacher_loss": 0.25504136085510254 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.16879598796367645, "learning_rate": 1.713170449616886e-06, "loss": 0.2167, "step": 395, "teacher_loss": 0.2219766527414322 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.26204800605773926, "learning_rate": 1.7175075899956628e-06, "loss": 0.2036, "step": 396, "teacher_loss": 0.197129487991333 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.43005844950675964, "learning_rate": 1.7218447303744398e-06, "loss": 0.2262, "step": 397, "teacher_loss": 0.20354320108890533 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3438878655433655, "learning_rate": 1.7261818707532166e-06, "loss": 0.2442, "step": 398, "teacher_loss": 0.23312923312187195 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.38927507400512695, "learning_rate": 1.7305190111319936e-06, "loss": 0.1928, "step": 399, "teacher_loss": 0.17091867327690125 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3770056962966919, "learning_rate": 1.7348561515107706e-06, "loss": 0.27, "step": 400, "teacher_loss": 0.25815635919570923 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.7730339765548706, "learning_rate": 1.7391932918895476e-06, "loss": 0.3726, "step": 401, "teacher_loss": 0.3281252384185791 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.4280671775341034, "learning_rate": 1.7435304322683246e-06, "loss": 0.2508, "step": 402, "teacher_loss": 0.23111285269260406 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.20550131797790527, "learning_rate": 1.7478675726471013e-06, "loss": 0.2005, "step": 403, "teacher_loss": 0.19990915060043335 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.48787587881088257, "learning_rate": 1.7522047130258783e-06, "loss": 0.4844, "step": 404, "teacher_loss": 0.48398470878601074 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.2736685574054718, "learning_rate": 1.7565418534046553e-06, "loss": 0.4097, "step": 405, "teacher_loss": 0.4247642755508423 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5428891777992249, "learning_rate": 1.7608789937834323e-06, "loss": 0.3313, "step": 406, "teacher_loss": 0.3077879250049591 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.2727331519126892, "learning_rate": 1.765216134162209e-06, "loss": 0.2919, "step": 407, "teacher_loss": 0.29404160380363464 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.330168753862381, "learning_rate": 1.769553274540986e-06, "loss": 0.2686, "step": 408, "teacher_loss": 0.26173800230026245 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5277129411697388, "learning_rate": 1.7738904149197628e-06, "loss": 0.4467, "step": 409, "teacher_loss": 0.4377474784851074 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.3135142922401428, "learning_rate": 1.77822755529854e-06, "loss": 0.2187, "step": 410, "teacher_loss": 0.20820513367652893 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.2130378931760788, "learning_rate": 1.7825646956773168e-06, "loss": 0.2234, "step": 411, "teacher_loss": 0.22456574440002441 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.7675861120223999, "learning_rate": 1.7869018360560938e-06, "loss": 0.3381, "step": 412, "teacher_loss": 0.2903895974159241 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.4666363596916199, "learning_rate": 1.7912389764348706e-06, "loss": 0.2097, "step": 413, "teacher_loss": 0.18114443123340607 }, { "compression_loss": 0.0, "epoch": 0.07, "label_loss": 0.5393398404121399, "learning_rate": 1.7955761168136475e-06, "loss": 0.2752, "step": 414, "teacher_loss": 0.24581646919250488 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.29600080847740173, "learning_rate": 1.7999132571924243e-06, "loss": 0.2741, "step": 415, "teacher_loss": 0.2717086672782898 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.3500150144100189, "learning_rate": 1.8042503975712015e-06, "loss": 0.2643, "step": 416, "teacher_loss": 0.25477874279022217 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.5084381699562073, "learning_rate": 1.8085875379499783e-06, "loss": 0.363, "step": 417, "teacher_loss": 0.3468474745750427 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.49075108766555786, "learning_rate": 1.8129246783287553e-06, "loss": 0.3361, "step": 418, "teacher_loss": 0.3189225196838379 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.327508807182312, "learning_rate": 1.8172618187075323e-06, "loss": 0.2164, "step": 419, "teacher_loss": 0.2040756493806839 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 1.1091556549072266, "learning_rate": 1.821598959086309e-06, "loss": 0.4772, "step": 420, "teacher_loss": 0.4069884717464447 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.6659070253372192, "learning_rate": 1.8259360994650862e-06, "loss": 0.3733, "step": 421, "teacher_loss": 0.34075677394866943 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.34834855794906616, "learning_rate": 1.830273239843863e-06, "loss": 0.2507, "step": 422, "teacher_loss": 0.23987625539302826 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.6248312592506409, "learning_rate": 1.83461038022264e-06, "loss": 0.2723, "step": 423, "teacher_loss": 0.233115553855896 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4233854413032532, "learning_rate": 1.8389475206014168e-06, "loss": 0.195, "step": 424, "teacher_loss": 0.16966009140014648 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.21375705301761627, "learning_rate": 1.8432846609801938e-06, "loss": 0.273, "step": 425, "teacher_loss": 0.27958256006240845 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4494283199310303, "learning_rate": 1.8476218013589705e-06, "loss": 0.2123, "step": 426, "teacher_loss": 0.18590784072875977 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.6090162396430969, "learning_rate": 1.8519589417377477e-06, "loss": 0.3485, "step": 427, "teacher_loss": 0.319607675075531 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.44910502433776855, "learning_rate": 1.8562960821165245e-06, "loss": 0.3034, "step": 428, "teacher_loss": 0.28720515966415405 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.18220269680023193, "learning_rate": 1.8606332224953015e-06, "loss": 0.2636, "step": 429, "teacher_loss": 0.2726060748100281 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.6217266917228699, "learning_rate": 1.8649703628740783e-06, "loss": 0.2855, "step": 430, "teacher_loss": 0.2481021285057068 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2947242856025696, "learning_rate": 1.8693075032528553e-06, "loss": 0.3036, "step": 431, "teacher_loss": 0.304562509059906 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.7192350625991821, "learning_rate": 1.8736446436316323e-06, "loss": 0.2519, "step": 432, "teacher_loss": 0.199924498796463 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.24471047520637512, "learning_rate": 1.8779817840104092e-06, "loss": 0.2277, "step": 433, "teacher_loss": 0.2258528470993042 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2768610715866089, "learning_rate": 1.8823189243891862e-06, "loss": 0.2634, "step": 434, "teacher_loss": 0.26193076372146606 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.44071686267852783, "learning_rate": 1.886656064767963e-06, "loss": 0.2564, "step": 435, "teacher_loss": 0.2359606921672821 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.22673246264457703, "learning_rate": 1.89099320514674e-06, "loss": 0.3108, "step": 436, "teacher_loss": 0.32010000944137573 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.9465312361717224, "learning_rate": 1.8953303455255168e-06, "loss": 0.7238, "step": 437, "teacher_loss": 0.6990618705749512 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.7088655233383179, "learning_rate": 1.8996674859042937e-06, "loss": 0.2849, "step": 438, "teacher_loss": 0.23781141638755798 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.32122692465782166, "learning_rate": 1.9040046262830705e-06, "loss": 0.2542, "step": 439, "teacher_loss": 0.2468041330575943 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.19299942255020142, "learning_rate": 1.908341766661848e-06, "loss": 0.2949, "step": 440, "teacher_loss": 0.3062227964401245 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.32944902777671814, "learning_rate": 1.9126789070406247e-06, "loss": 0.3241, "step": 441, "teacher_loss": 0.32347506284713745 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.19571147859096527, "learning_rate": 1.9170160474194015e-06, "loss": 0.2531, "step": 442, "teacher_loss": 0.2594839930534363 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2997242212295532, "learning_rate": 1.9213531877981787e-06, "loss": 0.2433, "step": 443, "teacher_loss": 0.2370450645685196 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2977994680404663, "learning_rate": 1.9256903281769555e-06, "loss": 0.5177, "step": 444, "teacher_loss": 0.5421225428581238 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.5032090544700623, "learning_rate": 1.9300274685557322e-06, "loss": 0.2675, "step": 445, "teacher_loss": 0.2413441240787506 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.37059757113456726, "learning_rate": 1.934364608934509e-06, "loss": 0.1822, "step": 446, "teacher_loss": 0.16121342778205872 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.24433265626430511, "learning_rate": 1.938701749313286e-06, "loss": 0.2393, "step": 447, "teacher_loss": 0.23873546719551086 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.5554165840148926, "learning_rate": 1.943038889692063e-06, "loss": 0.2665, "step": 448, "teacher_loss": 0.23443107306957245 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.66746985912323, "learning_rate": 1.9473760300708398e-06, "loss": 0.4268, "step": 449, "teacher_loss": 0.40007448196411133 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.31764930486679077, "learning_rate": 1.951713170449617e-06, "loss": 0.2901, "step": 450, "teacher_loss": 0.2870023548603058 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.37764155864715576, "learning_rate": 1.956050310828394e-06, "loss": 0.2791, "step": 451, "teacher_loss": 0.2681971490383148 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.43080389499664307, "learning_rate": 1.960387451207171e-06, "loss": 0.2807, "step": 452, "teacher_loss": 0.2640213370323181 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.47938570380210876, "learning_rate": 1.9647245915859477e-06, "loss": 0.6168, "step": 453, "teacher_loss": 0.6321121454238892 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.32987815141677856, "learning_rate": 1.969061731964725e-06, "loss": 0.1858, "step": 454, "teacher_loss": 0.16981291770935059 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.5893688201904297, "learning_rate": 1.9733988723435017e-06, "loss": 0.2976, "step": 455, "teacher_loss": 0.26523154973983765 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4144860804080963, "learning_rate": 1.9777360127222785e-06, "loss": 0.2705, "step": 456, "teacher_loss": 0.2545433044433594 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.9025605916976929, "learning_rate": 1.9820731531010552e-06, "loss": 0.4697, "step": 457, "teacher_loss": 0.42164015769958496 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4072743058204651, "learning_rate": 1.9864102934798324e-06, "loss": 0.291, "step": 458, "teacher_loss": 0.2780720293521881 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.36306989192962646, "learning_rate": 1.990747433858609e-06, "loss": 0.2436, "step": 459, "teacher_loss": 0.23030637204647064 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.5169907212257385, "learning_rate": 1.995084574237386e-06, "loss": 0.3379, "step": 460, "teacher_loss": 0.3180461823940277 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.6494100093841553, "learning_rate": 1.9994217146161628e-06, "loss": 0.2999, "step": 461, "teacher_loss": 0.2610490620136261 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.3072289824485779, "learning_rate": 2.0037588549949404e-06, "loss": 0.2342, "step": 462, "teacher_loss": 0.22611698508262634 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.3994767665863037, "learning_rate": 2.008095995373717e-06, "loss": 0.3317, "step": 463, "teacher_loss": 0.32413503527641296 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4888478219509125, "learning_rate": 2.012433135752494e-06, "loss": 0.2647, "step": 464, "teacher_loss": 0.23980139195919037 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4228232800960541, "learning_rate": 2.0167702761312707e-06, "loss": 0.2307, "step": 465, "teacher_loss": 0.2093290090560913 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.629534125328064, "learning_rate": 2.021107416510048e-06, "loss": 0.2641, "step": 466, "teacher_loss": 0.22347673773765564 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.7569173574447632, "learning_rate": 2.0254445568888247e-06, "loss": 0.348, "step": 467, "teacher_loss": 0.3025675415992737 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.4804191589355469, "learning_rate": 2.0297816972676014e-06, "loss": 0.2235, "step": 468, "teacher_loss": 0.19495120644569397 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2551216185092926, "learning_rate": 2.0341188376463786e-06, "loss": 0.2386, "step": 469, "teacher_loss": 0.23679262399673462 }, { "compression_loss": 0.0, "epoch": 0.08, "label_loss": 0.2082975208759308, "learning_rate": 2.0384559780251554e-06, "loss": 0.1883, "step": 470, "teacher_loss": 0.18602964282035828 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2937382757663727, "learning_rate": 2.042793118403932e-06, "loss": 0.2065, "step": 471, "teacher_loss": 0.19685763120651245 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.18604007363319397, "learning_rate": 2.047130258782709e-06, "loss": 0.4785, "step": 472, "teacher_loss": 0.5109646320343018 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2973581850528717, "learning_rate": 2.0514673991614866e-06, "loss": 0.2446, "step": 473, "teacher_loss": 0.23872309923171997 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5202877521514893, "learning_rate": 2.0558045395402634e-06, "loss": 0.2232, "step": 474, "teacher_loss": 0.19023792445659637 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5331915616989136, "learning_rate": 2.06014167991904e-06, "loss": 0.2618, "step": 475, "teacher_loss": 0.2316759079694748 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.277387797832489, "learning_rate": 2.064478820297817e-06, "loss": 0.206, "step": 476, "teacher_loss": 0.19810503721237183 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.486581027507782, "learning_rate": 2.068815960676594e-06, "loss": 0.487, "step": 477, "teacher_loss": 0.4870527982711792 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.38122931122779846, "learning_rate": 2.073153101055371e-06, "loss": 0.2039, "step": 478, "teacher_loss": 0.18414457142353058 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2526359558105469, "learning_rate": 2.0774902414341477e-06, "loss": 0.2823, "step": 479, "teacher_loss": 0.28556084632873535 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.22659647464752197, "learning_rate": 2.0818273818129244e-06, "loss": 0.2134, "step": 480, "teacher_loss": 0.21197384595870972 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.1992442011833191, "learning_rate": 2.0861645221917016e-06, "loss": 0.1937, "step": 481, "teacher_loss": 0.19306373596191406 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5664013624191284, "learning_rate": 2.0905016625704784e-06, "loss": 0.3027, "step": 482, "teacher_loss": 0.27343302965164185 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.3245323896408081, "learning_rate": 2.094838802949255e-06, "loss": 0.2011, "step": 483, "teacher_loss": 0.18735739588737488 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.9848895072937012, "learning_rate": 2.099175943328033e-06, "loss": 0.4683, "step": 484, "teacher_loss": 0.4109257757663727 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5511682629585266, "learning_rate": 2.1035130837068096e-06, "loss": 0.5622, "step": 485, "teacher_loss": 0.5634675025939941 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.593747615814209, "learning_rate": 2.1078502240855864e-06, "loss": 0.204, "step": 486, "teacher_loss": 0.16068899631500244 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2801670432090759, "learning_rate": 2.112187364464363e-06, "loss": 0.2083, "step": 487, "teacher_loss": 0.20030780136585236 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.3808116912841797, "learning_rate": 2.1165245048431403e-06, "loss": 0.2938, "step": 488, "teacher_loss": 0.2841397523880005 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.27547040581703186, "learning_rate": 2.120861645221917e-06, "loss": 0.2648, "step": 489, "teacher_loss": 0.26362812519073486 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.7601871490478516, "learning_rate": 2.125198785600694e-06, "loss": 0.3349, "step": 490, "teacher_loss": 0.2876013517379761 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.3447526693344116, "learning_rate": 2.1295359259794707e-06, "loss": 0.1761, "step": 491, "teacher_loss": 0.1573595404624939 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.4270593523979187, "learning_rate": 2.133873066358248e-06, "loss": 0.45, "step": 492, "teacher_loss": 0.45252642035484314 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.4965693950653076, "learning_rate": 2.1382102067370246e-06, "loss": 0.2625, "step": 493, "teacher_loss": 0.23646843433380127 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.4682677090167999, "learning_rate": 2.1425473471158014e-06, "loss": 0.2653, "step": 494, "teacher_loss": 0.2427406907081604 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.18518725037574768, "learning_rate": 2.1468844874945786e-06, "loss": 0.2164, "step": 495, "teacher_loss": 0.21982312202453613 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.39510196447372437, "learning_rate": 2.151221627873356e-06, "loss": 0.2742, "step": 496, "teacher_loss": 0.2608032822608948 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.23207172751426697, "learning_rate": 2.1555587682521326e-06, "loss": 0.3791, "step": 497, "teacher_loss": 0.3954426050186157 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.711067795753479, "learning_rate": 2.1598959086309094e-06, "loss": 0.28, "step": 498, "teacher_loss": 0.2320762425661087 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 1.173708438873291, "learning_rate": 2.1642330490096866e-06, "loss": 0.3219, "step": 499, "teacher_loss": 0.22727572917938232 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5813969373703003, "learning_rate": 2.1685701893884633e-06, "loss": 0.2299, "step": 500, "teacher_loss": 0.19085891544818878 }, { "epoch": 0.09, "eval_exact_match": 79.93377483443709, "eval_f1": 87.34056773477343, "step": 500 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2771046459674835, "learning_rate": 2.17290732976724e-06, "loss": 0.3274, "step": 501, "teacher_loss": 0.33294737339019775 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.22974810004234314, "learning_rate": 2.177244470146017e-06, "loss": 0.2027, "step": 502, "teacher_loss": 0.19970601797103882 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.6079027652740479, "learning_rate": 2.181581610524794e-06, "loss": 0.2981, "step": 503, "teacher_loss": 0.263627827167511 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.39554834365844727, "learning_rate": 2.185918750903571e-06, "loss": 0.2559, "step": 504, "teacher_loss": 0.2404191792011261 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.578843891620636, "learning_rate": 2.190255891282348e-06, "loss": 0.2453, "step": 505, "teacher_loss": 0.20824384689331055 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5476224422454834, "learning_rate": 2.194593031661125e-06, "loss": 0.3513, "step": 506, "teacher_loss": 0.32946211099624634 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.29302459955215454, "learning_rate": 2.198930172039902e-06, "loss": 0.2339, "step": 507, "teacher_loss": 0.22737549245357513 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.895563006401062, "learning_rate": 2.203267312418679e-06, "loss": 0.2782, "step": 508, "teacher_loss": 0.20957829058170319 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.4441918432712555, "learning_rate": 2.2076044527974556e-06, "loss": 0.3246, "step": 509, "teacher_loss": 0.3112950325012207 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.2914983630180359, "learning_rate": 2.2119415931762324e-06, "loss": 0.2769, "step": 510, "teacher_loss": 0.2753213942050934 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.544796347618103, "learning_rate": 2.2162787335550096e-06, "loss": 0.277, "step": 511, "teacher_loss": 0.24726511538028717 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.5013731122016907, "learning_rate": 2.2206158739337863e-06, "loss": 0.2505, "step": 512, "teacher_loss": 0.22265717387199402 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.3373941481113434, "learning_rate": 2.224953014312563e-06, "loss": 0.3402, "step": 513, "teacher_loss": 0.340520977973938 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.32549047470092773, "learning_rate": 2.2292901546913403e-06, "loss": 0.2519, "step": 514, "teacher_loss": 0.2436991184949875 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.43331462144851685, "learning_rate": 2.233627295070117e-06, "loss": 0.3781, "step": 515, "teacher_loss": 0.3719749450683594 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.48328661918640137, "learning_rate": 2.2379644354488943e-06, "loss": 0.2191, "step": 516, "teacher_loss": 0.18973436951637268 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.4888170063495636, "learning_rate": 2.242301575827671e-06, "loss": 0.2753, "step": 517, "teacher_loss": 0.25160840153694153 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.18766647577285767, "learning_rate": 2.2466387162064482e-06, "loss": 0.2615, "step": 518, "teacher_loss": 0.26968494057655334 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.9773036241531372, "learning_rate": 2.250975856585225e-06, "loss": 0.3672, "step": 519, "teacher_loss": 0.2994511127471924 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.1253749430179596, "learning_rate": 2.255312996964002e-06, "loss": 0.2172, "step": 520, "teacher_loss": 0.2274502068758011 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.9317730069160461, "learning_rate": 2.2596501373427786e-06, "loss": 0.4157, "step": 521, "teacher_loss": 0.3583186864852905 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.19191187620162964, "learning_rate": 2.2639872777215558e-06, "loss": 0.3742, "step": 522, "teacher_loss": 0.39444875717163086 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.16487179696559906, "learning_rate": 2.2683244181003325e-06, "loss": 0.1609, "step": 523, "teacher_loss": 0.16045315563678741 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.735713005065918, "learning_rate": 2.2726615584791093e-06, "loss": 0.295, "step": 524, "teacher_loss": 0.24605339765548706 }, { "compression_loss": 0.0, "epoch": 0.09, "label_loss": 0.9322723150253296, "learning_rate": 2.276998698857886e-06, "loss": 0.5034, "step": 525, "teacher_loss": 0.4557216167449951 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3997858166694641, "learning_rate": 2.2813358392366633e-06, "loss": 0.2411, "step": 526, "teacher_loss": 0.2234981507062912 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5708207488059998, "learning_rate": 2.2856729796154405e-06, "loss": 0.2833, "step": 527, "teacher_loss": 0.25130024552345276 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5658714771270752, "learning_rate": 2.2900101199942173e-06, "loss": 0.3149, "step": 528, "teacher_loss": 0.28703922033309937 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5094200372695923, "learning_rate": 2.294347260372994e-06, "loss": 0.3207, "step": 529, "teacher_loss": 0.29976028203964233 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5431407690048218, "learning_rate": 2.2986844007517712e-06, "loss": 0.3439, "step": 530, "teacher_loss": 0.3217581510543823 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.19011594355106354, "learning_rate": 2.303021541130548e-06, "loss": 0.2466, "step": 531, "teacher_loss": 0.2528422772884369 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.512394368648529, "learning_rate": 2.307358681509325e-06, "loss": 0.343, "step": 532, "teacher_loss": 0.3241584002971649 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.4413803815841675, "learning_rate": 2.311695821888102e-06, "loss": 0.2907, "step": 533, "teacher_loss": 0.2739686369895935 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.6413787007331848, "learning_rate": 2.3160329622668788e-06, "loss": 0.3494, "step": 534, "teacher_loss": 0.31693750619888306 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.43459856510162354, "learning_rate": 2.3203701026456555e-06, "loss": 0.2386, "step": 535, "teacher_loss": 0.2168196439743042 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.49601468443870544, "learning_rate": 2.3247072430244323e-06, "loss": 0.3205, "step": 536, "teacher_loss": 0.3009607791900635 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.6900720596313477, "learning_rate": 2.3290443834032095e-06, "loss": 0.291, "step": 537, "teacher_loss": 0.24667127430438995 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.31261855363845825, "learning_rate": 2.3333815237819867e-06, "loss": 0.2045, "step": 538, "teacher_loss": 0.19246099889278412 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.8484382629394531, "learning_rate": 2.3377186641607635e-06, "loss": 0.4827, "step": 539, "teacher_loss": 0.4420757591724396 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.4161158502101898, "learning_rate": 2.3420558045395403e-06, "loss": 0.2889, "step": 540, "teacher_loss": 0.27474966645240784 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.425876647233963, "learning_rate": 2.3463929449183175e-06, "loss": 0.233, "step": 541, "teacher_loss": 0.2116113007068634 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.42528918385505676, "learning_rate": 2.3507300852970942e-06, "loss": 0.2988, "step": 542, "teacher_loss": 0.28477996587753296 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.6934197545051575, "learning_rate": 2.355067225675871e-06, "loss": 0.3753, "step": 543, "teacher_loss": 0.33992424607276917 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.42282840609550476, "learning_rate": 2.3594043660546478e-06, "loss": 0.2656, "step": 544, "teacher_loss": 0.24810999631881714 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3808443248271942, "learning_rate": 2.363741506433425e-06, "loss": 0.2999, "step": 545, "teacher_loss": 0.2908857464790344 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.46012943983078003, "learning_rate": 2.3680786468122018e-06, "loss": 0.3068, "step": 546, "teacher_loss": 0.28980737924575806 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.7822651863098145, "learning_rate": 2.3724157871909785e-06, "loss": 0.3352, "step": 547, "teacher_loss": 0.2855678200721741 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.23164251446723938, "learning_rate": 2.3767529275697557e-06, "loss": 0.1892, "step": 548, "teacher_loss": 0.18445700407028198 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.22667209804058075, "learning_rate": 2.381090067948533e-06, "loss": 0.2639, "step": 549, "teacher_loss": 0.2680549621582031 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 1.0708098411560059, "learning_rate": 2.3854272083273097e-06, "loss": 0.5789, "step": 550, "teacher_loss": 0.5242176055908203 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.45995640754699707, "learning_rate": 2.3897643487060865e-06, "loss": 0.4241, "step": 551, "teacher_loss": 0.42016488313674927 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.52562415599823, "learning_rate": 2.3941014890848637e-06, "loss": 0.2929, "step": 552, "teacher_loss": 0.2669890522956848 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.2499690055847168, "learning_rate": 2.3984386294636405e-06, "loss": 0.2479, "step": 553, "teacher_loss": 0.24763701856136322 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.24137386679649353, "learning_rate": 2.4027757698424172e-06, "loss": 0.2254, "step": 554, "teacher_loss": 0.2235698699951172 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3888161778450012, "learning_rate": 2.407112910221194e-06, "loss": 0.2376, "step": 555, "teacher_loss": 0.22075967490673065 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.31894826889038086, "learning_rate": 2.411450050599971e-06, "loss": 0.2192, "step": 556, "teacher_loss": 0.20806646347045898 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.248812735080719, "learning_rate": 2.415787190978748e-06, "loss": 0.2175, "step": 557, "teacher_loss": 0.21404403448104858 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.34315747022628784, "learning_rate": 2.4201243313575248e-06, "loss": 0.2403, "step": 558, "teacher_loss": 0.22885122895240784 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.4260071814060211, "learning_rate": 2.4244614717363015e-06, "loss": 0.2778, "step": 559, "teacher_loss": 0.2613573670387268 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3047531247138977, "learning_rate": 2.428798612115079e-06, "loss": 0.1531, "step": 560, "teacher_loss": 0.13628603518009186 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.39074549078941345, "learning_rate": 2.433135752493856e-06, "loss": 0.3214, "step": 561, "teacher_loss": 0.3136478662490845 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.332958459854126, "learning_rate": 2.4374728928726327e-06, "loss": 0.1695, "step": 562, "teacher_loss": 0.15136626362800598 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.27515649795532227, "learning_rate": 2.44181003325141e-06, "loss": 0.209, "step": 563, "teacher_loss": 0.2016858160495758 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.2551475167274475, "learning_rate": 2.4461471736301867e-06, "loss": 0.1781, "step": 564, "teacher_loss": 0.16953346133232117 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.31979674100875854, "learning_rate": 2.4504843140089635e-06, "loss": 0.2742, "step": 565, "teacher_loss": 0.2690945863723755 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5875164270401001, "learning_rate": 2.4548214543877402e-06, "loss": 0.2876, "step": 566, "teacher_loss": 0.2542589604854584 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.7272858619689941, "learning_rate": 2.4591585947665174e-06, "loss": 0.3647, "step": 567, "teacher_loss": 0.32438141107559204 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3112044334411621, "learning_rate": 2.463495735145294e-06, "loss": 0.3472, "step": 568, "teacher_loss": 0.3511584997177124 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.35305026173591614, "learning_rate": 2.467832875524071e-06, "loss": 0.2094, "step": 569, "teacher_loss": 0.19340015947818756 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.24011370539665222, "learning_rate": 2.4721700159028477e-06, "loss": 0.2487, "step": 570, "teacher_loss": 0.24964269995689392 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.7539844512939453, "learning_rate": 2.4765071562816254e-06, "loss": 0.2901, "step": 571, "teacher_loss": 0.23853300511837006 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.4385092258453369, "learning_rate": 2.480844296660402e-06, "loss": 0.2123, "step": 572, "teacher_loss": 0.1871197521686554 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.2902497351169586, "learning_rate": 2.485181437039179e-06, "loss": 0.1961, "step": 573, "teacher_loss": 0.18561913073062897 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3843946158885956, "learning_rate": 2.4895185774179557e-06, "loss": 0.2503, "step": 574, "teacher_loss": 0.23541009426116943 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.9851076602935791, "learning_rate": 2.493855717796733e-06, "loss": 0.3746, "step": 575, "teacher_loss": 0.30675411224365234 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.075462706387043, "learning_rate": 2.4981928581755097e-06, "loss": 0.1577, "step": 576, "teacher_loss": 0.1667826622724533 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.47844499349594116, "learning_rate": 2.5025299985542864e-06, "loss": 0.5685, "step": 577, "teacher_loss": 0.5784728527069092 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.12621146440505981, "learning_rate": 2.5068671389330636e-06, "loss": 0.1968, "step": 578, "teacher_loss": 0.20459681749343872 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.5721890926361084, "learning_rate": 2.5112042793118404e-06, "loss": 0.3177, "step": 579, "teacher_loss": 0.28942903876304626 }, { "compression_loss": 0.0, "epoch": 0.1, "label_loss": 0.3502286374568939, "learning_rate": 2.515541419690617e-06, "loss": 0.2811, "step": 580, "teacher_loss": 0.273428738117218 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.3754081130027771, "learning_rate": 2.5198785600693944e-06, "loss": 0.2641, "step": 581, "teacher_loss": 0.25171878933906555 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.19309651851654053, "learning_rate": 2.5242157004481716e-06, "loss": 0.1955, "step": 582, "teacher_loss": 0.19573545455932617 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.9328293204307556, "learning_rate": 2.5285528408269484e-06, "loss": 0.3228, "step": 583, "teacher_loss": 0.25504833459854126 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4727112352848053, "learning_rate": 2.532889981205725e-06, "loss": 0.2623, "step": 584, "teacher_loss": 0.2389705628156662 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.25859129428863525, "learning_rate": 2.537227121584502e-06, "loss": 0.1909, "step": 585, "teacher_loss": 0.1833484172821045 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4703024625778198, "learning_rate": 2.541564261963279e-06, "loss": 0.2628, "step": 586, "teacher_loss": 0.23972968757152557 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.5282617807388306, "learning_rate": 2.545901402342056e-06, "loss": 0.2257, "step": 587, "teacher_loss": 0.19205081462860107 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.6147003173828125, "learning_rate": 2.5502385427208327e-06, "loss": 0.4944, "step": 588, "teacher_loss": 0.48107486963272095 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.6036140322685242, "learning_rate": 2.5545756830996094e-06, "loss": 0.2811, "step": 589, "teacher_loss": 0.24524986743927002 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.30564770102500916, "learning_rate": 2.5589128234783866e-06, "loss": 0.2573, "step": 590, "teacher_loss": 0.2519502639770508 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.2896535098552704, "learning_rate": 2.5632499638571634e-06, "loss": 0.2088, "step": 591, "teacher_loss": 0.19982947409152985 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.29479333758354187, "learning_rate": 2.5675871042359406e-06, "loss": 0.1996, "step": 592, "teacher_loss": 0.18897177278995514 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.40671059489250183, "learning_rate": 2.5719242446147174e-06, "loss": 0.2508, "step": 593, "teacher_loss": 0.23344384133815765 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4257102906703949, "learning_rate": 2.5762613849934946e-06, "loss": 0.2808, "step": 594, "teacher_loss": 0.2647053599357605 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.5725194215774536, "learning_rate": 2.5805985253722714e-06, "loss": 0.3676, "step": 595, "teacher_loss": 0.3448231816291809 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.2081710696220398, "learning_rate": 2.584935665751048e-06, "loss": 0.2166, "step": 596, "teacher_loss": 0.2175840586423874 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.29855772852897644, "learning_rate": 2.5892728061298253e-06, "loss": 0.3396, "step": 597, "teacher_loss": 0.3441075086593628 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.46510887145996094, "learning_rate": 2.593609946508602e-06, "loss": 0.3173, "step": 598, "teacher_loss": 0.30083948373794556 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.3710855543613434, "learning_rate": 2.597947086887379e-06, "loss": 0.2498, "step": 599, "teacher_loss": 0.2362690418958664 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.7629257440567017, "learning_rate": 2.6022842272661557e-06, "loss": 0.2722, "step": 600, "teacher_loss": 0.21764832735061646 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.36502036452293396, "learning_rate": 2.606621367644933e-06, "loss": 0.3853, "step": 601, "teacher_loss": 0.3875422477722168 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.7474969625473022, "learning_rate": 2.6109585080237096e-06, "loss": 0.8236, "step": 602, "teacher_loss": 0.832101047039032 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.616251528263092, "learning_rate": 2.615295648402487e-06, "loss": 0.2672, "step": 603, "teacher_loss": 0.22836601734161377 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.5053845643997192, "learning_rate": 2.6196327887812636e-06, "loss": 0.2442, "step": 604, "teacher_loss": 0.2151930332183838 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.8031680583953857, "learning_rate": 2.623969929160041e-06, "loss": 0.3599, "step": 605, "teacher_loss": 0.3106900453567505 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.42707711458206177, "learning_rate": 2.6283070695388176e-06, "loss": 0.3834, "step": 606, "teacher_loss": 0.3785204589366913 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.32388240098953247, "learning_rate": 2.6326442099175944e-06, "loss": 0.2524, "step": 607, "teacher_loss": 0.24450750648975372 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.8198894262313843, "learning_rate": 2.636981350296371e-06, "loss": 0.3624, "step": 608, "teacher_loss": 0.3115271031856537 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.48805996775627136, "learning_rate": 2.6413184906751483e-06, "loss": 0.2706, "step": 609, "teacher_loss": 0.24648529291152954 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.38548821210861206, "learning_rate": 2.645655631053925e-06, "loss": 0.207, "step": 610, "teacher_loss": 0.18722303211688995 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.36671972274780273, "learning_rate": 2.649992771432702e-06, "loss": 0.3008, "step": 611, "teacher_loss": 0.29345256090164185 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.5331743359565735, "learning_rate": 2.654329911811479e-06, "loss": 0.2645, "step": 612, "teacher_loss": 0.23459255695343018 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.528983473777771, "learning_rate": 2.658667052190256e-06, "loss": 0.4801, "step": 613, "teacher_loss": 0.47472190856933594 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.29589593410491943, "learning_rate": 2.663004192569033e-06, "loss": 0.2403, "step": 614, "teacher_loss": 0.23415866494178772 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.47103583812713623, "learning_rate": 2.66734133294781e-06, "loss": 0.314, "step": 615, "teacher_loss": 0.2965882420539856 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.3213235139846802, "learning_rate": 2.671678473326587e-06, "loss": 0.2536, "step": 616, "teacher_loss": 0.24612219631671906 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.3729945123195648, "learning_rate": 2.676015613705364e-06, "loss": 0.2534, "step": 617, "teacher_loss": 0.2400979995727539 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.1334015429019928, "learning_rate": 2.6803527540841406e-06, "loss": 0.1957, "step": 618, "teacher_loss": 0.20257017016410828 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.586087167263031, "learning_rate": 2.6846898944629173e-06, "loss": 0.2343, "step": 619, "teacher_loss": 0.1952415406703949 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.7722219824790955, "learning_rate": 2.6890270348416945e-06, "loss": 0.339, "step": 620, "teacher_loss": 0.29087793827056885 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4624701738357544, "learning_rate": 2.6933641752204713e-06, "loss": 0.2831, "step": 621, "teacher_loss": 0.26322197914123535 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.49739134311676025, "learning_rate": 2.697701315599248e-06, "loss": 0.364, "step": 622, "teacher_loss": 0.3492274284362793 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.3222472071647644, "learning_rate": 2.702038455978025e-06, "loss": 0.2405, "step": 623, "teacher_loss": 0.2313927710056305 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.46010762453079224, "learning_rate": 2.706375596356802e-06, "loss": 0.2425, "step": 624, "teacher_loss": 0.21834558248519897 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.2531857192516327, "learning_rate": 2.7107127367355793e-06, "loss": 0.2099, "step": 625, "teacher_loss": 0.2051263004541397 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.11980043351650238, "learning_rate": 2.715049877114356e-06, "loss": 0.2235, "step": 626, "teacher_loss": 0.23500694334506989 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.22379769384860992, "learning_rate": 2.7193870174931332e-06, "loss": 0.256, "step": 627, "teacher_loss": 0.25958359241485596 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.9325609803199768, "learning_rate": 2.72372415787191e-06, "loss": 0.3507, "step": 628, "teacher_loss": 0.2860622704029083 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.33083322644233704, "learning_rate": 2.728061298250687e-06, "loss": 0.3106, "step": 629, "teacher_loss": 0.3083771765232086 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.5008382797241211, "learning_rate": 2.7323984386294636e-06, "loss": 0.2427, "step": 630, "teacher_loss": 0.2140641063451767 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.28264960646629333, "learning_rate": 2.7367355790082408e-06, "loss": 0.3993, "step": 631, "teacher_loss": 0.41220974922180176 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.6034846305847168, "learning_rate": 2.7410727193870175e-06, "loss": 0.2468, "step": 632, "teacher_loss": 0.2071148157119751 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.46136119961738586, "learning_rate": 2.7454098597657943e-06, "loss": 0.3234, "step": 633, "teacher_loss": 0.30802151560783386 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4579277038574219, "learning_rate": 2.749747000144571e-06, "loss": 0.3117, "step": 634, "teacher_loss": 0.29548346996307373 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.49554747343063354, "learning_rate": 2.7540841405233483e-06, "loss": 0.2649, "step": 635, "teacher_loss": 0.23922522366046906 }, { "compression_loss": 0.0, "epoch": 0.11, "label_loss": 0.4182722568511963, "learning_rate": 2.7584212809021255e-06, "loss": 0.2286, "step": 636, "teacher_loss": 0.20752517879009247 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3358771800994873, "learning_rate": 2.7627584212809023e-06, "loss": 0.2652, "step": 637, "teacher_loss": 0.25736862421035767 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3030616343021393, "learning_rate": 2.767095561659679e-06, "loss": 0.2204, "step": 638, "teacher_loss": 0.21119824051856995 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.5646266341209412, "learning_rate": 2.7714327020384562e-06, "loss": 0.2735, "step": 639, "teacher_loss": 0.2411031424999237 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.36879676580429077, "learning_rate": 2.775769842417233e-06, "loss": 0.232, "step": 640, "teacher_loss": 0.21680012345314026 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.40672093629837036, "learning_rate": 2.78010698279601e-06, "loss": 0.2302, "step": 641, "teacher_loss": 0.21060490608215332 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.49960020184516907, "learning_rate": 2.784444123174787e-06, "loss": 0.3092, "step": 642, "teacher_loss": 0.2879989445209503 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.33145076036453247, "learning_rate": 2.7887812635535638e-06, "loss": 0.2538, "step": 643, "teacher_loss": 0.24521225690841675 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.8160698413848877, "learning_rate": 2.7931184039323405e-06, "loss": 0.3184, "step": 644, "teacher_loss": 0.2630925476551056 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.5901575088500977, "learning_rate": 2.7974555443111173e-06, "loss": 0.271, "step": 645, "teacher_loss": 0.235523521900177 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3425023555755615, "learning_rate": 2.8017926846898945e-06, "loss": 0.2651, "step": 646, "teacher_loss": 0.2564578652381897 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.24096450209617615, "learning_rate": 2.8061298250686717e-06, "loss": 0.2015, "step": 647, "teacher_loss": 0.19714361429214478 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.4299507439136505, "learning_rate": 2.8104669654474485e-06, "loss": 0.2061, "step": 648, "teacher_loss": 0.18122011423110962 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.40944671630859375, "learning_rate": 2.8148041058262253e-06, "loss": 0.2472, "step": 649, "teacher_loss": 0.229185089468956 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.30898597836494446, "learning_rate": 2.8191412462050025e-06, "loss": 0.2291, "step": 650, "teacher_loss": 0.22021184861660004 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.38677287101745605, "learning_rate": 2.8234783865837792e-06, "loss": 0.325, "step": 651, "teacher_loss": 0.3181048631668091 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.13863763213157654, "learning_rate": 2.827815526962556e-06, "loss": 0.1586, "step": 652, "teacher_loss": 0.16076484322547913 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.1925647109746933, "learning_rate": 2.8321526673413328e-06, "loss": 0.1833, "step": 653, "teacher_loss": 0.18221831321716309 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.6435182690620422, "learning_rate": 2.83648980772011e-06, "loss": 0.3567, "step": 654, "teacher_loss": 0.32485026121139526 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.5975439548492432, "learning_rate": 2.8408269480988868e-06, "loss": 0.2514, "step": 655, "teacher_loss": 0.2129727303981781 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 1.0947407484054565, "learning_rate": 2.8451640884776635e-06, "loss": 0.5505, "step": 656, "teacher_loss": 0.48999854922294617 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.48540788888931274, "learning_rate": 2.8495012288564407e-06, "loss": 0.3447, "step": 657, "teacher_loss": 0.32911601662635803 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.19058403372764587, "learning_rate": 2.853838369235218e-06, "loss": 0.2083, "step": 658, "teacher_loss": 0.21025574207305908 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.49009907245635986, "learning_rate": 2.8581755096139947e-06, "loss": 0.2939, "step": 659, "teacher_loss": 0.27211275696754456 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.44107699394226074, "learning_rate": 2.8625126499927715e-06, "loss": 0.3083, "step": 660, "teacher_loss": 0.29355186223983765 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.2993045151233673, "learning_rate": 2.8668497903715487e-06, "loss": 0.2565, "step": 661, "teacher_loss": 0.25171995162963867 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3849508464336395, "learning_rate": 2.8711869307503255e-06, "loss": 0.2775, "step": 662, "teacher_loss": 0.26558274030685425 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.4832845628261566, "learning_rate": 2.8755240711291022e-06, "loss": 0.2213, "step": 663, "teacher_loss": 0.1921844184398651 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.21154847741127014, "learning_rate": 2.879861211507879e-06, "loss": 0.225, "step": 664, "teacher_loss": 0.22644278407096863 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.875374972820282, "learning_rate": 2.884198351886656e-06, "loss": 0.3541, "step": 665, "teacher_loss": 0.2961379289627075 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.26852554082870483, "learning_rate": 2.888535492265433e-06, "loss": 0.2428, "step": 666, "teacher_loss": 0.2399265021085739 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.9165135622024536, "learning_rate": 2.8928726326442098e-06, "loss": 0.3651, "step": 667, "teacher_loss": 0.3038022518157959 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.4079369008541107, "learning_rate": 2.897209773022987e-06, "loss": 0.2339, "step": 668, "teacher_loss": 0.21461129188537598 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.566765546798706, "learning_rate": 2.901546913401764e-06, "loss": 0.281, "step": 669, "teacher_loss": 0.2492278516292572 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.35190725326538086, "learning_rate": 2.905884053780541e-06, "loss": 0.201, "step": 670, "teacher_loss": 0.18422357738018036 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3302437663078308, "learning_rate": 2.9102211941593177e-06, "loss": 0.2335, "step": 671, "teacher_loss": 0.222714364528656 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.4518120288848877, "learning_rate": 2.9145583345380945e-06, "loss": 0.2271, "step": 672, "teacher_loss": 0.20215407013893127 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.7353434562683105, "learning_rate": 2.9188954749168717e-06, "loss": 0.2333, "step": 673, "teacher_loss": 0.17753830552101135 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.2602654993534088, "learning_rate": 2.9232326152956484e-06, "loss": 0.2398, "step": 674, "teacher_loss": 0.2375141978263855 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.2193537950515747, "learning_rate": 2.9275697556744252e-06, "loss": 0.2044, "step": 675, "teacher_loss": 0.20269176363945007 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3218291699886322, "learning_rate": 2.9319068960532024e-06, "loss": 0.2302, "step": 676, "teacher_loss": 0.2199709713459015 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.5002689957618713, "learning_rate": 2.936244036431979e-06, "loss": 0.4245, "step": 677, "teacher_loss": 0.41603514552116394 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.724240779876709, "learning_rate": 2.940581176810756e-06, "loss": 0.3331, "step": 678, "teacher_loss": 0.28959372639656067 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.8729182481765747, "learning_rate": 2.944918317189533e-06, "loss": 0.301, "step": 679, "teacher_loss": 0.23748816549777985 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.3842735290527344, "learning_rate": 2.9492554575683104e-06, "loss": 0.2296, "step": 680, "teacher_loss": 0.21238964796066284 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.529210090637207, "learning_rate": 2.953592597947087e-06, "loss": 0.2688, "step": 681, "teacher_loss": 0.2399199903011322 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.9025293588638306, "learning_rate": 2.957929738325864e-06, "loss": 0.4873, "step": 682, "teacher_loss": 0.4411899149417877 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.43346190452575684, "learning_rate": 2.9622668787046407e-06, "loss": 0.2425, "step": 683, "teacher_loss": 0.22132349014282227 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.615806519985199, "learning_rate": 2.966604019083418e-06, "loss": 0.2619, "step": 684, "teacher_loss": 0.2226225882768631 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.7495501041412354, "learning_rate": 2.9709411594621947e-06, "loss": 0.4868, "step": 685, "teacher_loss": 0.45756083726882935 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.22026360034942627, "learning_rate": 2.9752782998409714e-06, "loss": 0.3317, "step": 686, "teacher_loss": 0.34406578540802 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.32584649324417114, "learning_rate": 2.9796154402197482e-06, "loss": 0.2413, "step": 687, "teacher_loss": 0.23193639516830444 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.47305089235305786, "learning_rate": 2.9839525805985254e-06, "loss": 0.2498, "step": 688, "teacher_loss": 0.22497621178627014 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.4568033516407013, "learning_rate": 2.988289720977302e-06, "loss": 0.2522, "step": 689, "teacher_loss": 0.22951287031173706 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 1.03564453125, "learning_rate": 2.9926268613560794e-06, "loss": 0.3985, "step": 690, "teacher_loss": 0.3276674747467041 }, { "compression_loss": 0.0, "epoch": 0.12, "label_loss": 0.30183354020118713, "learning_rate": 2.9969640017348566e-06, "loss": 0.2005, "step": 691, "teacher_loss": 0.18919554352760315 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 1.2837392091751099, "learning_rate": 3.0013011421136334e-06, "loss": 0.6489, "step": 692, "teacher_loss": 0.5784119963645935 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.37290507555007935, "learning_rate": 3.00563828249241e-06, "loss": 0.2159, "step": 693, "teacher_loss": 0.19843712449073792 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.39942124485969543, "learning_rate": 3.009975422871187e-06, "loss": 0.2365, "step": 694, "teacher_loss": 0.21843993663787842 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.14800088107585907, "learning_rate": 3.014312563249964e-06, "loss": 0.1527, "step": 695, "teacher_loss": 0.15322959423065186 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5956727266311646, "learning_rate": 3.018649703628741e-06, "loss": 0.2825, "step": 696, "teacher_loss": 0.24765954911708832 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6925480365753174, "learning_rate": 3.0229868440075177e-06, "loss": 0.351, "step": 697, "teacher_loss": 0.31300532817840576 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.26216253638267517, "learning_rate": 3.0273239843862944e-06, "loss": 0.249, "step": 698, "teacher_loss": 0.24751925468444824 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3048938512802124, "learning_rate": 3.0316611247650716e-06, "loss": 0.1754, "step": 699, "teacher_loss": 0.16102465987205505 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.45099547505378723, "learning_rate": 3.0359982651438484e-06, "loss": 0.3119, "step": 700, "teacher_loss": 0.2964308559894562 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6762874126434326, "learning_rate": 3.0403354055226256e-06, "loss": 0.2286, "step": 701, "teacher_loss": 0.17884431779384613 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.24709640443325043, "learning_rate": 3.0446725459014024e-06, "loss": 0.1938, "step": 702, "teacher_loss": 0.18782320618629456 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3207492530345917, "learning_rate": 3.0490096862801796e-06, "loss": 0.2962, "step": 703, "teacher_loss": 0.2934865653514862 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.23919939994812012, "learning_rate": 3.0533468266589564e-06, "loss": 0.183, "step": 704, "teacher_loss": 0.17681071162223816 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3603303134441376, "learning_rate": 3.057683967037733e-06, "loss": 0.2886, "step": 705, "teacher_loss": 0.28064680099487305 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5991061925888062, "learning_rate": 3.06202110741651e-06, "loss": 0.2929, "step": 706, "teacher_loss": 0.25882866978645325 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5482435822486877, "learning_rate": 3.066358247795287e-06, "loss": 0.2489, "step": 707, "teacher_loss": 0.21561436355113983 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5382323861122131, "learning_rate": 3.070695388174064e-06, "loss": 0.2916, "step": 708, "teacher_loss": 0.2642223834991455 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.2919009029865265, "learning_rate": 3.0750325285528407e-06, "loss": 0.2967, "step": 709, "teacher_loss": 0.2972421944141388 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.4450782239437103, "learning_rate": 3.079369668931618e-06, "loss": 0.2236, "step": 710, "teacher_loss": 0.19893765449523926 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5930566787719727, "learning_rate": 3.0837068093103946e-06, "loss": 0.2892, "step": 711, "teacher_loss": 0.2554784417152405 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.18835720419883728, "learning_rate": 3.088043949689172e-06, "loss": 0.2581, "step": 712, "teacher_loss": 0.2658957242965698 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.906446099281311, "learning_rate": 3.0923810900679486e-06, "loss": 0.3265, "step": 713, "teacher_loss": 0.26210784912109375 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6868412494659424, "learning_rate": 3.096718230446726e-06, "loss": 0.5075, "step": 714, "teacher_loss": 0.48762717843055725 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.7534281611442566, "learning_rate": 3.1010553708255026e-06, "loss": 0.3946, "step": 715, "teacher_loss": 0.35474893450737 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 1.2374625205993652, "learning_rate": 3.1053925112042794e-06, "loss": 0.5817, "step": 716, "teacher_loss": 0.5088511109352112 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5261648893356323, "learning_rate": 3.109729651583056e-06, "loss": 0.2943, "step": 717, "teacher_loss": 0.2685818076133728 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5450520515441895, "learning_rate": 3.1140667919618333e-06, "loss": 0.3611, "step": 718, "teacher_loss": 0.3406655788421631 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6108411550521851, "learning_rate": 3.11840393234061e-06, "loss": 0.2455, "step": 719, "teacher_loss": 0.2048671692609787 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.13773435354232788, "learning_rate": 3.122741072719387e-06, "loss": 0.1687, "step": 720, "teacher_loss": 0.17214879393577576 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3014611601829529, "learning_rate": 3.1270782130981637e-06, "loss": 0.2169, "step": 721, "teacher_loss": 0.2075299620628357 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5451130270957947, "learning_rate": 3.131415353476941e-06, "loss": 0.4403, "step": 722, "teacher_loss": 0.42868685722351074 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6346304416656494, "learning_rate": 3.135752493855718e-06, "loss": 0.283, "step": 723, "teacher_loss": 0.24390821158885956 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.25397324562072754, "learning_rate": 3.140089634234495e-06, "loss": 0.2028, "step": 724, "teacher_loss": 0.19710049033164978 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.8822857737541199, "learning_rate": 3.144426774613272e-06, "loss": 0.2763, "step": 725, "teacher_loss": 0.20893266797065735 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.37545570731163025, "learning_rate": 3.148763914992049e-06, "loss": 0.1806, "step": 726, "teacher_loss": 0.1590040773153305 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3882199227809906, "learning_rate": 3.1531010553708256e-06, "loss": 0.2071, "step": 727, "teacher_loss": 0.18702653050422668 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6223392486572266, "learning_rate": 3.1574381957496023e-06, "loss": 0.5601, "step": 728, "teacher_loss": 0.5531485676765442 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.2948831617832184, "learning_rate": 3.1617753361283795e-06, "loss": 0.2305, "step": 729, "teacher_loss": 0.2233971804380417 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.24321597814559937, "learning_rate": 3.1661124765071563e-06, "loss": 0.1904, "step": 730, "teacher_loss": 0.18450817465782166 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.28971239924430847, "learning_rate": 3.170449616885933e-06, "loss": 0.2374, "step": 731, "teacher_loss": 0.23156000673770905 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.3926895260810852, "learning_rate": 3.17478675726471e-06, "loss": 0.2545, "step": 732, "teacher_loss": 0.23916417360305786 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.24792593717575073, "learning_rate": 3.1791238976434875e-06, "loss": 0.2995, "step": 733, "teacher_loss": 0.3052656650543213 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.7794806957244873, "learning_rate": 3.1834610380222643e-06, "loss": 0.2642, "step": 734, "teacher_loss": 0.2069830298423767 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.6176691055297852, "learning_rate": 3.187798178401041e-06, "loss": 0.2906, "step": 735, "teacher_loss": 0.2542843222618103 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5163308382034302, "learning_rate": 3.192135318779818e-06, "loss": 0.2322, "step": 736, "teacher_loss": 0.20063847303390503 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.35234588384628296, "learning_rate": 3.196472459158595e-06, "loss": 0.2627, "step": 737, "teacher_loss": 0.25275570154190063 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.5535691976547241, "learning_rate": 3.200809599537372e-06, "loss": 0.2693, "step": 738, "teacher_loss": 0.2376859188079834 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.4028955399990082, "learning_rate": 3.2051467399161486e-06, "loss": 0.2317, "step": 739, "teacher_loss": 0.21270275115966797 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.1146426647901535, "learning_rate": 3.2094838802949258e-06, "loss": 0.3064, "step": 740, "teacher_loss": 0.32768210768699646 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.49734219908714294, "learning_rate": 3.2138210206737025e-06, "loss": 0.425, "step": 741, "teacher_loss": 0.4169650077819824 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.8750267028808594, "learning_rate": 3.2181581610524793e-06, "loss": 0.4349, "step": 742, "teacher_loss": 0.3860322833061218 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.8160274028778076, "learning_rate": 3.222495301431256e-06, "loss": 0.4751, "step": 743, "teacher_loss": 0.4372653365135193 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.562263011932373, "learning_rate": 3.2268324418100337e-06, "loss": 0.255, "step": 744, "teacher_loss": 0.22087424993515015 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.670231819152832, "learning_rate": 3.2311695821888105e-06, "loss": 0.3416, "step": 745, "teacher_loss": 0.30503684282302856 }, { "compression_loss": 0.0, "epoch": 0.13, "label_loss": 0.23462998867034912, "learning_rate": 3.2355067225675873e-06, "loss": 0.2661, "step": 746, "teacher_loss": 0.26962926983833313 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3502556383609772, "learning_rate": 3.239843862946364e-06, "loss": 0.1967, "step": 747, "teacher_loss": 0.17964071035385132 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3415452837944031, "learning_rate": 3.2441810033251412e-06, "loss": 0.2643, "step": 748, "teacher_loss": 0.2556981146335602 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.47424471378326416, "learning_rate": 3.248518143703918e-06, "loss": 0.2529, "step": 749, "teacher_loss": 0.228349506855011 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.25609880685806274, "learning_rate": 3.2528552840826948e-06, "loss": 0.247, "step": 750, "teacher_loss": 0.24598433077335358 }, { "epoch": 0.14, "eval_exact_match": 79.7445600756859, "eval_f1": 87.14584125635479, "step": 750 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3531723618507385, "learning_rate": 3.2571924244614716e-06, "loss": 0.2439, "step": 751, "teacher_loss": 0.2317531853914261 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3110284209251404, "learning_rate": 3.2615295648402488e-06, "loss": 0.2328, "step": 752, "teacher_loss": 0.22415274381637573 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.42995554208755493, "learning_rate": 3.2658667052190255e-06, "loss": 0.2421, "step": 753, "teacher_loss": 0.22127076983451843 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 1.3285260200500488, "learning_rate": 3.2702038455978023e-06, "loss": 0.3176, "step": 754, "teacher_loss": 0.2053188681602478 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.7816797494888306, "learning_rate": 3.27454098597658e-06, "loss": 0.3153, "step": 755, "teacher_loss": 0.2635067105293274 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5141528844833374, "learning_rate": 3.2788781263553567e-06, "loss": 0.3618, "step": 756, "teacher_loss": 0.34491318464279175 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.9236624240875244, "learning_rate": 3.2832152667341335e-06, "loss": 0.3505, "step": 757, "teacher_loss": 0.2868138551712036 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.6472077369689941, "learning_rate": 3.2875524071129103e-06, "loss": 0.2557, "step": 758, "teacher_loss": 0.2122364640235901 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.6403120756149292, "learning_rate": 3.2918895474916875e-06, "loss": 0.2963, "step": 759, "teacher_loss": 0.2581288516521454 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5622844696044922, "learning_rate": 3.2962266878704642e-06, "loss": 0.33, "step": 760, "teacher_loss": 0.3041532635688782 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5142181515693665, "learning_rate": 3.300563828249241e-06, "loss": 0.3063, "step": 761, "teacher_loss": 0.28316062688827515 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.22890210151672363, "learning_rate": 3.3049009686280178e-06, "loss": 0.3462, "step": 762, "teacher_loss": 0.35924601554870605 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3845955431461334, "learning_rate": 3.309238109006795e-06, "loss": 0.3149, "step": 763, "teacher_loss": 0.3071707487106323 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.839165210723877, "learning_rate": 3.3135752493855718e-06, "loss": 0.3819, "step": 764, "teacher_loss": 0.33108288049697876 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.38261520862579346, "learning_rate": 3.3179123897643485e-06, "loss": 0.201, "step": 765, "teacher_loss": 0.18077123165130615 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.24388937652111053, "learning_rate": 3.3222495301431257e-06, "loss": 0.1753, "step": 766, "teacher_loss": 0.16766047477722168 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5354130268096924, "learning_rate": 3.326586670521903e-06, "loss": 0.2528, "step": 767, "teacher_loss": 0.22134891152381897 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5595734715461731, "learning_rate": 3.3309238109006797e-06, "loss": 0.2817, "step": 768, "teacher_loss": 0.2508693337440491 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.1843225210905075, "learning_rate": 3.3352609512794565e-06, "loss": 0.224, "step": 769, "teacher_loss": 0.22846084833145142 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.34531301259994507, "learning_rate": 3.3395980916582333e-06, "loss": 0.3044, "step": 770, "teacher_loss": 0.2998705506324768 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3853307366371155, "learning_rate": 3.3439352320370104e-06, "loss": 0.3218, "step": 771, "teacher_loss": 0.31475961208343506 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.32597827911376953, "learning_rate": 3.3482723724157872e-06, "loss": 0.2461, "step": 772, "teacher_loss": 0.23727241158485413 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.48131901025772095, "learning_rate": 3.352609512794564e-06, "loss": 0.2197, "step": 773, "teacher_loss": 0.19060340523719788 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.9750180244445801, "learning_rate": 3.356946653173341e-06, "loss": 0.3523, "step": 774, "teacher_loss": 0.28305715322494507 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5041630268096924, "learning_rate": 3.361283793552118e-06, "loss": 0.3727, "step": 775, "teacher_loss": 0.3580412268638611 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5291510820388794, "learning_rate": 3.3656209339308947e-06, "loss": 0.3608, "step": 776, "teacher_loss": 0.34212982654571533 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5057750940322876, "learning_rate": 3.369958074309672e-06, "loss": 0.3006, "step": 777, "teacher_loss": 0.2778407633304596 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3278176784515381, "learning_rate": 3.374295214688449e-06, "loss": 0.3175, "step": 778, "teacher_loss": 0.3163798451423645 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.27269527316093445, "learning_rate": 3.378632355067226e-06, "loss": 0.2248, "step": 779, "teacher_loss": 0.21945229172706604 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.4998913109302521, "learning_rate": 3.3829694954460027e-06, "loss": 0.3142, "step": 780, "teacher_loss": 0.293518602848053 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.15515512228012085, "learning_rate": 3.3873066358247795e-06, "loss": 0.2447, "step": 781, "teacher_loss": 0.25470009446144104 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.6261863708496094, "learning_rate": 3.3916437762035567e-06, "loss": 0.257, "step": 782, "teacher_loss": 0.21595072746276855 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.46851101517677307, "learning_rate": 3.3959809165823334e-06, "loss": 0.1975, "step": 783, "teacher_loss": 0.16738788783550262 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5671685934066772, "learning_rate": 3.4003180569611102e-06, "loss": 0.3325, "step": 784, "teacher_loss": 0.30642879009246826 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3607160151004791, "learning_rate": 3.404655197339887e-06, "loss": 0.2779, "step": 785, "teacher_loss": 0.26870208978652954 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.2685242295265198, "learning_rate": 3.408992337718664e-06, "loss": 0.4174, "step": 786, "teacher_loss": 0.43390893936157227 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5977778434753418, "learning_rate": 3.413329478097441e-06, "loss": 0.3765, "step": 787, "teacher_loss": 0.35194242000579834 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.49139028787612915, "learning_rate": 3.417666618476218e-06, "loss": 0.2706, "step": 788, "teacher_loss": 0.24603916704654694 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.3657628297805786, "learning_rate": 3.4220037588549954e-06, "loss": 0.2407, "step": 789, "teacher_loss": 0.22684475779533386 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.4599382281303406, "learning_rate": 3.426340899233772e-06, "loss": 0.3181, "step": 790, "teacher_loss": 0.3022891879081726 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.41153550148010254, "learning_rate": 3.430678039612549e-06, "loss": 0.2435, "step": 791, "teacher_loss": 0.2247842252254486 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5343309640884399, "learning_rate": 3.4350151799913257e-06, "loss": 0.315, "step": 792, "teacher_loss": 0.2906323969364166 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.35651153326034546, "learning_rate": 3.439352320370103e-06, "loss": 0.2195, "step": 793, "teacher_loss": 0.20424365997314453 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.38014569878578186, "learning_rate": 3.4436894607488797e-06, "loss": 0.2973, "step": 794, "teacher_loss": 0.2881115972995758 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5738981366157532, "learning_rate": 3.4480266011276564e-06, "loss": 0.2879, "step": 795, "teacher_loss": 0.25608059763908386 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.5806160569190979, "learning_rate": 3.4523637415064332e-06, "loss": 0.306, "step": 796, "teacher_loss": 0.2754618525505066 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.4372144937515259, "learning_rate": 3.4567008818852104e-06, "loss": 0.2724, "step": 797, "teacher_loss": 0.254102885723114 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.354651540517807, "learning_rate": 3.461038022263987e-06, "loss": 0.2546, "step": 798, "teacher_loss": 0.24346190690994263 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.6520552039146423, "learning_rate": 3.4653751626427644e-06, "loss": 0.2781, "step": 799, "teacher_loss": 0.23655402660369873 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.4892185628414154, "learning_rate": 3.469712303021541e-06, "loss": 0.3559, "step": 800, "teacher_loss": 0.34104424715042114 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 0.4080970287322998, "learning_rate": 3.4740494434003184e-06, "loss": 0.1973, "step": 801, "teacher_loss": 0.17385894060134888 }, { "compression_loss": 0.0, "epoch": 0.14, "label_loss": 1.0153800249099731, "learning_rate": 3.478386583779095e-06, "loss": 0.3055, "step": 802, "teacher_loss": 0.22665008902549744 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5426517724990845, "learning_rate": 3.482723724157872e-06, "loss": 0.283, "step": 803, "teacher_loss": 0.2541946470737457 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 1.054621934890747, "learning_rate": 3.487060864536649e-06, "loss": 0.313, "step": 804, "teacher_loss": 0.23054583370685577 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4915931224822998, "learning_rate": 3.491398004915426e-06, "loss": 0.3753, "step": 805, "teacher_loss": 0.36235424876213074 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.661030650138855, "learning_rate": 3.4957351452942027e-06, "loss": 0.3455, "step": 806, "teacher_loss": 0.31044602394104004 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.23664268851280212, "learning_rate": 3.5000722856729794e-06, "loss": 0.2015, "step": 807, "teacher_loss": 0.19761332869529724 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.6139421463012695, "learning_rate": 3.5044094260517566e-06, "loss": 0.439, "step": 808, "teacher_loss": 0.4196016788482666 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.26901984214782715, "learning_rate": 3.508746566430534e-06, "loss": 0.2893, "step": 809, "teacher_loss": 0.2915344834327698 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.7388201951980591, "learning_rate": 3.5130837068093106e-06, "loss": 0.3154, "step": 810, "teacher_loss": 0.26829952001571655 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4415381848812103, "learning_rate": 3.5174208471880874e-06, "loss": 0.2223, "step": 811, "teacher_loss": 0.19795460999011993 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.32499265670776367, "learning_rate": 3.5217579875668646e-06, "loss": 0.3234, "step": 812, "teacher_loss": 0.3232687711715698 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.2048925906419754, "learning_rate": 3.5260951279456414e-06, "loss": 0.1826, "step": 813, "teacher_loss": 0.18011921644210815 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.18259766697883606, "learning_rate": 3.530432268324418e-06, "loss": 0.2178, "step": 814, "teacher_loss": 0.22171109914779663 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4891200661659241, "learning_rate": 3.534769408703195e-06, "loss": 0.3313, "step": 815, "teacher_loss": 0.3137642741203308 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.7843291759490967, "learning_rate": 3.539106549081972e-06, "loss": 0.2809, "step": 816, "teacher_loss": 0.22496187686920166 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.28351259231567383, "learning_rate": 3.543443689460749e-06, "loss": 0.1745, "step": 817, "teacher_loss": 0.1624288558959961 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.23178696632385254, "learning_rate": 3.5477808298395257e-06, "loss": 0.1979, "step": 818, "teacher_loss": 0.194082111120224 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3313000202178955, "learning_rate": 3.552117970218303e-06, "loss": 0.2733, "step": 819, "teacher_loss": 0.26689624786376953 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.22444593906402588, "learning_rate": 3.55645511059708e-06, "loss": 0.2739, "step": 820, "teacher_loss": 0.27940744161605835 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.7567064166069031, "learning_rate": 3.560792250975857e-06, "loss": 0.2502, "step": 821, "teacher_loss": 0.19388672709465027 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.46008479595184326, "learning_rate": 3.5651293913546336e-06, "loss": 0.2726, "step": 822, "teacher_loss": 0.2518126964569092 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.30962055921554565, "learning_rate": 3.569466531733411e-06, "loss": 0.2582, "step": 823, "teacher_loss": 0.2525278329849243 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.20213857293128967, "learning_rate": 3.5738036721121876e-06, "loss": 0.1983, "step": 824, "teacher_loss": 0.19783489406108856 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5922117233276367, "learning_rate": 3.5781408124909643e-06, "loss": 0.2429, "step": 825, "teacher_loss": 0.2040407508611679 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4371291399002075, "learning_rate": 3.582477952869741e-06, "loss": 0.1662, "step": 826, "teacher_loss": 0.13614767789840698 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.35752737522125244, "learning_rate": 3.5868150932485183e-06, "loss": 0.2022, "step": 827, "teacher_loss": 0.18489113450050354 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.35045942664146423, "learning_rate": 3.591152233627295e-06, "loss": 0.2335, "step": 828, "teacher_loss": 0.2205122709274292 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.6536198258399963, "learning_rate": 3.595489374006072e-06, "loss": 0.334, "step": 829, "teacher_loss": 0.29846251010894775 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4388755261898041, "learning_rate": 3.5998265143848486e-06, "loss": 0.4085, "step": 830, "teacher_loss": 0.40517014265060425 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.38399872183799744, "learning_rate": 3.6041636547636263e-06, "loss": 0.2304, "step": 831, "teacher_loss": 0.21332237124443054 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3681285083293915, "learning_rate": 3.608500795142403e-06, "loss": 0.2265, "step": 832, "teacher_loss": 0.21076641976833344 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3287276327610016, "learning_rate": 3.61283793552118e-06, "loss": 0.204, "step": 833, "teacher_loss": 0.19010718166828156 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.29119449853897095, "learning_rate": 3.6171750758999566e-06, "loss": 0.2318, "step": 834, "teacher_loss": 0.2252272218465805 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.32534223794937134, "learning_rate": 3.621512216278734e-06, "loss": 0.216, "step": 835, "teacher_loss": 0.20379836857318878 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3137243986129761, "learning_rate": 3.6258493566575106e-06, "loss": 0.2288, "step": 836, "teacher_loss": 0.21933528780937195 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4292237162590027, "learning_rate": 3.6301864970362873e-06, "loss": 0.2345, "step": 837, "teacher_loss": 0.2128177285194397 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.471017062664032, "learning_rate": 3.6345236374150645e-06, "loss": 0.2339, "step": 838, "teacher_loss": 0.20754003524780273 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4699787199497223, "learning_rate": 3.6388607777938413e-06, "loss": 0.3017, "step": 839, "teacher_loss": 0.28298401832580566 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4896871745586395, "learning_rate": 3.643197918172618e-06, "loss": 0.3399, "step": 840, "teacher_loss": 0.32330477237701416 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5529024004936218, "learning_rate": 3.647535058551395e-06, "loss": 0.2489, "step": 841, "teacher_loss": 0.21517637372016907 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.6582509875297546, "learning_rate": 3.6518721989301725e-06, "loss": 0.3614, "step": 842, "teacher_loss": 0.32845115661621094 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3299567401409149, "learning_rate": 3.6562093393089493e-06, "loss": 0.2812, "step": 843, "teacher_loss": 0.2757299244403839 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.48203617334365845, "learning_rate": 3.660546479687726e-06, "loss": 0.2247, "step": 844, "teacher_loss": 0.1960657238960266 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.16762351989746094, "learning_rate": 3.664883620066503e-06, "loss": 0.2199, "step": 845, "teacher_loss": 0.22566986083984375 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.22216686606407166, "learning_rate": 3.66922076044528e-06, "loss": 0.2187, "step": 846, "teacher_loss": 0.21833837032318115 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.13990309834480286, "learning_rate": 3.6735579008240568e-06, "loss": 0.1713, "step": 847, "teacher_loss": 0.1747407615184784 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.3876246213912964, "learning_rate": 3.6778950412028336e-06, "loss": 0.2496, "step": 848, "teacher_loss": 0.23422543704509735 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5054432153701782, "learning_rate": 3.6822321815816103e-06, "loss": 0.2898, "step": 849, "teacher_loss": 0.26583167910575867 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.6304320096969604, "learning_rate": 3.6865693219603875e-06, "loss": 0.4114, "step": 850, "teacher_loss": 0.38704848289489746 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5840238332748413, "learning_rate": 3.6909064623391643e-06, "loss": 0.2817, "step": 851, "teacher_loss": 0.24810011684894562 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.365255206823349, "learning_rate": 3.695243602717941e-06, "loss": 0.2353, "step": 852, "teacher_loss": 0.2208411544561386 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.4353649616241455, "learning_rate": 3.6995807430967187e-06, "loss": 0.2637, "step": 853, "teacher_loss": 0.244678795337677 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.36997756361961365, "learning_rate": 3.7039178834754955e-06, "loss": 0.2801, "step": 854, "teacher_loss": 0.2701045870780945 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.7335597276687622, "learning_rate": 3.7082550238542723e-06, "loss": 0.3561, "step": 855, "teacher_loss": 0.3141207993030548 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.41204988956451416, "learning_rate": 3.712592164233049e-06, "loss": 0.1982, "step": 856, "teacher_loss": 0.17448459565639496 }, { "compression_loss": 0.0, "epoch": 0.15, "label_loss": 0.5471560955047607, "learning_rate": 3.7169293046118262e-06, "loss": 0.2551, "step": 857, "teacher_loss": 0.22263610363006592 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.37322646379470825, "learning_rate": 3.721266444990603e-06, "loss": 0.3297, "step": 858, "teacher_loss": 0.32486236095428467 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.32819563150405884, "learning_rate": 3.7256035853693798e-06, "loss": 0.2302, "step": 859, "teacher_loss": 0.21925684809684753 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7161264419555664, "learning_rate": 3.7299407257481566e-06, "loss": 0.2541, "step": 860, "teacher_loss": 0.20278343558311462 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.20098334550857544, "learning_rate": 3.7342778661269338e-06, "loss": 0.2012, "step": 861, "teacher_loss": 0.20126613974571228 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.5543959140777588, "learning_rate": 3.7386150065057105e-06, "loss": 0.3197, "step": 862, "teacher_loss": 0.2936192452907562 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7394939661026001, "learning_rate": 3.7429521468844873e-06, "loss": 0.2605, "step": 863, "teacher_loss": 0.20730775594711304 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.5929688811302185, "learning_rate": 3.7472892872632645e-06, "loss": 0.3508, "step": 864, "teacher_loss": 0.3238885998725891 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.4986763596534729, "learning_rate": 3.7516264276420413e-06, "loss": 0.2952, "step": 865, "teacher_loss": 0.2726404070854187 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.62636798620224, "learning_rate": 3.7559635680208185e-06, "loss": 0.2636, "step": 866, "teacher_loss": 0.22326195240020752 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.33991825580596924, "learning_rate": 3.760300708399595e-06, "loss": 0.2213, "step": 867, "teacher_loss": 0.2080913633108139 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.18533429503440857, "learning_rate": 3.7646378487783725e-06, "loss": 0.2037, "step": 868, "teacher_loss": 0.20570705831050873 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.32129088044166565, "learning_rate": 3.7689749891571497e-06, "loss": 0.2381, "step": 869, "teacher_loss": 0.2288992702960968 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.4956677556037903, "learning_rate": 3.773312129535926e-06, "loss": 0.202, "step": 870, "teacher_loss": 0.16936850547790527 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.16438832879066467, "learning_rate": 3.777649269914703e-06, "loss": 0.2944, "step": 871, "teacher_loss": 0.30880677700042725 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.2171880602836609, "learning_rate": 3.78198641029348e-06, "loss": 0.206, "step": 872, "teacher_loss": 0.2047433853149414 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.43466800451278687, "learning_rate": 3.786323550672257e-06, "loss": 0.3843, "step": 873, "teacher_loss": 0.37875521183013916 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.35497087240219116, "learning_rate": 3.7906606910510335e-06, "loss": 0.23, "step": 874, "teacher_loss": 0.21608425676822662 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.3560202717781067, "learning_rate": 3.7949978314298107e-06, "loss": 0.2734, "step": 875, "teacher_loss": 0.2641996741294861 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.38263845443725586, "learning_rate": 3.7993349718085875e-06, "loss": 0.2777, "step": 876, "teacher_loss": 0.26604828238487244 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.8546239137649536, "learning_rate": 3.8036721121873647e-06, "loss": 0.3397, "step": 877, "teacher_loss": 0.28249073028564453 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.643721342086792, "learning_rate": 3.808009252566141e-06, "loss": 0.4905, "step": 878, "teacher_loss": 0.47351616621017456 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.9456126689910889, "learning_rate": 3.8123463929449182e-06, "loss": 0.3847, "step": 879, "teacher_loss": 0.32232964038848877 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.3652101159095764, "learning_rate": 3.816683533323696e-06, "loss": 0.2347, "step": 880, "teacher_loss": 0.22023239731788635 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.42043590545654297, "learning_rate": 3.821020673702472e-06, "loss": 0.2241, "step": 881, "teacher_loss": 0.20229294896125793 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7193540930747986, "learning_rate": 3.825357814081249e-06, "loss": 0.3352, "step": 882, "teacher_loss": 0.2925070524215698 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.32793840765953064, "learning_rate": 3.829694954460026e-06, "loss": 0.25, "step": 883, "teacher_loss": 0.24136817455291748 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.41589248180389404, "learning_rate": 3.834032094838803e-06, "loss": 0.3227, "step": 884, "teacher_loss": 0.31234976649284363 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7969814538955688, "learning_rate": 3.83836923521758e-06, "loss": 0.4113, "step": 885, "teacher_loss": 0.3685019910335541 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.29240015149116516, "learning_rate": 3.842706375596357e-06, "loss": 0.1815, "step": 886, "teacher_loss": 0.16915994882583618 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.2281486988067627, "learning_rate": 3.847043515975133e-06, "loss": 0.2525, "step": 887, "teacher_loss": 0.2552439868450165 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.3892339766025543, "learning_rate": 3.851380656353911e-06, "loss": 0.2172, "step": 888, "teacher_loss": 0.1981118619441986 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7245720624923706, "learning_rate": 3.855717796732688e-06, "loss": 0.3159, "step": 889, "teacher_loss": 0.2705182433128357 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.47479361295700073, "learning_rate": 3.8600549371114645e-06, "loss": 0.2649, "step": 890, "teacher_loss": 0.24153810739517212 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.6211075186729431, "learning_rate": 3.864392077490242e-06, "loss": 0.2258, "step": 891, "teacher_loss": 0.18184706568717957 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.2326255738735199, "learning_rate": 3.868729217869018e-06, "loss": 0.2311, "step": 892, "teacher_loss": 0.2309640347957611 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.48016250133514404, "learning_rate": 3.873066358247796e-06, "loss": 0.2291, "step": 893, "teacher_loss": 0.2012297809123993 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.6621298789978027, "learning_rate": 3.877403498626572e-06, "loss": 0.3372, "step": 894, "teacher_loss": 0.30114662647247314 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.5290963649749756, "learning_rate": 3.881740639005349e-06, "loss": 0.3426, "step": 895, "teacher_loss": 0.32189279794692993 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 1.1647858619689941, "learning_rate": 3.886077779384126e-06, "loss": 0.6473, "step": 896, "teacher_loss": 0.5897899270057678 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.25802403688430786, "learning_rate": 3.890414919762904e-06, "loss": 0.2678, "step": 897, "teacher_loss": 0.2688629627227783 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.6035110950469971, "learning_rate": 3.8947520601416795e-06, "loss": 0.303, "step": 898, "teacher_loss": 0.2696284353733063 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.2295677214860916, "learning_rate": 3.899089200520457e-06, "loss": 0.2082, "step": 899, "teacher_loss": 0.2058761715888977 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.12190845608711243, "learning_rate": 3.903426340899234e-06, "loss": 0.1636, "step": 900, "teacher_loss": 0.1681968867778778 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.3050462603569031, "learning_rate": 3.907763481278011e-06, "loss": 0.268, "step": 901, "teacher_loss": 0.26387637853622437 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7868713140487671, "learning_rate": 3.912100621656788e-06, "loss": 0.4171, "step": 902, "teacher_loss": 0.37606674432754517 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.30534982681274414, "learning_rate": 3.916437762035564e-06, "loss": 0.2834, "step": 903, "teacher_loss": 0.28098100423812866 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.42476966977119446, "learning_rate": 3.920774902414342e-06, "loss": 0.27, "step": 904, "teacher_loss": 0.25281471014022827 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.5270523428916931, "learning_rate": 3.925112042793119e-06, "loss": 0.2844, "step": 905, "teacher_loss": 0.25741684436798096 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.7185977697372437, "learning_rate": 3.929449183171895e-06, "loss": 0.2836, "step": 906, "teacher_loss": 0.23525749146938324 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.36288803815841675, "learning_rate": 3.933786323550672e-06, "loss": 0.3229, "step": 907, "teacher_loss": 0.3184909224510193 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.2687336206436157, "learning_rate": 3.93812346392945e-06, "loss": 0.2188, "step": 908, "teacher_loss": 0.21328972280025482 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.41602492332458496, "learning_rate": 3.942460604308226e-06, "loss": 0.2366, "step": 909, "teacher_loss": 0.2166227549314499 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.6163256168365479, "learning_rate": 3.946797744687003e-06, "loss": 0.317, "step": 910, "teacher_loss": 0.2836914360523224 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.5708070993423462, "learning_rate": 3.951134885065779e-06, "loss": 0.2507, "step": 911, "teacher_loss": 0.21518346667289734 }, { "compression_loss": 0.0, "epoch": 0.16, "label_loss": 0.4501948058605194, "learning_rate": 3.955472025444557e-06, "loss": 0.2577, "step": 912, "teacher_loss": 0.2363569736480713 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3216378092765808, "learning_rate": 3.9598091658233345e-06, "loss": 0.2317, "step": 913, "teacher_loss": 0.22169330716133118 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.25488758087158203, "learning_rate": 3.9641463062021105e-06, "loss": 0.2633, "step": 914, "teacher_loss": 0.26423513889312744 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4387619197368622, "learning_rate": 3.968483446580888e-06, "loss": 0.2419, "step": 915, "teacher_loss": 0.21997150778770447 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5765150785446167, "learning_rate": 3.972820586959665e-06, "loss": 0.3512, "step": 916, "teacher_loss": 0.3261568546295166 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.27555862069129944, "learning_rate": 3.977157727338442e-06, "loss": 0.2252, "step": 917, "teacher_loss": 0.2195887714624405 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4694218933582306, "learning_rate": 3.981494867717218e-06, "loss": 0.3226, "step": 918, "teacher_loss": 0.30624938011169434 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3344815969467163, "learning_rate": 3.985832008095996e-06, "loss": 0.2241, "step": 919, "teacher_loss": 0.21181178092956543 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.348363995552063, "learning_rate": 3.990169148474772e-06, "loss": 0.1689, "step": 920, "teacher_loss": 0.14895710349082947 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.8943019509315491, "learning_rate": 3.9945062888535496e-06, "loss": 0.2893, "step": 921, "teacher_loss": 0.22203224897384644 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4587143063545227, "learning_rate": 3.9988434292323255e-06, "loss": 0.2592, "step": 922, "teacher_loss": 0.23705679178237915 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.7997756600379944, "learning_rate": 4.003180569611103e-06, "loss": 0.2722, "step": 923, "teacher_loss": 0.21352726221084595 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.40306591987609863, "learning_rate": 4.007517709989881e-06, "loss": 0.2787, "step": 924, "teacher_loss": 0.26491859555244446 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4491243362426758, "learning_rate": 4.011854850368657e-06, "loss": 0.3019, "step": 925, "teacher_loss": 0.2855908274650574 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.27633893489837646, "learning_rate": 4.016191990747434e-06, "loss": 0.3247, "step": 926, "teacher_loss": 0.33006787300109863 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.21397623419761658, "learning_rate": 4.020529131126211e-06, "loss": 0.2027, "step": 927, "teacher_loss": 0.2014533281326294 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.2860625088214874, "learning_rate": 4.024866271504988e-06, "loss": 0.1887, "step": 928, "teacher_loss": 0.17782723903656006 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.27117055654525757, "learning_rate": 4.029203411883765e-06, "loss": 0.378, "step": 929, "teacher_loss": 0.3898550271987915 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.68213951587677, "learning_rate": 4.033540552262541e-06, "loss": 0.3452, "step": 930, "teacher_loss": 0.3077096939086914 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4420263171195984, "learning_rate": 4.037877692641318e-06, "loss": 0.2725, "step": 931, "teacher_loss": 0.25371670722961426 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.6770023107528687, "learning_rate": 4.042214833020096e-06, "loss": 0.2602, "step": 932, "teacher_loss": 0.21393685042858124 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.6111568212509155, "learning_rate": 4.046551973398872e-06, "loss": 0.3414, "step": 933, "teacher_loss": 0.3114704191684723 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.38260430097579956, "learning_rate": 4.050889113777649e-06, "loss": 0.3422, "step": 934, "teacher_loss": 0.3376636207103729 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3411727845668793, "learning_rate": 4.055226254156427e-06, "loss": 0.2725, "step": 935, "teacher_loss": 0.2648276090621948 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4407883286476135, "learning_rate": 4.059563394535203e-06, "loss": 0.2447, "step": 936, "teacher_loss": 0.22286444902420044 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4329412579536438, "learning_rate": 4.0639005349139805e-06, "loss": 0.2539, "step": 937, "teacher_loss": 0.23398561775684357 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3359575569629669, "learning_rate": 4.068237675292757e-06, "loss": 0.2118, "step": 938, "teacher_loss": 0.19796612858772278 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.2284258008003235, "learning_rate": 4.072574815671534e-06, "loss": 0.2204, "step": 939, "teacher_loss": 0.21945300698280334 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3847157955169678, "learning_rate": 4.076911956050311e-06, "loss": 0.3722, "step": 940, "teacher_loss": 0.3708217740058899 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4290144145488739, "learning_rate": 4.081249096429088e-06, "loss": 0.2433, "step": 941, "teacher_loss": 0.2226441204547882 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3952246308326721, "learning_rate": 4.085586236807864e-06, "loss": 0.2295, "step": 942, "teacher_loss": 0.2111111581325531 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.38891148567199707, "learning_rate": 4.089923377186642e-06, "loss": 0.2309, "step": 943, "teacher_loss": 0.213353231549263 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.41349172592163086, "learning_rate": 4.094260517565418e-06, "loss": 0.1751, "step": 944, "teacher_loss": 0.14863823354244232 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3463779091835022, "learning_rate": 4.0985976579441956e-06, "loss": 0.235, "step": 945, "teacher_loss": 0.22262157499790192 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4911763072013855, "learning_rate": 4.102934798322973e-06, "loss": 0.2219, "step": 946, "teacher_loss": 0.19197635352611542 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.1033777967095375, "learning_rate": 4.107271938701749e-06, "loss": 0.1856, "step": 947, "teacher_loss": 0.19476903975009918 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.43637940287590027, "learning_rate": 4.111609079080527e-06, "loss": 0.2801, "step": 948, "teacher_loss": 0.26268404722213745 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5513020157814026, "learning_rate": 4.1159462194593035e-06, "loss": 0.2252, "step": 949, "teacher_loss": 0.18901792168617249 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 1.0793309211730957, "learning_rate": 4.12028335983808e-06, "loss": 0.3689, "step": 950, "teacher_loss": 0.28991031646728516 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.2533317506313324, "learning_rate": 4.124620500216857e-06, "loss": 0.1982, "step": 951, "teacher_loss": 0.1921074092388153 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.7881702184677124, "learning_rate": 4.128957640595634e-06, "loss": 0.3531, "step": 952, "teacher_loss": 0.304771363735199 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5719931125640869, "learning_rate": 4.133294780974411e-06, "loss": 0.2478, "step": 953, "teacher_loss": 0.2117629051208496 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.2875203490257263, "learning_rate": 4.137631921353188e-06, "loss": 0.2829, "step": 954, "teacher_loss": 0.2823842465877533 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.36733829975128174, "learning_rate": 4.141969061731964e-06, "loss": 0.3096, "step": 955, "teacher_loss": 0.30319488048553467 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.4917277693748474, "learning_rate": 4.146306202110742e-06, "loss": 0.2711, "step": 956, "teacher_loss": 0.2465406060218811 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.246895432472229, "learning_rate": 4.150643342489519e-06, "loss": 0.1765, "step": 957, "teacher_loss": 0.16867899894714355 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.47302472591400146, "learning_rate": 4.154980482868295e-06, "loss": 0.244, "step": 958, "teacher_loss": 0.21852673590183258 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3434694707393646, "learning_rate": 4.159317623247073e-06, "loss": 0.2134, "step": 959, "teacher_loss": 0.1989946961402893 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.18457627296447754, "learning_rate": 4.163654763625849e-06, "loss": 0.285, "step": 960, "teacher_loss": 0.29612693190574646 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5879428386688232, "learning_rate": 4.1679919040046265e-06, "loss": 0.3018, "step": 961, "teacher_loss": 0.2700411081314087 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.1663154661655426, "learning_rate": 4.172329044383403e-06, "loss": 0.3135, "step": 962, "teacher_loss": 0.32984256744384766 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.7105588912963867, "learning_rate": 4.17666618476218e-06, "loss": 0.2601, "step": 963, "teacher_loss": 0.2100663185119629 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.299935519695282, "learning_rate": 4.181003325140957e-06, "loss": 0.2596, "step": 964, "teacher_loss": 0.25516477227211 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5807290077209473, "learning_rate": 4.1853404655197345e-06, "loss": 0.3152, "step": 965, "teacher_loss": 0.2857227921485901 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.3001972436904907, "learning_rate": 4.18967760589851e-06, "loss": 0.2347, "step": 966, "teacher_loss": 0.22739841043949127 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.30118033289909363, "learning_rate": 4.194014746277288e-06, "loss": 0.4572, "step": 967, "teacher_loss": 0.47455668449401855 }, { "compression_loss": 0.0, "epoch": 0.17, "label_loss": 0.5492435097694397, "learning_rate": 4.198351886656066e-06, "loss": 0.2427, "step": 968, "teacher_loss": 0.20863686501979828 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.21092335879802704, "learning_rate": 4.2026890270348416e-06, "loss": 0.2932, "step": 969, "teacher_loss": 0.3023689091205597 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5165823698043823, "learning_rate": 4.207026167413619e-06, "loss": 0.2737, "step": 970, "teacher_loss": 0.24669763445854187 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.46799975633621216, "learning_rate": 4.211363307792395e-06, "loss": 0.2835, "step": 971, "teacher_loss": 0.2630111873149872 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.29865220189094543, "learning_rate": 4.215700448171173e-06, "loss": 0.305, "step": 972, "teacher_loss": 0.30571699142456055 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5653126835823059, "learning_rate": 4.2200375885499495e-06, "loss": 0.3436, "step": 973, "teacher_loss": 0.3189849853515625 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.21941082179546356, "learning_rate": 4.224374728928726e-06, "loss": 0.2397, "step": 974, "teacher_loss": 0.2419101595878601 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.47118890285491943, "learning_rate": 4.228711869307503e-06, "loss": 0.253, "step": 975, "teacher_loss": 0.22870707511901855 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3474152982234955, "learning_rate": 4.233049009686281e-06, "loss": 0.1994, "step": 976, "teacher_loss": 0.18298810720443726 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.36741209030151367, "learning_rate": 4.237386150065057e-06, "loss": 0.3737, "step": 977, "teacher_loss": 0.3743492364883423 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5597560405731201, "learning_rate": 4.241723290443834e-06, "loss": 0.2816, "step": 978, "teacher_loss": 0.25066542625427246 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.525507390499115, "learning_rate": 4.246060430822611e-06, "loss": 0.2572, "step": 979, "teacher_loss": 0.22739559412002563 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.6766448020935059, "learning_rate": 4.250397571201388e-06, "loss": 0.2679, "step": 980, "teacher_loss": 0.2225266993045807 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.28461331129074097, "learning_rate": 4.254734711580165e-06, "loss": 0.2248, "step": 981, "teacher_loss": 0.21820171177387238 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.9696887731552124, "learning_rate": 4.259071851958941e-06, "loss": 0.3294, "step": 982, "teacher_loss": 0.25827687978744507 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.17163234949111938, "learning_rate": 4.263408992337719e-06, "loss": 0.233, "step": 983, "teacher_loss": 0.23976437747478485 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.7183161973953247, "learning_rate": 4.267746132716496e-06, "loss": 0.2647, "step": 984, "teacher_loss": 0.2143322080373764 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.2817261219024658, "learning_rate": 4.2720832730952725e-06, "loss": 0.2704, "step": 985, "teacher_loss": 0.26910319924354553 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.6363096237182617, "learning_rate": 4.276420413474049e-06, "loss": 0.2522, "step": 986, "teacher_loss": 0.2095108926296234 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.24661116302013397, "learning_rate": 4.280757553852827e-06, "loss": 0.1652, "step": 987, "teacher_loss": 0.15610413253307343 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5053436160087585, "learning_rate": 4.285094694231603e-06, "loss": 0.3204, "step": 988, "teacher_loss": 0.2998276948928833 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3370142877101898, "learning_rate": 4.2894318346103804e-06, "loss": 0.2765, "step": 989, "teacher_loss": 0.26980364322662354 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.377845823764801, "learning_rate": 4.293768974989157e-06, "loss": 0.2562, "step": 990, "teacher_loss": 0.24268172681331635 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.9990074038505554, "learning_rate": 4.298106115367934e-06, "loss": 0.4299, "step": 991, "teacher_loss": 0.36664068698883057 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.7937301993370056, "learning_rate": 4.302443255746712e-06, "loss": 0.3371, "step": 992, "teacher_loss": 0.28632599115371704 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.517193078994751, "learning_rate": 4.3067803961254875e-06, "loss": 0.2072, "step": 993, "teacher_loss": 0.17272207140922546 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5028003454208374, "learning_rate": 4.311117536504265e-06, "loss": 0.2493, "step": 994, "teacher_loss": 0.22113189101219177 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.15499143302440643, "learning_rate": 4.315454676883042e-06, "loss": 0.1958, "step": 995, "teacher_loss": 0.20028001070022583 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.39688703417778015, "learning_rate": 4.319791817261819e-06, "loss": 0.3163, "step": 996, "teacher_loss": 0.3073402941226959 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.30999478697776794, "learning_rate": 4.3241289576405955e-06, "loss": 0.199, "step": 997, "teacher_loss": 0.1867075115442276 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.6614822149276733, "learning_rate": 4.328466098019373e-06, "loss": 0.3184, "step": 998, "teacher_loss": 0.28031837940216064 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.4407710134983063, "learning_rate": 4.332803238398149e-06, "loss": 0.3742, "step": 999, "teacher_loss": 0.3667859435081482 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.30677372217178345, "learning_rate": 4.337140378776927e-06, "loss": 0.1966, "step": 1000, "teacher_loss": 0.18439361453056335 }, { "epoch": 0.18, "eval_exact_match": 79.78240302743615, "eval_f1": 87.28467191236393, "step": 1000 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.41976457834243774, "learning_rate": 4.3414775191557034e-06, "loss": 0.2564, "step": 1001, "teacher_loss": 0.23828575015068054 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.2679102122783661, "learning_rate": 4.34581465953448e-06, "loss": 0.3375, "step": 1002, "teacher_loss": 0.3452507257461548 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3169446587562561, "learning_rate": 4.350151799913258e-06, "loss": 0.1705, "step": 1003, "teacher_loss": 0.15423138439655304 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.6178823113441467, "learning_rate": 4.354488940292034e-06, "loss": 0.266, "step": 1004, "teacher_loss": 0.22686412930488586 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.7176684737205505, "learning_rate": 4.358826080670811e-06, "loss": 0.3019, "step": 1005, "teacher_loss": 0.255659282207489 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.4044558107852936, "learning_rate": 4.363163221049588e-06, "loss": 0.2802, "step": 1006, "teacher_loss": 0.2664405405521393 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.6271200180053711, "learning_rate": 4.367500361428365e-06, "loss": 0.3949, "step": 1007, "teacher_loss": 0.3691057562828064 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3779192566871643, "learning_rate": 4.371837501807142e-06, "loss": 0.2336, "step": 1008, "teacher_loss": 0.2175370156764984 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5824831128120422, "learning_rate": 4.3761746421859185e-06, "loss": 0.2541, "step": 1009, "teacher_loss": 0.21766166388988495 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 1.0577467679977417, "learning_rate": 4.380511782564696e-06, "loss": 0.3168, "step": 1010, "teacher_loss": 0.2344709038734436 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.26988738775253296, "learning_rate": 4.384848922943473e-06, "loss": 0.4745, "step": 1011, "teacher_loss": 0.497225284576416 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.46977198123931885, "learning_rate": 4.38918606332225e-06, "loss": 0.2836, "step": 1012, "teacher_loss": 0.2628623843193054 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3277967572212219, "learning_rate": 4.3935232037010264e-06, "loss": 0.2179, "step": 1013, "teacher_loss": 0.20572246611118317 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.20961245894432068, "learning_rate": 4.397860344079804e-06, "loss": 0.1465, "step": 1014, "teacher_loss": 0.13948100805282593 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5755942463874817, "learning_rate": 4.40219748445858e-06, "loss": 0.5469, "step": 1015, "teacher_loss": 0.5436673760414124 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5191944241523743, "learning_rate": 4.406534624837358e-06, "loss": 0.2942, "step": 1016, "teacher_loss": 0.26924625039100647 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.7318869829177856, "learning_rate": 4.410871765216134e-06, "loss": 0.2502, "step": 1017, "teacher_loss": 0.19667673110961914 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.43240630626678467, "learning_rate": 4.415208905594911e-06, "loss": 0.2781, "step": 1018, "teacher_loss": 0.26097458600997925 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.4053493142127991, "learning_rate": 4.419546045973688e-06, "loss": 0.2275, "step": 1019, "teacher_loss": 0.20779326558113098 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5808550119400024, "learning_rate": 4.423883186352465e-06, "loss": 0.2726, "step": 1020, "teacher_loss": 0.23830586671829224 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.7242401838302612, "learning_rate": 4.428220326731242e-06, "loss": 0.378, "step": 1021, "teacher_loss": 0.3394874334335327 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.5987971425056458, "learning_rate": 4.432557467110019e-06, "loss": 0.3065, "step": 1022, "teacher_loss": 0.27407407760620117 }, { "compression_loss": 0.0, "epoch": 0.18, "label_loss": 0.3604764938354492, "learning_rate": 4.436894607488796e-06, "loss": 0.2761, "step": 1023, "teacher_loss": 0.2667540907859802 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.42976105213165283, "learning_rate": 4.441231747867573e-06, "loss": 0.2526, "step": 1024, "teacher_loss": 0.232865571975708 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.16539642214775085, "learning_rate": 4.44556888824635e-06, "loss": 0.2126, "step": 1025, "teacher_loss": 0.21789035201072693 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.5467285513877869, "learning_rate": 4.449906028625126e-06, "loss": 0.3692, "step": 1026, "teacher_loss": 0.34948933124542236 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3324883282184601, "learning_rate": 4.454243169003904e-06, "loss": 0.2313, "step": 1027, "teacher_loss": 0.22007356584072113 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.7447936534881592, "learning_rate": 4.458580309382681e-06, "loss": 0.2757, "step": 1028, "teacher_loss": 0.22361455857753754 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.4618903398513794, "learning_rate": 4.462917449761457e-06, "loss": 0.3314, "step": 1029, "teacher_loss": 0.31691259145736694 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.15198522806167603, "learning_rate": 4.467254590140234e-06, "loss": 0.3453, "step": 1030, "teacher_loss": 0.36675846576690674 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.2747849225997925, "learning_rate": 4.471591730519011e-06, "loss": 0.199, "step": 1031, "teacher_loss": 0.1905422806739807 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.30247190594673157, "learning_rate": 4.4759288708977885e-06, "loss": 0.2477, "step": 1032, "teacher_loss": 0.24163810908794403 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3867129683494568, "learning_rate": 4.480266011276565e-06, "loss": 0.2692, "step": 1033, "teacher_loss": 0.25610053539276123 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.40048548579216003, "learning_rate": 4.484603151655342e-06, "loss": 0.2814, "step": 1034, "teacher_loss": 0.26817089319229126 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.2737944722175598, "learning_rate": 4.488940292034119e-06, "loss": 0.2076, "step": 1035, "teacher_loss": 0.20022031664848328 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.9053131937980652, "learning_rate": 4.4932774324128965e-06, "loss": 0.3598, "step": 1036, "teacher_loss": 0.29918426275253296 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.2241610884666443, "learning_rate": 4.497614572791672e-06, "loss": 0.1736, "step": 1037, "teacher_loss": 0.16797776520252228 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3201694190502167, "learning_rate": 4.50195171317045e-06, "loss": 0.2327, "step": 1038, "teacher_loss": 0.22295695543289185 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 1.0477876663208008, "learning_rate": 4.506288853549226e-06, "loss": 0.3322, "step": 1039, "teacher_loss": 0.2526901662349701 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.9910594820976257, "learning_rate": 4.510625993928004e-06, "loss": 0.2853, "step": 1040, "teacher_loss": 0.20692741870880127 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.8933700323104858, "learning_rate": 4.51496313430678e-06, "loss": 0.419, "step": 1041, "teacher_loss": 0.3662818670272827 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.571254312992096, "learning_rate": 4.519300274685557e-06, "loss": 0.2237, "step": 1042, "teacher_loss": 0.1850433051586151 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.33391740918159485, "learning_rate": 4.523637415064335e-06, "loss": 0.2283, "step": 1043, "teacher_loss": 0.2165614366531372 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.4566960036754608, "learning_rate": 4.5279745554431115e-06, "loss": 0.3055, "step": 1044, "teacher_loss": 0.2886720299720764 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.13232797384262085, "learning_rate": 4.532311695821888e-06, "loss": 0.2133, "step": 1045, "teacher_loss": 0.22229985892772675 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.4978542923927307, "learning_rate": 4.536648836200665e-06, "loss": 0.4331, "step": 1046, "teacher_loss": 0.425857275724411 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.43347957730293274, "learning_rate": 4.540985976579443e-06, "loss": 0.2149, "step": 1047, "teacher_loss": 0.1905868649482727 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.4231417775154114, "learning_rate": 4.545323116958219e-06, "loss": 0.26, "step": 1048, "teacher_loss": 0.24186962842941284 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.22906342148780823, "learning_rate": 4.549660257336996e-06, "loss": 0.244, "step": 1049, "teacher_loss": 0.24570153653621674 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.33008861541748047, "learning_rate": 4.553997397715772e-06, "loss": 0.1647, "step": 1050, "teacher_loss": 0.1463623195886612 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.8421276807785034, "learning_rate": 4.55833453809455e-06, "loss": 0.2869, "step": 1051, "teacher_loss": 0.22520504891872406 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3828859329223633, "learning_rate": 4.562671678473327e-06, "loss": 0.1979, "step": 1052, "teacher_loss": 0.17732752859592438 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3831285238265991, "learning_rate": 4.567008818852103e-06, "loss": 0.2119, "step": 1053, "teacher_loss": 0.19282452762126923 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.9916056394577026, "learning_rate": 4.571345959230881e-06, "loss": 0.3011, "step": 1054, "teacher_loss": 0.22434478998184204 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.2853778600692749, "learning_rate": 4.575683099609658e-06, "loss": 0.2319, "step": 1055, "teacher_loss": 0.22595274448394775 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.7721850275993347, "learning_rate": 4.5800202399884345e-06, "loss": 0.2815, "step": 1056, "teacher_loss": 0.22701841592788696 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.6067239046096802, "learning_rate": 4.584357380367211e-06, "loss": 0.3036, "step": 1057, "teacher_loss": 0.26994168758392334 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.20231294631958008, "learning_rate": 4.588694520745988e-06, "loss": 0.2927, "step": 1058, "teacher_loss": 0.30278515815734863 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3941829204559326, "learning_rate": 4.593031661124765e-06, "loss": 0.2034, "step": 1059, "teacher_loss": 0.18223124742507935 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.5052745938301086, "learning_rate": 4.5973688015035425e-06, "loss": 0.2361, "step": 1060, "teacher_loss": 0.20615960657596588 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.7895598411560059, "learning_rate": 4.601705941882318e-06, "loss": 0.6353, "step": 1061, "teacher_loss": 0.6181142330169678 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.4553050994873047, "learning_rate": 4.606043082261096e-06, "loss": 0.2421, "step": 1062, "teacher_loss": 0.21843793988227844 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.32229432463645935, "learning_rate": 4.610380222639873e-06, "loss": 0.1757, "step": 1063, "teacher_loss": 0.15940703451633453 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.31394755840301514, "learning_rate": 4.61471736301865e-06, "loss": 0.2284, "step": 1064, "teacher_loss": 0.2188643217086792 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.34707412123680115, "learning_rate": 4.619054503397427e-06, "loss": 0.2911, "step": 1065, "teacher_loss": 0.28493136167526245 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.9305747747421265, "learning_rate": 4.623391643776204e-06, "loss": 0.2861, "step": 1066, "teacher_loss": 0.21446290612220764 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.32297974824905396, "learning_rate": 4.627728784154981e-06, "loss": 0.3485, "step": 1067, "teacher_loss": 0.3513873517513275 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.26276522874832153, "learning_rate": 4.6320659245337575e-06, "loss": 0.2225, "step": 1068, "teacher_loss": 0.21806611120700836 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.5694539546966553, "learning_rate": 4.636403064912534e-06, "loss": 0.2047, "step": 1069, "teacher_loss": 0.16413669288158417 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.14550527930259705, "learning_rate": 4.640740205291311e-06, "loss": 0.2139, "step": 1070, "teacher_loss": 0.22152139246463776 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.3342948257923126, "learning_rate": 4.645077345670089e-06, "loss": 0.1953, "step": 1071, "teacher_loss": 0.17984285950660706 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.46232205629348755, "learning_rate": 4.649414486048865e-06, "loss": 0.2226, "step": 1072, "teacher_loss": 0.19598373770713806 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.5358340740203857, "learning_rate": 4.653751626427642e-06, "loss": 0.2848, "step": 1073, "teacher_loss": 0.2568589150905609 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.28656986355781555, "learning_rate": 4.658088766806419e-06, "loss": 0.2304, "step": 1074, "teacher_loss": 0.2241937220096588 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.9465980529785156, "learning_rate": 4.662425907185196e-06, "loss": 0.2695, "step": 1075, "teacher_loss": 0.1942925751209259 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.1416025459766388, "learning_rate": 4.6667630475639734e-06, "loss": 0.1563, "step": 1076, "teacher_loss": 0.15792804956436157 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.901451587677002, "learning_rate": 4.67110018794275e-06, "loss": 0.2931, "step": 1077, "teacher_loss": 0.22555328905582428 }, { "compression_loss": 0.0, "epoch": 0.19, "label_loss": 0.20931003987789154, "learning_rate": 4.675437328321527e-06, "loss": 0.1872, "step": 1078, "teacher_loss": 0.18474730849266052 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5381642580032349, "learning_rate": 4.679774468700304e-06, "loss": 0.2182, "step": 1079, "teacher_loss": 0.18265162408351898 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.47066718339920044, "learning_rate": 4.6841116090790805e-06, "loss": 0.2324, "step": 1080, "teacher_loss": 0.20595771074295044 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.36368829011917114, "learning_rate": 4.688448749457857e-06, "loss": 0.2713, "step": 1081, "teacher_loss": 0.26098522543907166 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4694782495498657, "learning_rate": 4.692785889836635e-06, "loss": 0.3416, "step": 1082, "teacher_loss": 0.32733774185180664 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.7196818590164185, "learning_rate": 4.697123030215411e-06, "loss": 0.3201, "step": 1083, "teacher_loss": 0.27571550011634827 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.6905295848846436, "learning_rate": 4.7014601705941885e-06, "loss": 0.2846, "step": 1084, "teacher_loss": 0.23951539397239685 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.1392497569322586, "learning_rate": 4.705797310972965e-06, "loss": 0.1434, "step": 1085, "teacher_loss": 0.14389224350452423 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5574511885643005, "learning_rate": 4.710134451351742e-06, "loss": 0.3486, "step": 1086, "teacher_loss": 0.3254011571407318 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.38188010454177856, "learning_rate": 4.71447159173052e-06, "loss": 0.2628, "step": 1087, "teacher_loss": 0.24957598745822906 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.31982582807540894, "learning_rate": 4.7188087321092956e-06, "loss": 0.2104, "step": 1088, "teacher_loss": 0.1982215791940689 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.21952366828918457, "learning_rate": 4.723145872488073e-06, "loss": 0.2193, "step": 1089, "teacher_loss": 0.21923795342445374 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.47833967208862305, "learning_rate": 4.72748301286685e-06, "loss": 0.3999, "step": 1090, "teacher_loss": 0.3912278413772583 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.07976051419973373, "learning_rate": 4.731820153245627e-06, "loss": 0.1795, "step": 1091, "teacher_loss": 0.19058480858802795 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.27420559525489807, "learning_rate": 4.7361572936244035e-06, "loss": 0.1994, "step": 1092, "teacher_loss": 0.19112759828567505 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 1.1061021089553833, "learning_rate": 4.740494434003181e-06, "loss": 0.4018, "step": 1093, "teacher_loss": 0.32357919216156006 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.28886157274246216, "learning_rate": 4.744831574381957e-06, "loss": 0.2188, "step": 1094, "teacher_loss": 0.21104061603546143 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3894529938697815, "learning_rate": 4.749168714760735e-06, "loss": 0.2085, "step": 1095, "teacher_loss": 0.18841558694839478 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5072648525238037, "learning_rate": 4.7535058551395115e-06, "loss": 0.223, "step": 1096, "teacher_loss": 0.19137945771217346 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5903101563453674, "learning_rate": 4.757842995518288e-06, "loss": 0.2683, "step": 1097, "teacher_loss": 0.23246756196022034 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.28118568658828735, "learning_rate": 4.762180135897066e-06, "loss": 0.2612, "step": 1098, "teacher_loss": 0.2589266896247864 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3604264259338379, "learning_rate": 4.766517276275842e-06, "loss": 0.2536, "step": 1099, "teacher_loss": 0.24169890582561493 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3828474283218384, "learning_rate": 4.770854416654619e-06, "loss": 0.2326, "step": 1100, "teacher_loss": 0.21589769423007965 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.2525019645690918, "learning_rate": 4.775191557033396e-06, "loss": 0.2747, "step": 1101, "teacher_loss": 0.2771334648132324 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5346746444702148, "learning_rate": 4.779528697412173e-06, "loss": 0.3208, "step": 1102, "teacher_loss": 0.29699212312698364 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4957199692726135, "learning_rate": 4.78386583779095e-06, "loss": 0.2465, "step": 1103, "teacher_loss": 0.2187555432319641 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.47427546977996826, "learning_rate": 4.788202978169727e-06, "loss": 0.3496, "step": 1104, "teacher_loss": 0.3356937766075134 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3891294002532959, "learning_rate": 4.792540118548503e-06, "loss": 0.3032, "step": 1105, "teacher_loss": 0.2936500906944275 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.28724467754364014, "learning_rate": 4.796877258927281e-06, "loss": 0.2505, "step": 1106, "teacher_loss": 0.24637135863304138 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.31420186161994934, "learning_rate": 4.801214399306058e-06, "loss": 0.3102, "step": 1107, "teacher_loss": 0.309751033782959 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3099789023399353, "learning_rate": 4.8055515396848345e-06, "loss": 0.2141, "step": 1108, "teacher_loss": 0.20348715782165527 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.2552195191383362, "learning_rate": 4.809888680063612e-06, "loss": 0.1964, "step": 1109, "teacher_loss": 0.18982470035552979 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3421964645385742, "learning_rate": 4.814225820442388e-06, "loss": 0.2672, "step": 1110, "teacher_loss": 0.25882214307785034 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4639921188354492, "learning_rate": 4.818562960821166e-06, "loss": 0.2925, "step": 1111, "teacher_loss": 0.27346134185791016 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.2397266924381256, "learning_rate": 4.822900101199942e-06, "loss": 0.2084, "step": 1112, "teacher_loss": 0.20486527681350708 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5527971386909485, "learning_rate": 4.827237241578719e-06, "loss": 0.3695, "step": 1113, "teacher_loss": 0.3491598069667816 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 1.2085955142974854, "learning_rate": 4.831574381957496e-06, "loss": 0.5834, "step": 1114, "teacher_loss": 0.513923704624176 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.8424012660980225, "learning_rate": 4.835911522336274e-06, "loss": 0.3221, "step": 1115, "teacher_loss": 0.26428303122520447 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.6601627469062805, "learning_rate": 4.8402486627150495e-06, "loss": 0.3147, "step": 1116, "teacher_loss": 0.2762717604637146 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.7519962191581726, "learning_rate": 4.844585803093827e-06, "loss": 0.4587, "step": 1117, "teacher_loss": 0.4260726869106293 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4877069592475891, "learning_rate": 4.848922943472603e-06, "loss": 0.276, "step": 1118, "teacher_loss": 0.25252410769462585 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.468178927898407, "learning_rate": 4.853260083851381e-06, "loss": 0.2282, "step": 1119, "teacher_loss": 0.2015511691570282 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5334605574607849, "learning_rate": 4.857597224230158e-06, "loss": 0.2762, "step": 1120, "teacher_loss": 0.2476247102022171 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.5600509643554688, "learning_rate": 4.861934364608934e-06, "loss": 0.298, "step": 1121, "teacher_loss": 0.2689228355884552 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4051320552825928, "learning_rate": 4.866271504987712e-06, "loss": 0.2891, "step": 1122, "teacher_loss": 0.27618369460105896 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.4193379282951355, "learning_rate": 4.870608645366489e-06, "loss": 0.3517, "step": 1123, "teacher_loss": 0.3441656529903412 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.45917046070098877, "learning_rate": 4.874945785745265e-06, "loss": 0.4287, "step": 1124, "teacher_loss": 0.4253392517566681 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.619087815284729, "learning_rate": 4.879282926124042e-06, "loss": 0.3703, "step": 1125, "teacher_loss": 0.34262222051620483 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.29534435272216797, "learning_rate": 4.88362006650282e-06, "loss": 0.2016, "step": 1126, "teacher_loss": 0.1911729872226715 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.44164717197418213, "learning_rate": 4.887957206881596e-06, "loss": 0.1802, "step": 1127, "teacher_loss": 0.15113690495491028 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.24396923184394836, "learning_rate": 4.892294347260373e-06, "loss": 0.2387, "step": 1128, "teacher_loss": 0.23813480138778687 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.3436773121356964, "learning_rate": 4.896631487639149e-06, "loss": 0.2825, "step": 1129, "teacher_loss": 0.2757401466369629 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.401786208152771, "learning_rate": 4.900968628017927e-06, "loss": 0.249, "step": 1130, "teacher_loss": 0.23198971152305603 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.666278600692749, "learning_rate": 4.9053057683967045e-06, "loss": 0.2255, "step": 1131, "teacher_loss": 0.1764870584011078 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 1.011008381843567, "learning_rate": 4.9096429087754805e-06, "loss": 0.321, "step": 1132, "teacher_loss": 0.24438059329986572 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.9981403350830078, "learning_rate": 4.913980049154258e-06, "loss": 0.328, "step": 1133, "teacher_loss": 0.2534977197647095 }, { "compression_loss": 0.0, "epoch": 0.2, "label_loss": 0.582736074924469, "learning_rate": 4.918317189533035e-06, "loss": 0.3337, "step": 1134, "teacher_loss": 0.306058406829834 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.8159241676330566, "learning_rate": 4.922654329911812e-06, "loss": 0.3222, "step": 1135, "teacher_loss": 0.26738640666007996 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.5094940662384033, "learning_rate": 4.926991470290588e-06, "loss": 0.4148, "step": 1136, "teacher_loss": 0.40424269437789917 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.11156779527664185, "learning_rate": 4.931328610669365e-06, "loss": 0.1679, "step": 1137, "teacher_loss": 0.17410853505134583 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.9740431308746338, "learning_rate": 4.935665751048142e-06, "loss": 0.2645, "step": 1138, "teacher_loss": 0.18562020361423492 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.23955108225345612, "learning_rate": 4.9400028914269196e-06, "loss": 0.1559, "step": 1139, "teacher_loss": 0.14657802879810333 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6785324811935425, "learning_rate": 4.9443400318056955e-06, "loss": 0.3267, "step": 1140, "teacher_loss": 0.2876512110233307 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6967428922653198, "learning_rate": 4.948677172184473e-06, "loss": 0.314, "step": 1141, "teacher_loss": 0.2714667320251465 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3970792293548584, "learning_rate": 4.953014312563251e-06, "loss": 0.2588, "step": 1142, "teacher_loss": 0.24341876804828644 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6833770275115967, "learning_rate": 4.957351452942027e-06, "loss": 0.3242, "step": 1143, "teacher_loss": 0.28425711393356323 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.37923964858055115, "learning_rate": 4.961688593320804e-06, "loss": 0.2005, "step": 1144, "teacher_loss": 0.1806732714176178 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.226164311170578, "learning_rate": 4.966025733699581e-06, "loss": 0.2611, "step": 1145, "teacher_loss": 0.2649730443954468 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.31378278136253357, "learning_rate": 4.970362874078358e-06, "loss": 0.1946, "step": 1146, "teacher_loss": 0.1813521385192871 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.8112713098526001, "learning_rate": 4.974700014457135e-06, "loss": 0.2909, "step": 1147, "teacher_loss": 0.2330630123615265 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.4197642207145691, "learning_rate": 4.979037154835911e-06, "loss": 0.2076, "step": 1148, "teacher_loss": 0.18404710292816162 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.44166573882102966, "learning_rate": 4.983374295214688e-06, "loss": 0.2258, "step": 1149, "teacher_loss": 0.20184940099716187 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.27114400267601013, "learning_rate": 4.987711435593466e-06, "loss": 0.2065, "step": 1150, "teacher_loss": 0.19927412271499634 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.655838131904602, "learning_rate": 4.992048575972242e-06, "loss": 0.3295, "step": 1151, "teacher_loss": 0.2932407855987549 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.5922248363494873, "learning_rate": 4.996385716351019e-06, "loss": 0.3031, "step": 1152, "teacher_loss": 0.270932137966156 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3755096197128296, "learning_rate": 5.000722856729797e-06, "loss": 0.1986, "step": 1153, "teacher_loss": 0.17897866666316986 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.4813052713871002, "learning_rate": 5.005059997108573e-06, "loss": 0.1804, "step": 1154, "teacher_loss": 0.14693206548690796 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6488795280456543, "learning_rate": 5.0093971374873505e-06, "loss": 0.2595, "step": 1155, "teacher_loss": 0.21618717908859253 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.2763577699661255, "learning_rate": 5.013734277866127e-06, "loss": 0.228, "step": 1156, "teacher_loss": 0.2226552665233612 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3682769536972046, "learning_rate": 5.018071418244904e-06, "loss": 0.3396, "step": 1157, "teacher_loss": 0.33641955256462097 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.5189266204833984, "learning_rate": 5.022408558623681e-06, "loss": 0.3612, "step": 1158, "teacher_loss": 0.3437250256538391 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.2635785937309265, "learning_rate": 5.026745699002458e-06, "loss": 0.2292, "step": 1159, "teacher_loss": 0.22533442080020905 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.48142436146736145, "learning_rate": 5.031082839381234e-06, "loss": 0.2287, "step": 1160, "teacher_loss": 0.2006313055753708 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.5742180347442627, "learning_rate": 5.035419979760012e-06, "loss": 0.2903, "step": 1161, "teacher_loss": 0.25876420736312866 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.47835981845855713, "learning_rate": 5.039757120138789e-06, "loss": 0.314, "step": 1162, "teacher_loss": 0.2957807779312134 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.32022857666015625, "learning_rate": 5.0440942605175656e-06, "loss": 0.264, "step": 1163, "teacher_loss": 0.25777360796928406 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.4389013648033142, "learning_rate": 5.048431400896343e-06, "loss": 0.2758, "step": 1164, "teacher_loss": 0.2576468884944916 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.30154094099998474, "learning_rate": 5.052768541275119e-06, "loss": 0.2313, "step": 1165, "teacher_loss": 0.22344811260700226 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.23638620972633362, "learning_rate": 5.057105681653897e-06, "loss": 0.1495, "step": 1166, "teacher_loss": 0.13989299535751343 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.27809587121009827, "learning_rate": 5.061442822032673e-06, "loss": 0.2921, "step": 1167, "teacher_loss": 0.2936674952507019 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.17922601103782654, "learning_rate": 5.06577996241145e-06, "loss": 0.2428, "step": 1168, "teacher_loss": 0.2498663365840912 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.4638948440551758, "learning_rate": 5.070117102790227e-06, "loss": 0.2533, "step": 1169, "teacher_loss": 0.22988645732402802 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3509288728237152, "learning_rate": 5.074454243169004e-06, "loss": 0.2361, "step": 1170, "teacher_loss": 0.2233295738697052 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.13288934528827667, "learning_rate": 5.078791383547781e-06, "loss": 0.174, "step": 1171, "teacher_loss": 0.17857147753238678 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.49124500155448914, "learning_rate": 5.083128523926558e-06, "loss": 0.2575, "step": 1172, "teacher_loss": 0.23157858848571777 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.7791959047317505, "learning_rate": 5.087465664305335e-06, "loss": 0.2826, "step": 1173, "teacher_loss": 0.22747664153575897 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.7260560989379883, "learning_rate": 5.091802804684112e-06, "loss": 0.3169, "step": 1174, "teacher_loss": 0.2714585065841675 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.33840230107307434, "learning_rate": 5.096139945062889e-06, "loss": 0.2518, "step": 1175, "teacher_loss": 0.24213892221450806 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.2387653887271881, "learning_rate": 5.100477085441665e-06, "loss": 0.293, "step": 1176, "teacher_loss": 0.29897743463516235 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.2110324203968048, "learning_rate": 5.104814225820443e-06, "loss": 0.2154, "step": 1177, "teacher_loss": 0.21584412455558777 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3478757441043854, "learning_rate": 5.109151366199219e-06, "loss": 0.2402, "step": 1178, "teacher_loss": 0.22825860977172852 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3471631705760956, "learning_rate": 5.1134885065779965e-06, "loss": 0.3092, "step": 1179, "teacher_loss": 0.30500495433807373 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.38604047894477844, "learning_rate": 5.117825646956773e-06, "loss": 0.206, "step": 1180, "teacher_loss": 0.18596185743808746 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.7693082094192505, "learning_rate": 5.12216278733555e-06, "loss": 0.3538, "step": 1181, "teacher_loss": 0.3076856732368469 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.4554254114627838, "learning_rate": 5.126499927714327e-06, "loss": 0.4417, "step": 1182, "teacher_loss": 0.4401906132698059 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.3787423074245453, "learning_rate": 5.1308370680931044e-06, "loss": 0.1972, "step": 1183, "teacher_loss": 0.1769973188638687 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.30436742305755615, "learning_rate": 5.135174208471881e-06, "loss": 0.2488, "step": 1184, "teacher_loss": 0.2426188439130783 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 1.0952677726745605, "learning_rate": 5.139511348850658e-06, "loss": 0.3315, "step": 1185, "teacher_loss": 0.24662281572818756 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6754557490348816, "learning_rate": 5.143848489229435e-06, "loss": 0.3952, "step": 1186, "teacher_loss": 0.36401450634002686 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.553848385810852, "learning_rate": 5.1481856296082115e-06, "loss": 0.2686, "step": 1187, "teacher_loss": 0.23694336414337158 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.6661457419395447, "learning_rate": 5.152522769986989e-06, "loss": 0.2712, "step": 1188, "teacher_loss": 0.22731655836105347 }, { "compression_loss": 0.0, "epoch": 0.21, "label_loss": 0.43331173062324524, "learning_rate": 5.156859910365765e-06, "loss": 0.2492, "step": 1189, "teacher_loss": 0.22869017720222473 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.7166616916656494, "learning_rate": 5.161197050744543e-06, "loss": 0.2524, "step": 1190, "teacher_loss": 0.2007940113544464 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4378998875617981, "learning_rate": 5.1655341911233195e-06, "loss": 0.3477, "step": 1191, "teacher_loss": 0.33770930767059326 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.1930282860994339, "learning_rate": 5.169871331502096e-06, "loss": 0.2316, "step": 1192, "teacher_loss": 0.2358579933643341 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.25824838876724243, "learning_rate": 5.174208471880873e-06, "loss": 0.2358, "step": 1193, "teacher_loss": 0.23328590393066406 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6622394323348999, "learning_rate": 5.178545612259651e-06, "loss": 0.3084, "step": 1194, "teacher_loss": 0.26912936568260193 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.8537653684616089, "learning_rate": 5.1828827526384274e-06, "loss": 0.2982, "step": 1195, "teacher_loss": 0.23648113012313843 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.7899073958396912, "learning_rate": 5.187219893017204e-06, "loss": 0.3672, "step": 1196, "teacher_loss": 0.32022690773010254 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.8777328729629517, "learning_rate": 5.191557033395981e-06, "loss": 0.2678, "step": 1197, "teacher_loss": 0.19998799264431 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.47831371426582336, "learning_rate": 5.195894173774758e-06, "loss": 0.2511, "step": 1198, "teacher_loss": 0.22583192586898804 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.2943691909313202, "learning_rate": 5.200231314153535e-06, "loss": 0.2539, "step": 1199, "teacher_loss": 0.24943341314792633 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.28227388858795166, "learning_rate": 5.204568454532311e-06, "loss": 0.2746, "step": 1200, "teacher_loss": 0.27375128865242004 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.2694704532623291, "learning_rate": 5.208905594911089e-06, "loss": 0.2548, "step": 1201, "teacher_loss": 0.25319626927375793 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.356032133102417, "learning_rate": 5.213242735289866e-06, "loss": 0.2655, "step": 1202, "teacher_loss": 0.2554033398628235 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.21529226005077362, "learning_rate": 5.2175798756686425e-06, "loss": 0.2096, "step": 1203, "teacher_loss": 0.20893582701683044 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.7583473920822144, "learning_rate": 5.221917016047419e-06, "loss": 0.2712, "step": 1204, "teacher_loss": 0.2171146720647812 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3672451674938202, "learning_rate": 5.226254156426197e-06, "loss": 0.1939, "step": 1205, "teacher_loss": 0.17466257512569427 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4197983741760254, "learning_rate": 5.230591296804974e-06, "loss": 0.2593, "step": 1206, "teacher_loss": 0.24141529202461243 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3416770100593567, "learning_rate": 5.2349284371837504e-06, "loss": 0.2444, "step": 1207, "teacher_loss": 0.23355409502983093 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.35045433044433594, "learning_rate": 5.239265577562527e-06, "loss": 0.1875, "step": 1208, "teacher_loss": 0.16938243806362152 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4604479670524597, "learning_rate": 5.243602717941304e-06, "loss": 0.3014, "step": 1209, "teacher_loss": 0.2837444543838501 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.35232654213905334, "learning_rate": 5.247939858320082e-06, "loss": 0.252, "step": 1210, "teacher_loss": 0.24088945984840393 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6564865112304688, "learning_rate": 5.2522769986988575e-06, "loss": 0.2302, "step": 1211, "teacher_loss": 0.18283820152282715 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3088138699531555, "learning_rate": 5.256614139077635e-06, "loss": 0.3209, "step": 1212, "teacher_loss": 0.32225584983825684 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6396206617355347, "learning_rate": 5.260951279456412e-06, "loss": 0.305, "step": 1213, "teacher_loss": 0.2678562104701996 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6946617364883423, "learning_rate": 5.265288419835189e-06, "loss": 0.4101, "step": 1214, "teacher_loss": 0.37847232818603516 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.829551100730896, "learning_rate": 5.2696255602139655e-06, "loss": 0.2812, "step": 1215, "teacher_loss": 0.2202252447605133 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.7917488813400269, "learning_rate": 5.273962700592742e-06, "loss": 0.2726, "step": 1216, "teacher_loss": 0.21489733457565308 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.2725987434387207, "learning_rate": 5.27829984097152e-06, "loss": 0.2404, "step": 1217, "teacher_loss": 0.23681502044200897 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.5505666136741638, "learning_rate": 5.282636981350297e-06, "loss": 0.2664, "step": 1218, "teacher_loss": 0.23480895161628723 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.46690648794174194, "learning_rate": 5.2869741217290734e-06, "loss": 0.2156, "step": 1219, "teacher_loss": 0.18770459294319153 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.24107155203819275, "learning_rate": 5.29131126210785e-06, "loss": 0.1939, "step": 1220, "teacher_loss": 0.1887015402317047 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.27591270208358765, "learning_rate": 5.295648402486628e-06, "loss": 0.2407, "step": 1221, "teacher_loss": 0.2367565929889679 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.5319687128067017, "learning_rate": 5.299985542865404e-06, "loss": 0.3498, "step": 1222, "teacher_loss": 0.3295632004737854 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.2172059714794159, "learning_rate": 5.304322683244181e-06, "loss": 0.3703, "step": 1223, "teacher_loss": 0.38734835386276245 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3539636731147766, "learning_rate": 5.308659823622958e-06, "loss": 0.1869, "step": 1224, "teacher_loss": 0.16837289929389954 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3852180540561676, "learning_rate": 5.312996964001735e-06, "loss": 0.2492, "step": 1225, "teacher_loss": 0.2341034710407257 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4535210132598877, "learning_rate": 5.317334104380512e-06, "loss": 0.227, "step": 1226, "teacher_loss": 0.20178814232349396 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6944804191589355, "learning_rate": 5.3216712447592885e-06, "loss": 0.2949, "step": 1227, "teacher_loss": 0.25045156478881836 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.5647241473197937, "learning_rate": 5.326008385138066e-06, "loss": 0.3082, "step": 1228, "teacher_loss": 0.2797269821166992 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4358232021331787, "learning_rate": 5.330345525516843e-06, "loss": 0.3087, "step": 1229, "teacher_loss": 0.2945837378501892 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3156379461288452, "learning_rate": 5.33468266589562e-06, "loss": 0.261, "step": 1230, "teacher_loss": 0.2549268305301666 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.33211833238601685, "learning_rate": 5.3390198062743964e-06, "loss": 0.2429, "step": 1231, "teacher_loss": 0.2330237179994583 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3029274046421051, "learning_rate": 5.343356946653174e-06, "loss": 0.2221, "step": 1232, "teacher_loss": 0.2131727933883667 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4400797486305237, "learning_rate": 5.34769408703195e-06, "loss": 0.2533, "step": 1233, "teacher_loss": 0.23258748650550842 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3303312063217163, "learning_rate": 5.352031227410728e-06, "loss": 0.2428, "step": 1234, "teacher_loss": 0.23308271169662476 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.6181873083114624, "learning_rate": 5.356368367789504e-06, "loss": 0.287, "step": 1235, "teacher_loss": 0.25020280480384827 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.2569912075996399, "learning_rate": 5.360705508168281e-06, "loss": 0.1979, "step": 1236, "teacher_loss": 0.1913888156414032 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3329373002052307, "learning_rate": 5.365042648547058e-06, "loss": 0.2163, "step": 1237, "teacher_loss": 0.20329231023788452 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.19664104282855988, "learning_rate": 5.369379788925835e-06, "loss": 0.2012, "step": 1238, "teacher_loss": 0.20175495743751526 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.4789338707923889, "learning_rate": 5.373716929304612e-06, "loss": 0.194, "step": 1239, "teacher_loss": 0.16235429048538208 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.36036866903305054, "learning_rate": 5.378054069683389e-06, "loss": 0.2499, "step": 1240, "teacher_loss": 0.23765911161899567 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.17101141810417175, "learning_rate": 5.382391210062166e-06, "loss": 0.1686, "step": 1241, "teacher_loss": 0.16834668815135956 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.5667974948883057, "learning_rate": 5.386728350440943e-06, "loss": 0.3674, "step": 1242, "teacher_loss": 0.34528452157974243 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.3654319643974304, "learning_rate": 5.39106549081972e-06, "loss": 0.2378, "step": 1243, "teacher_loss": 0.2235853374004364 }, { "compression_loss": 0.0, "epoch": 0.22, "label_loss": 0.13516873121261597, "learning_rate": 5.395402631198496e-06, "loss": 0.2272, "step": 1244, "teacher_loss": 0.23741930723190308 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4525141716003418, "learning_rate": 5.399739771577274e-06, "loss": 0.2478, "step": 1245, "teacher_loss": 0.2250119000673294 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5897389650344849, "learning_rate": 5.40407691195605e-06, "loss": 0.2729, "step": 1246, "teacher_loss": 0.23768490552902222 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.38645854592323303, "learning_rate": 5.408414052334827e-06, "loss": 0.2134, "step": 1247, "teacher_loss": 0.1941990852355957 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.7676335573196411, "learning_rate": 5.412751192713604e-06, "loss": 0.3584, "step": 1248, "teacher_loss": 0.3129536807537079 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.3984995484352112, "learning_rate": 5.417088333092381e-06, "loss": 0.2197, "step": 1249, "teacher_loss": 0.1997983753681183 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.31520527601242065, "learning_rate": 5.4214254734711585e-06, "loss": 0.1779, "step": 1250, "teacher_loss": 0.162623792886734 }, { "epoch": 0.23, "eval_exact_match": 79.85808893093662, "eval_f1": 87.24254571758037, "step": 1250 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.39510267972946167, "learning_rate": 5.425762613849935e-06, "loss": 0.3035, "step": 1251, "teacher_loss": 0.29329121112823486 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5366889834403992, "learning_rate": 5.430099754228712e-06, "loss": 0.2783, "step": 1252, "teacher_loss": 0.2495642602443695 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.598315954208374, "learning_rate": 5.434436894607489e-06, "loss": 0.3065, "step": 1253, "teacher_loss": 0.27412959933280945 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4712716341018677, "learning_rate": 5.4387740349862665e-06, "loss": 0.4155, "step": 1254, "teacher_loss": 0.40935713052749634 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5349586606025696, "learning_rate": 5.443111175365042e-06, "loss": 0.2506, "step": 1255, "teacher_loss": 0.21899788081645966 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.3278942406177521, "learning_rate": 5.44744831574382e-06, "loss": 0.1678, "step": 1256, "teacher_loss": 0.14998933672904968 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.20256701111793518, "learning_rate": 5.451785456122596e-06, "loss": 0.2116, "step": 1257, "teacher_loss": 0.21258941292762756 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.11660751700401306, "learning_rate": 5.456122596501374e-06, "loss": 0.2559, "step": 1258, "teacher_loss": 0.27139660716056824 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.46190154552459717, "learning_rate": 5.46045973688015e-06, "loss": 0.2599, "step": 1259, "teacher_loss": 0.23741409182548523 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.2473524510860443, "learning_rate": 5.464796877258927e-06, "loss": 0.1778, "step": 1260, "teacher_loss": 0.17002329230308533 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4944515824317932, "learning_rate": 5.469134017637705e-06, "loss": 0.2978, "step": 1261, "teacher_loss": 0.2759248614311218 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.8135677576065063, "learning_rate": 5.4734711580164815e-06, "loss": 0.3457, "step": 1262, "teacher_loss": 0.2936977446079254 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.23547472059726715, "learning_rate": 5.477808298395258e-06, "loss": 0.257, "step": 1263, "teacher_loss": 0.25939512252807617 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5694279074668884, "learning_rate": 5.482145438774035e-06, "loss": 0.3007, "step": 1264, "teacher_loss": 0.2708965837955475 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5643197298049927, "learning_rate": 5.486482579152812e-06, "loss": 0.3878, "step": 1265, "teacher_loss": 0.3681699335575104 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4379342794418335, "learning_rate": 5.490819719531589e-06, "loss": 0.2588, "step": 1266, "teacher_loss": 0.23889079689979553 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.2910255789756775, "learning_rate": 5.495156859910366e-06, "loss": 0.2604, "step": 1267, "teacher_loss": 0.2569883465766907 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.16718003153800964, "learning_rate": 5.499494000289142e-06, "loss": 0.2275, "step": 1268, "teacher_loss": 0.23424017429351807 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4267197251319885, "learning_rate": 5.50383114066792e-06, "loss": 0.2382, "step": 1269, "teacher_loss": 0.21722105145454407 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.1430625021457672, "learning_rate": 5.508168281046697e-06, "loss": 0.193, "step": 1270, "teacher_loss": 0.19852329790592194 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.45203936100006104, "learning_rate": 5.512505421425473e-06, "loss": 0.2597, "step": 1271, "teacher_loss": 0.23832622170448303 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.624198317527771, "learning_rate": 5.516842561804251e-06, "loss": 0.2792, "step": 1272, "teacher_loss": 0.24090111255645752 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.2790127992630005, "learning_rate": 5.521179702183028e-06, "loss": 0.1946, "step": 1273, "teacher_loss": 0.18517965078353882 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5398377776145935, "learning_rate": 5.5255168425618045e-06, "loss": 0.2293, "step": 1274, "teacher_loss": 0.19480839371681213 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5422208309173584, "learning_rate": 5.529853982940581e-06, "loss": 0.3687, "step": 1275, "teacher_loss": 0.349415123462677 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.6231808066368103, "learning_rate": 5.534191123319358e-06, "loss": 0.2541, "step": 1276, "teacher_loss": 0.21312439441680908 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4202580451965332, "learning_rate": 5.538528263698135e-06, "loss": 0.236, "step": 1277, "teacher_loss": 0.2155061662197113 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.29901033639907837, "learning_rate": 5.5428654040769125e-06, "loss": 0.2933, "step": 1278, "teacher_loss": 0.2926676869392395 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5168792605400085, "learning_rate": 5.547202544455688e-06, "loss": 0.2652, "step": 1279, "teacher_loss": 0.23727889358997345 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.31214818358421326, "learning_rate": 5.551539684834466e-06, "loss": 0.2335, "step": 1280, "teacher_loss": 0.2247873991727829 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.4879359304904938, "learning_rate": 5.555876825213243e-06, "loss": 0.2205, "step": 1281, "teacher_loss": 0.1908344328403473 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.14473803341388702, "learning_rate": 5.56021396559202e-06, "loss": 0.1511, "step": 1282, "teacher_loss": 0.1518411785364151 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.2977965474128723, "learning_rate": 5.564551105970797e-06, "loss": 0.1923, "step": 1283, "teacher_loss": 0.18056859076023102 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.38523414731025696, "learning_rate": 5.568888246349574e-06, "loss": 0.2626, "step": 1284, "teacher_loss": 0.24892790615558624 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.6362987756729126, "learning_rate": 5.573225386728351e-06, "loss": 0.3375, "step": 1285, "teacher_loss": 0.30428051948547363 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.1684516966342926, "learning_rate": 5.5775625271071275e-06, "loss": 0.19, "step": 1286, "teacher_loss": 0.19237330555915833 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.3424099087715149, "learning_rate": 5.581899667485904e-06, "loss": 0.2258, "step": 1287, "teacher_loss": 0.21282604336738586 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.6559836864471436, "learning_rate": 5.586236807864681e-06, "loss": 0.3504, "step": 1288, "teacher_loss": 0.31644487380981445 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.24155014753341675, "learning_rate": 5.590573948243459e-06, "loss": 0.2019, "step": 1289, "teacher_loss": 0.197509303689003 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.36160242557525635, "learning_rate": 5.594911088622235e-06, "loss": 0.1629, "step": 1290, "teacher_loss": 0.14078977704048157 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.20559725165367126, "learning_rate": 5.599248229001012e-06, "loss": 0.1684, "step": 1291, "teacher_loss": 0.16431768238544464 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.37003186345100403, "learning_rate": 5.603585369379789e-06, "loss": 0.3106, "step": 1292, "teacher_loss": 0.3040315508842468 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.23484672605991364, "learning_rate": 5.607922509758566e-06, "loss": 0.2924, "step": 1293, "teacher_loss": 0.2988301217556 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.8479714393615723, "learning_rate": 5.612259650137343e-06, "loss": 0.4411, "step": 1294, "teacher_loss": 0.3958855867385864 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5910420417785645, "learning_rate": 5.616596790516119e-06, "loss": 0.2619, "step": 1295, "teacher_loss": 0.22527837753295898 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.3322191834449768, "learning_rate": 5.620933930894897e-06, "loss": 0.2361, "step": 1296, "teacher_loss": 0.2253904938697815 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.39758002758026123, "learning_rate": 5.625271071273674e-06, "loss": 0.2863, "step": 1297, "teacher_loss": 0.273922324180603 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.17350620031356812, "learning_rate": 5.6296082116524505e-06, "loss": 0.2141, "step": 1298, "teacher_loss": 0.21862801909446716 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.3849458694458008, "learning_rate": 5.633945352031227e-06, "loss": 0.2712, "step": 1299, "teacher_loss": 0.2586025595664978 }, { "compression_loss": 0.0, "epoch": 0.23, "label_loss": 0.5989031791687012, "learning_rate": 5.638282492410005e-06, "loss": 0.2403, "step": 1300, "teacher_loss": 0.2005019187927246 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.8836695551872253, "learning_rate": 5.642619632788781e-06, "loss": 0.3321, "step": 1301, "teacher_loss": 0.27083802223205566 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4654373526573181, "learning_rate": 5.6469567731675585e-06, "loss": 0.2394, "step": 1302, "teacher_loss": 0.2142777144908905 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2677880823612213, "learning_rate": 5.651293913546335e-06, "loss": 0.2512, "step": 1303, "teacher_loss": 0.2493935525417328 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.08647237718105316, "learning_rate": 5.655631053925112e-06, "loss": 0.2215, "step": 1304, "teacher_loss": 0.23650875687599182 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4774653911590576, "learning_rate": 5.65996819430389e-06, "loss": 0.2775, "step": 1305, "teacher_loss": 0.2552984356880188 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.23576687276363373, "learning_rate": 5.6643053346826656e-06, "loss": 0.1949, "step": 1306, "teacher_loss": 0.19039157032966614 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2447567880153656, "learning_rate": 5.668642475061443e-06, "loss": 0.3084, "step": 1307, "teacher_loss": 0.3154921233654022 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.35558998584747314, "learning_rate": 5.67297961544022e-06, "loss": 0.2093, "step": 1308, "teacher_loss": 0.1930309236049652 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.7455028295516968, "learning_rate": 5.677316755818997e-06, "loss": 0.2965, "step": 1309, "teacher_loss": 0.24666452407836914 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2909244894981384, "learning_rate": 5.6816538961977735e-06, "loss": 0.3713, "step": 1310, "teacher_loss": 0.3802812993526459 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2900809347629547, "learning_rate": 5.685991036576551e-06, "loss": 0.222, "step": 1311, "teacher_loss": 0.2144090235233307 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5351499319076538, "learning_rate": 5.690328176955327e-06, "loss": 0.2254, "step": 1312, "teacher_loss": 0.19096694886684418 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5194671154022217, "learning_rate": 5.694665317334105e-06, "loss": 0.2511, "step": 1313, "teacher_loss": 0.22131946682929993 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5641889572143555, "learning_rate": 5.6990024577128815e-06, "loss": 0.2561, "step": 1314, "teacher_loss": 0.22190842032432556 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.13668163120746613, "learning_rate": 5.703339598091658e-06, "loss": 0.1915, "step": 1315, "teacher_loss": 0.19760063290596008 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5113697648048401, "learning_rate": 5.707676738470436e-06, "loss": 0.3508, "step": 1316, "teacher_loss": 0.33297199010849 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5250779390335083, "learning_rate": 5.712013878849212e-06, "loss": 0.2526, "step": 1317, "teacher_loss": 0.22234031558036804 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.23730963468551636, "learning_rate": 5.716351019227989e-06, "loss": 0.3092, "step": 1318, "teacher_loss": 0.3171396851539612 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.31640592217445374, "learning_rate": 5.720688159606766e-06, "loss": 0.258, "step": 1319, "teacher_loss": 0.2515076994895935 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5580583810806274, "learning_rate": 5.725025299985543e-06, "loss": 0.2585, "step": 1320, "teacher_loss": 0.22516728937625885 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.46960604190826416, "learning_rate": 5.72936244036432e-06, "loss": 0.2795, "step": 1321, "teacher_loss": 0.2584110498428345 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.39644157886505127, "learning_rate": 5.733699580743097e-06, "loss": 0.2545, "step": 1322, "teacher_loss": 0.23873931169509888 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2446393072605133, "learning_rate": 5.738036721121873e-06, "loss": 0.2173, "step": 1323, "teacher_loss": 0.2142748087644577 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4224982261657715, "learning_rate": 5.742373861500651e-06, "loss": 0.224, "step": 1324, "teacher_loss": 0.20190811157226562 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.42481768131256104, "learning_rate": 5.746711001879428e-06, "loss": 0.3055, "step": 1325, "teacher_loss": 0.292196124792099 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4198145866394043, "learning_rate": 5.7510481422582045e-06, "loss": 0.2451, "step": 1326, "teacher_loss": 0.22565031051635742 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.9430245161056519, "learning_rate": 5.755385282636982e-06, "loss": 0.2495, "step": 1327, "teacher_loss": 0.17241689562797546 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.7390469908714294, "learning_rate": 5.759722423015758e-06, "loss": 0.2744, "step": 1328, "teacher_loss": 0.22279001772403717 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 1.0234602689743042, "learning_rate": 5.764059563394536e-06, "loss": 0.3094, "step": 1329, "teacher_loss": 0.23007167875766754 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5311638116836548, "learning_rate": 5.768396703773312e-06, "loss": 0.2495, "step": 1330, "teacher_loss": 0.2181762158870697 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.625234842300415, "learning_rate": 5.772733844152089e-06, "loss": 0.3367, "step": 1331, "teacher_loss": 0.304587721824646 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4760865271091461, "learning_rate": 5.777070984530866e-06, "loss": 0.3806, "step": 1332, "teacher_loss": 0.3700268268585205 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5296129584312439, "learning_rate": 5.7814081249096436e-06, "loss": 0.2602, "step": 1333, "teacher_loss": 0.23023159801959991 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.1823360174894333, "learning_rate": 5.7857452652884195e-06, "loss": 0.214, "step": 1334, "teacher_loss": 0.21756044030189514 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.3620914816856384, "learning_rate": 5.790082405667197e-06, "loss": 0.2632, "step": 1335, "teacher_loss": 0.25224554538726807 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.28975558280944824, "learning_rate": 5.794419546045974e-06, "loss": 0.196, "step": 1336, "teacher_loss": 0.18557213246822357 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.3370620012283325, "learning_rate": 5.798756686424751e-06, "loss": 0.2489, "step": 1337, "teacher_loss": 0.23909465968608856 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.3273800015449524, "learning_rate": 5.803093826803528e-06, "loss": 0.2047, "step": 1338, "teacher_loss": 0.19109182059764862 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4376048743724823, "learning_rate": 5.807430967182304e-06, "loss": 0.3761, "step": 1339, "teacher_loss": 0.3692399263381958 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.3818703889846802, "learning_rate": 5.811768107561082e-06, "loss": 0.3125, "step": 1340, "teacher_loss": 0.30478206276893616 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.7969062328338623, "learning_rate": 5.816105247939859e-06, "loss": 0.3746, "step": 1341, "teacher_loss": 0.32772839069366455 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5678620934486389, "learning_rate": 5.820442388318635e-06, "loss": 0.3341, "step": 1342, "teacher_loss": 0.3081613779067993 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4068654179573059, "learning_rate": 5.824779528697412e-06, "loss": 0.2339, "step": 1343, "teacher_loss": 0.21470209956169128 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.5724692344665527, "learning_rate": 5.829116669076189e-06, "loss": 0.3003, "step": 1344, "teacher_loss": 0.27004462480545044 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.685820460319519, "learning_rate": 5.833453809454966e-06, "loss": 0.2911, "step": 1345, "teacher_loss": 0.2472207099199295 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.22002211213111877, "learning_rate": 5.837790949833743e-06, "loss": 0.1884, "step": 1346, "teacher_loss": 0.18486765027046204 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.2986704111099243, "learning_rate": 5.84212809021252e-06, "loss": 0.2203, "step": 1347, "teacher_loss": 0.21157175302505493 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.554895281791687, "learning_rate": 5.846465230591297e-06, "loss": 0.2153, "step": 1348, "teacher_loss": 0.17756152153015137 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.15596388280391693, "learning_rate": 5.8508023709700745e-06, "loss": 0.1756, "step": 1349, "teacher_loss": 0.17774304747581482 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.15273180603981018, "learning_rate": 5.8551395113488504e-06, "loss": 0.1577, "step": 1350, "teacher_loss": 0.15828032791614532 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.20243240892887115, "learning_rate": 5.859476651727628e-06, "loss": 0.1632, "step": 1351, "teacher_loss": 0.1588045060634613 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.19009071588516235, "learning_rate": 5.863813792106405e-06, "loss": 0.2208, "step": 1352, "teacher_loss": 0.22426247596740723 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.43891090154647827, "learning_rate": 5.868150932485182e-06, "loss": 0.4042, "step": 1353, "teacher_loss": 0.40037986636161804 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.4940303564071655, "learning_rate": 5.872488072863958e-06, "loss": 0.2474, "step": 1354, "teacher_loss": 0.21997055411338806 }, { "compression_loss": 0.0, "epoch": 0.24, "label_loss": 0.6147751808166504, "learning_rate": 5.876825213242735e-06, "loss": 0.2412, "step": 1355, "teacher_loss": 0.19964075088500977 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4485546946525574, "learning_rate": 5.881162353621512e-06, "loss": 0.2457, "step": 1356, "teacher_loss": 0.22310753166675568 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.6731774806976318, "learning_rate": 5.8854994940002896e-06, "loss": 0.285, "step": 1357, "teacher_loss": 0.24181930720806122 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.7686455845832825, "learning_rate": 5.889836634379066e-06, "loss": 0.3114, "step": 1358, "teacher_loss": 0.26064521074295044 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 1.2206658124923706, "learning_rate": 5.894173774757843e-06, "loss": 0.4777, "step": 1359, "teacher_loss": 0.3951765298843384 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.3752766251564026, "learning_rate": 5.898510915136621e-06, "loss": 0.378, "step": 1360, "teacher_loss": 0.3783051073551178 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.48867619037628174, "learning_rate": 5.902848055515397e-06, "loss": 0.3041, "step": 1361, "teacher_loss": 0.28364354372024536 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.8336423635482788, "learning_rate": 5.907185195894174e-06, "loss": 0.306, "step": 1362, "teacher_loss": 0.2473602145910263 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4570959508419037, "learning_rate": 5.91152233627295e-06, "loss": 0.3004, "step": 1363, "teacher_loss": 0.282977819442749 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.31784719228744507, "learning_rate": 5.915859476651728e-06, "loss": 0.2566, "step": 1364, "teacher_loss": 0.24979303777217865 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.7443061470985413, "learning_rate": 5.920196617030505e-06, "loss": 0.2679, "step": 1365, "teacher_loss": 0.21493223309516907 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.2886722683906555, "learning_rate": 5.924533757409281e-06, "loss": 0.3029, "step": 1366, "teacher_loss": 0.3044867515563965 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.3999277949333191, "learning_rate": 5.928870897788058e-06, "loss": 0.226, "step": 1367, "teacher_loss": 0.2066381871700287 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.6261770725250244, "learning_rate": 5.933208038166836e-06, "loss": 0.383, "step": 1368, "teacher_loss": 0.35600489377975464 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.5161714553833008, "learning_rate": 5.9375451785456126e-06, "loss": 0.3483, "step": 1369, "teacher_loss": 0.3296029269695282 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.41568416357040405, "learning_rate": 5.941882318924389e-06, "loss": 0.245, "step": 1370, "teacher_loss": 0.22598525881767273 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.7609728574752808, "learning_rate": 5.946219459303167e-06, "loss": 0.3333, "step": 1371, "teacher_loss": 0.28575897216796875 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.2975355386734009, "learning_rate": 5.950556599681943e-06, "loss": 0.1979, "step": 1372, "teacher_loss": 0.18677443265914917 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.45459914207458496, "learning_rate": 5.9548937400607205e-06, "loss": 0.2376, "step": 1373, "teacher_loss": 0.21353699266910553 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.6039212346076965, "learning_rate": 5.9592308804394964e-06, "loss": 0.399, "step": 1374, "teacher_loss": 0.37624669075012207 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.42793986201286316, "learning_rate": 5.963568020818274e-06, "loss": 0.2062, "step": 1375, "teacher_loss": 0.18154001235961914 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.41243982315063477, "learning_rate": 5.967905161197051e-06, "loss": 0.2315, "step": 1376, "teacher_loss": 0.21144677698612213 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.30396029353141785, "learning_rate": 5.972242301575828e-06, "loss": 0.2223, "step": 1377, "teacher_loss": 0.21323281526565552 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.5517523288726807, "learning_rate": 5.976579441954604e-06, "loss": 0.2223, "step": 1378, "teacher_loss": 0.1857045292854309 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.39856451749801636, "learning_rate": 5.980916582333382e-06, "loss": 0.26, "step": 1379, "teacher_loss": 0.2445628046989441 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.2551502585411072, "learning_rate": 5.985253722712159e-06, "loss": 0.2171, "step": 1380, "teacher_loss": 0.21285447478294373 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.40075159072875977, "learning_rate": 5.9895908630909356e-06, "loss": 0.2789, "step": 1381, "teacher_loss": 0.26538708806037903 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.15860755741596222, "learning_rate": 5.993928003469713e-06, "loss": 0.1825, "step": 1382, "teacher_loss": 0.1851159930229187 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.22349731624126434, "learning_rate": 5.998265143848489e-06, "loss": 0.2449, "step": 1383, "teacher_loss": 0.2472614198923111 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.9152520895004272, "learning_rate": 6.002602284227267e-06, "loss": 0.4403, "step": 1384, "teacher_loss": 0.38753193616867065 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.2609570026397705, "learning_rate": 6.006939424606043e-06, "loss": 0.244, "step": 1385, "teacher_loss": 0.2420898824930191 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.253960520029068, "learning_rate": 6.01127656498482e-06, "loss": 0.275, "step": 1386, "teacher_loss": 0.27731263637542725 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4052412211894989, "learning_rate": 6.015613705363597e-06, "loss": 0.2549, "step": 1387, "teacher_loss": 0.23824575543403625 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.5720700621604919, "learning_rate": 6.019950845742374e-06, "loss": 0.2996, "step": 1388, "teacher_loss": 0.2693380117416382 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4965176582336426, "learning_rate": 6.024287986121151e-06, "loss": 0.3023, "step": 1389, "teacher_loss": 0.2807462513446808 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.8628653287887573, "learning_rate": 6.028625126499928e-06, "loss": 0.2695, "step": 1390, "teacher_loss": 0.20353132486343384 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4072635769844055, "learning_rate": 6.032962266878705e-06, "loss": 0.3392, "step": 1391, "teacher_loss": 0.33163323998451233 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.6146756410598755, "learning_rate": 6.037299407257482e-06, "loss": 0.2608, "step": 1392, "teacher_loss": 0.22149290144443512 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.21091735363006592, "learning_rate": 6.0416365476362585e-06, "loss": 0.2156, "step": 1393, "teacher_loss": 0.21609428524971008 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.43321549892425537, "learning_rate": 6.045973688015035e-06, "loss": 0.2462, "step": 1394, "teacher_loss": 0.22545374929904938 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4395361542701721, "learning_rate": 6.050310828393813e-06, "loss": 0.4239, "step": 1395, "teacher_loss": 0.42217665910720825 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.3347514271736145, "learning_rate": 6.054647968772589e-06, "loss": 0.2103, "step": 1396, "teacher_loss": 0.19650761783123016 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.29272395372390747, "learning_rate": 6.0589851091513665e-06, "loss": 0.244, "step": 1397, "teacher_loss": 0.23856772482395172 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.3474947512149811, "learning_rate": 6.063322249530143e-06, "loss": 0.1883, "step": 1398, "teacher_loss": 0.1706659495830536 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.39376091957092285, "learning_rate": 6.06765938990892e-06, "loss": 0.2387, "step": 1399, "teacher_loss": 0.22142837941646576 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.1845490038394928, "learning_rate": 6.071996530287697e-06, "loss": 0.2102, "step": 1400, "teacher_loss": 0.2130938470363617 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.24443325400352478, "learning_rate": 6.0763336706664744e-06, "loss": 0.1888, "step": 1401, "teacher_loss": 0.18261878192424774 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4610665440559387, "learning_rate": 6.080670811045251e-06, "loss": 0.2678, "step": 1402, "teacher_loss": 0.2463333010673523 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.2577584385871887, "learning_rate": 6.085007951424028e-06, "loss": 0.1618, "step": 1403, "teacher_loss": 0.1511085033416748 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.7570144534111023, "learning_rate": 6.089345091802805e-06, "loss": 0.4051, "step": 1404, "teacher_loss": 0.36602944135665894 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.5082919001579285, "learning_rate": 6.0936822321815815e-06, "loss": 0.3033, "step": 1405, "teacher_loss": 0.28048452734947205 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.3867013156414032, "learning_rate": 6.098019372560359e-06, "loss": 0.3065, "step": 1406, "teacher_loss": 0.29753541946411133 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.42510563135147095, "learning_rate": 6.102356512939135e-06, "loss": 0.2138, "step": 1407, "teacher_loss": 0.1903550922870636 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.4607405662536621, "learning_rate": 6.106693653317913e-06, "loss": 0.2426, "step": 1408, "teacher_loss": 0.21835315227508545 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.5637032985687256, "learning_rate": 6.1110307936966895e-06, "loss": 0.2444, "step": 1409, "teacher_loss": 0.20892012119293213 }, { "compression_loss": 0.0, "epoch": 0.25, "label_loss": 0.41851964592933655, "learning_rate": 6.115367934075466e-06, "loss": 0.3185, "step": 1410, "teacher_loss": 0.3073967695236206 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.4464087188243866, "learning_rate": 6.119705074454243e-06, "loss": 0.2377, "step": 1411, "teacher_loss": 0.21449559926986694 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.35517561435699463, "learning_rate": 6.12404221483302e-06, "loss": 0.2409, "step": 1412, "teacher_loss": 0.22819578647613525 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5694050788879395, "learning_rate": 6.1283793552117974e-06, "loss": 0.3702, "step": 1413, "teacher_loss": 0.34801793098449707 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.9597104787826538, "learning_rate": 6.132716495590574e-06, "loss": 0.2499, "step": 1414, "teacher_loss": 0.17097695171833038 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.34097692370414734, "learning_rate": 6.137053635969351e-06, "loss": 0.2533, "step": 1415, "teacher_loss": 0.24356834590435028 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 1.5129200220108032, "learning_rate": 6.141390776348128e-06, "loss": 0.3439, "step": 1416, "teacher_loss": 0.21406057476997375 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.4174276888370514, "learning_rate": 6.145727916726905e-06, "loss": 0.2405, "step": 1417, "teacher_loss": 0.2208547592163086 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5416680574417114, "learning_rate": 6.150065057105681e-06, "loss": 0.2827, "step": 1418, "teacher_loss": 0.25397396087646484 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.6673641204833984, "learning_rate": 6.154402197484459e-06, "loss": 0.3037, "step": 1419, "teacher_loss": 0.2632533311843872 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.08588457852602005, "learning_rate": 6.158739337863236e-06, "loss": 0.1625, "step": 1420, "teacher_loss": 0.17100778222084045 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.8911499977111816, "learning_rate": 6.1630764782420125e-06, "loss": 0.3628, "step": 1421, "teacher_loss": 0.30414801836013794 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.6807798147201538, "learning_rate": 6.167413618620789e-06, "loss": 0.2802, "step": 1422, "teacher_loss": 0.23572000861167908 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5705142021179199, "learning_rate": 6.171750758999566e-06, "loss": 0.3701, "step": 1423, "teacher_loss": 0.34780365228652954 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.42614126205444336, "learning_rate": 6.176087899378344e-06, "loss": 0.2305, "step": 1424, "teacher_loss": 0.20878547430038452 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2949012815952301, "learning_rate": 6.1804250397571204e-06, "loss": 0.2294, "step": 1425, "teacher_loss": 0.2221618890762329 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.16361354291439056, "learning_rate": 6.184762180135897e-06, "loss": 0.1988, "step": 1426, "teacher_loss": 0.20274531841278076 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5443438291549683, "learning_rate": 6.189099320514674e-06, "loss": 0.2351, "step": 1427, "teacher_loss": 0.2007354199886322 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.44500407576560974, "learning_rate": 6.193436460893452e-06, "loss": 0.336, "step": 1428, "teacher_loss": 0.32389694452285767 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.9601039886474609, "learning_rate": 6.1977736012722275e-06, "loss": 0.5154, "step": 1429, "teacher_loss": 0.465933620929718 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2091706395149231, "learning_rate": 6.202110741651005e-06, "loss": 0.2222, "step": 1430, "teacher_loss": 0.22361059486865997 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.3767629861831665, "learning_rate": 6.206447882029782e-06, "loss": 0.247, "step": 1431, "teacher_loss": 0.2325373888015747 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.41331759095191956, "learning_rate": 6.210785022408559e-06, "loss": 0.2803, "step": 1432, "teacher_loss": 0.2655293941497803 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5075310468673706, "learning_rate": 6.2151221627873355e-06, "loss": 0.292, "step": 1433, "teacher_loss": 0.26809728145599365 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 1.3871757984161377, "learning_rate": 6.219459303166112e-06, "loss": 0.421, "step": 1434, "teacher_loss": 0.3136047124862671 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.23288634419441223, "learning_rate": 6.22379644354489e-06, "loss": 0.2456, "step": 1435, "teacher_loss": 0.24706783890724182 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.24160626530647278, "learning_rate": 6.228133583923667e-06, "loss": 0.23, "step": 1436, "teacher_loss": 0.2286582887172699 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.47641247510910034, "learning_rate": 6.2324707243024434e-06, "loss": 0.2617, "step": 1437, "teacher_loss": 0.2377924919128418 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2589383125305176, "learning_rate": 6.23680786468122e-06, "loss": 0.226, "step": 1438, "teacher_loss": 0.22230902314186096 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.4442060589790344, "learning_rate": 6.241145005059998e-06, "loss": 0.2889, "step": 1439, "teacher_loss": 0.2716790437698364 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5592027902603149, "learning_rate": 6.245482145438774e-06, "loss": 0.3425, "step": 1440, "teacher_loss": 0.3184077739715576 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.44950759410858154, "learning_rate": 6.249819285817551e-06, "loss": 0.2074, "step": 1441, "teacher_loss": 0.18050439655780792 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.6417919397354126, "learning_rate": 6.254156426196327e-06, "loss": 0.2697, "step": 1442, "teacher_loss": 0.22840999066829681 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.3183668851852417, "learning_rate": 6.258493566575105e-06, "loss": 0.2171, "step": 1443, "teacher_loss": 0.2058173418045044 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.251525342464447, "learning_rate": 6.262830706953882e-06, "loss": 0.2528, "step": 1444, "teacher_loss": 0.25295931100845337 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.6930206418037415, "learning_rate": 6.2671678473326585e-06, "loss": 0.3206, "step": 1445, "teacher_loss": 0.27924779057502747 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.839125394821167, "learning_rate": 6.271504987711436e-06, "loss": 0.3106, "step": 1446, "teacher_loss": 0.25189632177352905 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.583656370639801, "learning_rate": 6.275842128090213e-06, "loss": 0.2614, "step": 1447, "teacher_loss": 0.22556567192077637 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.15846890211105347, "learning_rate": 6.28017926846899e-06, "loss": 0.2701, "step": 1448, "teacher_loss": 0.2825484871864319 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.46844011545181274, "learning_rate": 6.284516408847766e-06, "loss": 0.1997, "step": 1449, "teacher_loss": 0.16980135440826416 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2734919786453247, "learning_rate": 6.288853549226544e-06, "loss": 0.1866, "step": 1450, "teacher_loss": 0.17698529362678528 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 1.0001075267791748, "learning_rate": 6.29319068960532e-06, "loss": 0.439, "step": 1451, "teacher_loss": 0.37663692235946655 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5742713212966919, "learning_rate": 6.297527829984098e-06, "loss": 0.3119, "step": 1452, "teacher_loss": 0.28274089097976685 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.13663902878761292, "learning_rate": 6.3018649703628735e-06, "loss": 0.1616, "step": 1453, "teacher_loss": 0.16433680057525635 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.42710593342781067, "learning_rate": 6.306202110741651e-06, "loss": 0.1928, "step": 1454, "teacher_loss": 0.16672807931900024 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.21836897730827332, "learning_rate": 6.310539251120428e-06, "loss": 0.2175, "step": 1455, "teacher_loss": 0.21740569174289703 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.3814011514186859, "learning_rate": 6.314876391499205e-06, "loss": 0.2061, "step": 1456, "teacher_loss": 0.18667420744895935 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.3604958951473236, "learning_rate": 6.319213531877982e-06, "loss": 0.2128, "step": 1457, "teacher_loss": 0.19633695483207703 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.3024424910545349, "learning_rate": 6.323550672256759e-06, "loss": 0.2196, "step": 1458, "teacher_loss": 0.21035520732402802 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.8184963464736938, "learning_rate": 6.327887812635536e-06, "loss": 0.2672, "step": 1459, "teacher_loss": 0.20598170161247253 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.5430244207382202, "learning_rate": 6.332224953014313e-06, "loss": 0.263, "step": 1460, "teacher_loss": 0.23191845417022705 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.25123992562294006, "learning_rate": 6.33656209339309e-06, "loss": 0.2486, "step": 1461, "teacher_loss": 0.24826672673225403 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.46620747447013855, "learning_rate": 6.340899233771866e-06, "loss": 0.294, "step": 1462, "teacher_loss": 0.2748807668685913 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2556838095188141, "learning_rate": 6.345236374150644e-06, "loss": 0.3202, "step": 1463, "teacher_loss": 0.3273867964744568 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.4673628807067871, "learning_rate": 6.34957351452942e-06, "loss": 0.3111, "step": 1464, "teacher_loss": 0.293710857629776 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.28518030047416687, "learning_rate": 6.353910654908197e-06, "loss": 0.1844, "step": 1465, "teacher_loss": 0.17314721643924713 }, { "compression_loss": 0.0, "epoch": 0.26, "label_loss": 0.2275487780570984, "learning_rate": 6.358247795286975e-06, "loss": 0.2176, "step": 1466, "teacher_loss": 0.21654535830020905 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.7746908068656921, "learning_rate": 6.362584935665751e-06, "loss": 0.308, "step": 1467, "teacher_loss": 0.256172776222229 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.33609867095947266, "learning_rate": 6.3669220760445285e-06, "loss": 0.2475, "step": 1468, "teacher_loss": 0.23766066133975983 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.42383456230163574, "learning_rate": 6.371259216423305e-06, "loss": 0.2354, "step": 1469, "teacher_loss": 0.2144349068403244 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.23633140325546265, "learning_rate": 6.375596356802082e-06, "loss": 0.2335, "step": 1470, "teacher_loss": 0.23315949738025665 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5437577962875366, "learning_rate": 6.379933497180859e-06, "loss": 0.2238, "step": 1471, "teacher_loss": 0.18822576105594635 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3644137978553772, "learning_rate": 6.384270637559636e-06, "loss": 0.1943, "step": 1472, "teacher_loss": 0.17536523938179016 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.7902100682258606, "learning_rate": 6.388607777938412e-06, "loss": 0.2723, "step": 1473, "teacher_loss": 0.2147947996854782 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.9066468477249146, "learning_rate": 6.39294491831719e-06, "loss": 0.3154, "step": 1474, "teacher_loss": 0.24971899390220642 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5707660913467407, "learning_rate": 6.397282058695966e-06, "loss": 0.3259, "step": 1475, "teacher_loss": 0.2986467480659485 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.39851319789886475, "learning_rate": 6.401619199074744e-06, "loss": 0.3067, "step": 1476, "teacher_loss": 0.2964468002319336 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.45031142234802246, "learning_rate": 6.405956339453521e-06, "loss": 0.2675, "step": 1477, "teacher_loss": 0.2471812218427658 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.45350366830825806, "learning_rate": 6.410293479832297e-06, "loss": 0.2928, "step": 1478, "teacher_loss": 0.27496200799942017 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.452339768409729, "learning_rate": 6.414630620211075e-06, "loss": 0.2043, "step": 1479, "teacher_loss": 0.17673403024673462 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.37310177087783813, "learning_rate": 6.4189677605898515e-06, "loss": 0.24, "step": 1480, "teacher_loss": 0.2251579463481903 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.6626706123352051, "learning_rate": 6.423304900968628e-06, "loss": 0.3455, "step": 1481, "teacher_loss": 0.31028690934181213 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3392796814441681, "learning_rate": 6.427642041347405e-06, "loss": 0.2424, "step": 1482, "teacher_loss": 0.23168599605560303 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3476063311100006, "learning_rate": 6.431979181726182e-06, "loss": 0.2076, "step": 1483, "teacher_loss": 0.1920507401227951 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3485298454761505, "learning_rate": 6.436316322104959e-06, "loss": 0.2023, "step": 1484, "teacher_loss": 0.18600186705589294 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3874940872192383, "learning_rate": 6.440653462483736e-06, "loss": 0.2809, "step": 1485, "teacher_loss": 0.2690887153148651 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.1845911592245102, "learning_rate": 6.444990602862512e-06, "loss": 0.1195, "step": 1486, "teacher_loss": 0.11229465901851654 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.4796159863471985, "learning_rate": 6.44932774324129e-06, "loss": 0.2279, "step": 1487, "teacher_loss": 0.19996792078018188 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5086467266082764, "learning_rate": 6.453664883620067e-06, "loss": 0.32, "step": 1488, "teacher_loss": 0.29902976751327515 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.4942467212677002, "learning_rate": 6.458002023998843e-06, "loss": 0.2338, "step": 1489, "teacher_loss": 0.2048470824956894 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.34948718547821045, "learning_rate": 6.462339164377621e-06, "loss": 0.2153, "step": 1490, "teacher_loss": 0.20037628710269928 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5074208974838257, "learning_rate": 6.466676304756397e-06, "loss": 0.2884, "step": 1491, "teacher_loss": 0.26407018303871155 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.9427697658538818, "learning_rate": 6.4710134451351745e-06, "loss": 0.2791, "step": 1492, "teacher_loss": 0.20534226298332214 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.438088983297348, "learning_rate": 6.475350585513951e-06, "loss": 0.2172, "step": 1493, "teacher_loss": 0.19268149137496948 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.4645646810531616, "learning_rate": 6.479687725892728e-06, "loss": 0.2872, "step": 1494, "teacher_loss": 0.2675420939922333 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 1.3653159141540527, "learning_rate": 6.484024866271505e-06, "loss": 0.4513, "step": 1495, "teacher_loss": 0.3497464060783386 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.7653203010559082, "learning_rate": 6.4883620066502825e-06, "loss": 0.2472, "step": 1496, "teacher_loss": 0.18967586755752563 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.21197505295276642, "learning_rate": 6.492699147029058e-06, "loss": 0.2276, "step": 1497, "teacher_loss": 0.2293054312467575 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.26606351137161255, "learning_rate": 6.497036287407836e-06, "loss": 0.1999, "step": 1498, "teacher_loss": 0.19250395894050598 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5619463920593262, "learning_rate": 6.501373427786614e-06, "loss": 0.2635, "step": 1499, "teacher_loss": 0.2303701937198639 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.566064178943634, "learning_rate": 6.5057105681653896e-06, "loss": 0.4351, "step": 1500, "teacher_loss": 0.4205273687839508 }, { "epoch": 0.27, "eval_exact_match": 79.5837275307474, "eval_f1": 87.06577693357859, "step": 1500 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.6536146402359009, "learning_rate": 6.510047708544167e-06, "loss": 0.3145, "step": 1501, "teacher_loss": 0.27684885263442993 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.6563832759857178, "learning_rate": 6.514384848922943e-06, "loss": 0.3079, "step": 1502, "teacher_loss": 0.2692255973815918 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5887653827667236, "learning_rate": 6.518721989301721e-06, "loss": 0.3174, "step": 1503, "teacher_loss": 0.28729528188705444 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.31192320585250854, "learning_rate": 6.5230591296804975e-06, "loss": 0.196, "step": 1504, "teacher_loss": 0.18310143053531647 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.4568164050579071, "learning_rate": 6.527396270059274e-06, "loss": 0.2577, "step": 1505, "teacher_loss": 0.2356141209602356 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.853651225566864, "learning_rate": 6.531733410438051e-06, "loss": 0.3065, "step": 1506, "teacher_loss": 0.24565467238426208 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.19764959812164307, "learning_rate": 6.536070550816829e-06, "loss": 0.1998, "step": 1507, "teacher_loss": 0.20005784928798676 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3866952657699585, "learning_rate": 6.540407691195605e-06, "loss": 0.239, "step": 1508, "teacher_loss": 0.22258563339710236 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.3077249228954315, "learning_rate": 6.544744831574382e-06, "loss": 0.3707, "step": 1509, "teacher_loss": 0.3777162432670593 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.7031110525131226, "learning_rate": 6.54908197195316e-06, "loss": 0.2954, "step": 1510, "teacher_loss": 0.25006282329559326 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.469660222530365, "learning_rate": 6.553419112331936e-06, "loss": 0.2713, "step": 1511, "teacher_loss": 0.24926158785820007 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.44997936487197876, "learning_rate": 6.557756252710713e-06, "loss": 0.2722, "step": 1512, "teacher_loss": 0.2523934245109558 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.1950952410697937, "learning_rate": 6.562093393089489e-06, "loss": 0.204, "step": 1513, "teacher_loss": 0.20494115352630615 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.4545411467552185, "learning_rate": 6.566430533468267e-06, "loss": 0.2405, "step": 1514, "teacher_loss": 0.2167295664548874 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.2769771218299866, "learning_rate": 6.570767673847044e-06, "loss": 0.2203, "step": 1515, "teacher_loss": 0.21395191550254822 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.6572542190551758, "learning_rate": 6.5751048142258205e-06, "loss": 0.3388, "step": 1516, "teacher_loss": 0.30342453718185425 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.20394620299339294, "learning_rate": 6.579441954604597e-06, "loss": 0.2197, "step": 1517, "teacher_loss": 0.22148063778877258 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5273770689964294, "learning_rate": 6.583779094983375e-06, "loss": 0.2951, "step": 1518, "teacher_loss": 0.26924821734428406 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.18105781078338623, "learning_rate": 6.588116235362151e-06, "loss": 0.1567, "step": 1519, "teacher_loss": 0.15399745106697083 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.37027502059936523, "learning_rate": 6.5924533757409285e-06, "loss": 0.2491, "step": 1520, "teacher_loss": 0.23565515875816345 }, { "compression_loss": 0.0, "epoch": 0.27, "label_loss": 0.5778634548187256, "learning_rate": 6.596790516119705e-06, "loss": 0.2677, "step": 1521, "teacher_loss": 0.23321810364723206 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.48173433542251587, "learning_rate": 6.601127656498482e-06, "loss": 0.3414, "step": 1522, "teacher_loss": 0.3257637619972229 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.28270217776298523, "learning_rate": 6.60546479687726e-06, "loss": 0.2089, "step": 1523, "teacher_loss": 0.20072519779205322 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.7699395418167114, "learning_rate": 6.6098019372560356e-06, "loss": 0.6166, "step": 1524, "teacher_loss": 0.5995362997055054 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.354941189289093, "learning_rate": 6.614139077634813e-06, "loss": 0.2355, "step": 1525, "teacher_loss": 0.222259059548378 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.36794978380203247, "learning_rate": 6.61847621801359e-06, "loss": 0.2587, "step": 1526, "teacher_loss": 0.24660485982894897 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2065725028514862, "learning_rate": 6.622813358392367e-06, "loss": 0.2204, "step": 1527, "teacher_loss": 0.22196698188781738 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4842984080314636, "learning_rate": 6.6271504987711435e-06, "loss": 0.2904, "step": 1528, "teacher_loss": 0.2688485383987427 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4599143862724304, "learning_rate": 6.631487639149921e-06, "loss": 0.2323, "step": 1529, "teacher_loss": 0.20698490738868713 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3056512475013733, "learning_rate": 6.635824779528697e-06, "loss": 0.1645, "step": 1530, "teacher_loss": 0.14879915118217468 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.14455418288707733, "learning_rate": 6.640161919907475e-06, "loss": 0.2401, "step": 1531, "teacher_loss": 0.2507137656211853 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4103032350540161, "learning_rate": 6.6444990602862515e-06, "loss": 0.2954, "step": 1532, "teacher_loss": 0.28265517950057983 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.14162828028202057, "learning_rate": 6.648836200665028e-06, "loss": 0.1634, "step": 1533, "teacher_loss": 0.16582491993904114 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.44356968998908997, "learning_rate": 6.653173341043806e-06, "loss": 0.3179, "step": 1534, "teacher_loss": 0.30391383171081543 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 1.1816470623016357, "learning_rate": 6.657510481422582e-06, "loss": 0.3503, "step": 1535, "teacher_loss": 0.25797995924949646 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4879845380783081, "learning_rate": 6.661847621801359e-06, "loss": 0.2168, "step": 1536, "teacher_loss": 0.18672238290309906 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4371688961982727, "learning_rate": 6.666184762180136e-06, "loss": 0.2967, "step": 1537, "teacher_loss": 0.2811279296875 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.5191504955291748, "learning_rate": 6.670521902558913e-06, "loss": 0.3176, "step": 1538, "teacher_loss": 0.29522716999053955 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4230346977710724, "learning_rate": 6.67485904293769e-06, "loss": 0.2532, "step": 1539, "teacher_loss": 0.2343805879354477 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3335103690624237, "learning_rate": 6.6791961833164665e-06, "loss": 0.1938, "step": 1540, "teacher_loss": 0.1782270222902298 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.5602671504020691, "learning_rate": 6.683533323695243e-06, "loss": 0.2814, "step": 1541, "teacher_loss": 0.25040560960769653 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2900892198085785, "learning_rate": 6.687870464074021e-06, "loss": 0.2469, "step": 1542, "teacher_loss": 0.24214991927146912 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3640191853046417, "learning_rate": 6.692207604452798e-06, "loss": 0.3305, "step": 1543, "teacher_loss": 0.3268120288848877 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.28090301156044006, "learning_rate": 6.6965447448315744e-06, "loss": 0.2424, "step": 1544, "teacher_loss": 0.2381608486175537 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3154433071613312, "learning_rate": 6.700881885210352e-06, "loss": 0.3793, "step": 1545, "teacher_loss": 0.3864471912384033 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.6108459234237671, "learning_rate": 6.705219025589128e-06, "loss": 0.3181, "step": 1546, "teacher_loss": 0.2855387330055237 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4527820944786072, "learning_rate": 6.709556165967906e-06, "loss": 0.24, "step": 1547, "teacher_loss": 0.21638791263103485 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3557090163230896, "learning_rate": 6.713893306346682e-06, "loss": 0.215, "step": 1548, "teacher_loss": 0.19931426644325256 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.18618391454219818, "learning_rate": 6.718230446725459e-06, "loss": 0.1967, "step": 1549, "teacher_loss": 0.1978602111339569 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2845844030380249, "learning_rate": 6.722567587104236e-06, "loss": 0.3817, "step": 1550, "teacher_loss": 0.3925043046474457 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4687957167625427, "learning_rate": 6.726904727483013e-06, "loss": 0.2492, "step": 1551, "teacher_loss": 0.22483769059181213 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4031181037425995, "learning_rate": 6.7312418678617895e-06, "loss": 0.2792, "step": 1552, "teacher_loss": 0.2654242217540741 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.635313868522644, "learning_rate": 6.735579008240567e-06, "loss": 0.2866, "step": 1553, "teacher_loss": 0.24783284962177277 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.5238542556762695, "learning_rate": 6.739916148619344e-06, "loss": 0.252, "step": 1554, "teacher_loss": 0.2218276858329773 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4486095905303955, "learning_rate": 6.744253288998121e-06, "loss": 0.2399, "step": 1555, "teacher_loss": 0.21673499047756195 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.48756474256515503, "learning_rate": 6.748590429376898e-06, "loss": 0.375, "step": 1556, "teacher_loss": 0.36249426007270813 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.10583238303661346, "learning_rate": 6.752927569755674e-06, "loss": 0.1572, "step": 1557, "teacher_loss": 0.16290950775146484 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.5338687300682068, "learning_rate": 6.757264710134452e-06, "loss": 0.2741, "step": 1558, "teacher_loss": 0.24521172046661377 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.7148044109344482, "learning_rate": 6.761601850513229e-06, "loss": 0.3346, "step": 1559, "teacher_loss": 0.2923468351364136 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3636273741722107, "learning_rate": 6.765938990892005e-06, "loss": 0.2747, "step": 1560, "teacher_loss": 0.2648392915725708 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4297388792037964, "learning_rate": 6.770276131270782e-06, "loss": 0.2728, "step": 1561, "teacher_loss": 0.2554115653038025 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 1.492283582687378, "learning_rate": 6.774613271649559e-06, "loss": 0.6542, "step": 1562, "teacher_loss": 0.5610284209251404 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.47709864377975464, "learning_rate": 6.778950412028336e-06, "loss": 0.251, "step": 1563, "teacher_loss": 0.2258908897638321 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.24776878952980042, "learning_rate": 6.783287552407113e-06, "loss": 0.193, "step": 1564, "teacher_loss": 0.18690750002861023 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.7163398265838623, "learning_rate": 6.78762469278589e-06, "loss": 0.3204, "step": 1565, "teacher_loss": 0.27636831998825073 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2928646206855774, "learning_rate": 6.791961833164667e-06, "loss": 0.1992, "step": 1566, "teacher_loss": 0.18874001502990723 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.3436906933784485, "learning_rate": 6.7962989735434445e-06, "loss": 0.2255, "step": 1567, "teacher_loss": 0.21235454082489014 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2857658863067627, "learning_rate": 6.8006361139222204e-06, "loss": 0.2658, "step": 1568, "teacher_loss": 0.2636314928531647 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.30223679542541504, "learning_rate": 6.804973254300998e-06, "loss": 0.2035, "step": 1569, "teacher_loss": 0.19252745807170868 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.2982024550437927, "learning_rate": 6.809310394679774e-06, "loss": 0.2603, "step": 1570, "teacher_loss": 0.25607356429100037 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.5866947770118713, "learning_rate": 6.813647535058552e-06, "loss": 0.2633, "step": 1571, "teacher_loss": 0.2273411899805069 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4968215823173523, "learning_rate": 6.817984675437328e-06, "loss": 0.2707, "step": 1572, "teacher_loss": 0.24559777975082397 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.6647064089775085, "learning_rate": 6.822321815816105e-06, "loss": 0.4016, "step": 1573, "teacher_loss": 0.3723995089530945 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.686487078666687, "learning_rate": 6.826658956194882e-06, "loss": 0.284, "step": 1574, "teacher_loss": 0.23932772874832153 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.8935974836349487, "learning_rate": 6.8309960965736596e-06, "loss": 0.2883, "step": 1575, "teacher_loss": 0.2210836112499237 }, { "compression_loss": 0.0, "epoch": 0.28, "label_loss": 0.4665301740169525, "learning_rate": 6.835333236952436e-06, "loss": 0.3239, "step": 1576, "teacher_loss": 0.3080606162548065 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.19298695027828217, "learning_rate": 6.839670377331213e-06, "loss": 0.1892, "step": 1577, "teacher_loss": 0.18879903852939606 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.8310211300849915, "learning_rate": 6.844007517709991e-06, "loss": 0.2559, "step": 1578, "teacher_loss": 0.19204741716384888 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6331437230110168, "learning_rate": 6.848344658088767e-06, "loss": 0.2812, "step": 1579, "teacher_loss": 0.24213330447673798 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6423879265785217, "learning_rate": 6.852681798467544e-06, "loss": 0.303, "step": 1580, "teacher_loss": 0.2652893364429474 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.4978925585746765, "learning_rate": 6.85701893884632e-06, "loss": 0.3412, "step": 1581, "teacher_loss": 0.32374048233032227 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.697215735912323, "learning_rate": 6.861356079225098e-06, "loss": 0.3246, "step": 1582, "teacher_loss": 0.2831571698188782 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.543228030204773, "learning_rate": 6.865693219603875e-06, "loss": 0.2785, "step": 1583, "teacher_loss": 0.24907484650611877 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.3739100694656372, "learning_rate": 6.870030359982651e-06, "loss": 0.3501, "step": 1584, "teacher_loss": 0.34750163555145264 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.5138339996337891, "learning_rate": 6.874367500361428e-06, "loss": 0.2365, "step": 1585, "teacher_loss": 0.20568975806236267 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.26184117794036865, "learning_rate": 6.878704640740206e-06, "loss": 0.2669, "step": 1586, "teacher_loss": 0.2674316465854645 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6729931235313416, "learning_rate": 6.8830417811189826e-06, "loss": 0.2537, "step": 1587, "teacher_loss": 0.20705857872962952 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.4510330259799957, "learning_rate": 6.887378921497759e-06, "loss": 0.2922, "step": 1588, "teacher_loss": 0.2745053768157959 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6315981149673462, "learning_rate": 6.891716061876536e-06, "loss": 0.3575, "step": 1589, "teacher_loss": 0.32701215147972107 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.2609737813472748, "learning_rate": 6.896053202255313e-06, "loss": 0.2111, "step": 1590, "teacher_loss": 0.2055622637271881 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.7260409593582153, "learning_rate": 6.9003903426340905e-06, "loss": 0.2536, "step": 1591, "teacher_loss": 0.20115147531032562 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6837571859359741, "learning_rate": 6.9047274830128664e-06, "loss": 0.3027, "step": 1592, "teacher_loss": 0.26033419370651245 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.3227293789386749, "learning_rate": 6.909064623391644e-06, "loss": 0.1969, "step": 1593, "teacher_loss": 0.1829744279384613 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6455123424530029, "learning_rate": 6.913401763770421e-06, "loss": 0.283, "step": 1594, "teacher_loss": 0.24272695183753967 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6461910009384155, "learning_rate": 6.917738904149198e-06, "loss": 0.2789, "step": 1595, "teacher_loss": 0.23814070224761963 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.2895563244819641, "learning_rate": 6.922076044527974e-06, "loss": 0.2847, "step": 1596, "teacher_loss": 0.28418421745300293 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.22185850143432617, "learning_rate": 6.926413184906752e-06, "loss": 0.2692, "step": 1597, "teacher_loss": 0.27450883388519287 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.4758252501487732, "learning_rate": 6.930750325285529e-06, "loss": 0.226, "step": 1598, "teacher_loss": 0.19828465580940247 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.1545831710100174, "learning_rate": 6.9350874656643055e-06, "loss": 0.2554, "step": 1599, "teacher_loss": 0.26658469438552856 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.27799665927886963, "learning_rate": 6.939424606043082e-06, "loss": 0.2782, "step": 1600, "teacher_loss": 0.27825433015823364 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.3682354688644409, "learning_rate": 6.943761746421859e-06, "loss": 0.2725, "step": 1601, "teacher_loss": 0.26190778613090515 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.36152321100234985, "learning_rate": 6.948098886800637e-06, "loss": 0.2341, "step": 1602, "teacher_loss": 0.21999385952949524 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.23636780679225922, "learning_rate": 6.952436027179413e-06, "loss": 0.193, "step": 1603, "teacher_loss": 0.18817946314811707 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.24163982272148132, "learning_rate": 6.95677316755819e-06, "loss": 0.226, "step": 1604, "teacher_loss": 0.22429007291793823 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.18795496225357056, "learning_rate": 6.961110307936967e-06, "loss": 0.1735, "step": 1605, "teacher_loss": 0.17191702127456665 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.3355136513710022, "learning_rate": 6.965447448315744e-06, "loss": 0.2183, "step": 1606, "teacher_loss": 0.20526964962482452 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.9709417819976807, "learning_rate": 6.969784588694521e-06, "loss": 0.3519, "step": 1607, "teacher_loss": 0.28310951590538025 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6504513621330261, "learning_rate": 6.974121729073298e-06, "loss": 0.3362, "step": 1608, "teacher_loss": 0.3012961745262146 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.30171966552734375, "learning_rate": 6.978458869452075e-06, "loss": 0.232, "step": 1609, "teacher_loss": 0.2242894470691681 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.41263431310653687, "learning_rate": 6.982796009830852e-06, "loss": 0.1743, "step": 1610, "teacher_loss": 0.1478133201599121 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.7839899063110352, "learning_rate": 6.9871331502096285e-06, "loss": 0.2454, "step": 1611, "teacher_loss": 0.18555572628974915 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.5709105730056763, "learning_rate": 6.991470290588405e-06, "loss": 0.2441, "step": 1612, "teacher_loss": 0.20782436430454254 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.5652528405189514, "learning_rate": 6.995807430967183e-06, "loss": 0.3085, "step": 1613, "teacher_loss": 0.27997809648513794 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.4097408652305603, "learning_rate": 7.000144571345959e-06, "loss": 0.2704, "step": 1614, "teacher_loss": 0.2549644708633423 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.20464831590652466, "learning_rate": 7.0044817117247365e-06, "loss": 0.1807, "step": 1615, "teacher_loss": 0.1780604124069214 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.42826512455940247, "learning_rate": 7.008818852103513e-06, "loss": 0.2685, "step": 1616, "teacher_loss": 0.2507718801498413 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.42512720823287964, "learning_rate": 7.01315599248229e-06, "loss": 0.194, "step": 1617, "teacher_loss": 0.16826513409614563 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6167625784873962, "learning_rate": 7.017493132861068e-06, "loss": 0.2766, "step": 1618, "teacher_loss": 0.23885196447372437 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.8156715631484985, "learning_rate": 7.021830273239844e-06, "loss": 0.2435, "step": 1619, "teacher_loss": 0.17993509769439697 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.7376936078071594, "learning_rate": 7.026167413618621e-06, "loss": 0.2779, "step": 1620, "teacher_loss": 0.22675897181034088 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.9738141298294067, "learning_rate": 7.030504553997398e-06, "loss": 0.7924, "step": 1621, "teacher_loss": 0.7722911238670349 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.9232257604598999, "learning_rate": 7.034841694376175e-06, "loss": 0.2871, "step": 1622, "teacher_loss": 0.21642255783081055 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.18170255422592163, "learning_rate": 7.0391788347549515e-06, "loss": 0.1837, "step": 1623, "teacher_loss": 0.18388764560222626 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6453879475593567, "learning_rate": 7.043515975133729e-06, "loss": 0.4657, "step": 1624, "teacher_loss": 0.4457581639289856 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6297687292098999, "learning_rate": 7.047853115512505e-06, "loss": 0.2294, "step": 1625, "teacher_loss": 0.18488456308841705 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.44334226846694946, "learning_rate": 7.052190255891283e-06, "loss": 0.169, "step": 1626, "teacher_loss": 0.13852104544639587 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.654925525188446, "learning_rate": 7.0565273962700595e-06, "loss": 0.2909, "step": 1627, "teacher_loss": 0.2504042088985443 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.42963486909866333, "learning_rate": 7.060864536648836e-06, "loss": 0.2075, "step": 1628, "teacher_loss": 0.18282747268676758 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.5620638728141785, "learning_rate": 7.065201677027614e-06, "loss": 0.3048, "step": 1629, "teacher_loss": 0.2761821746826172 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.6134941577911377, "learning_rate": 7.06953881740639e-06, "loss": 0.5665, "step": 1630, "teacher_loss": 0.5612772107124329 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.5002784729003906, "learning_rate": 7.0738759577851674e-06, "loss": 0.262, "step": 1631, "teacher_loss": 0.23550619184970856 }, { "compression_loss": 0.0, "epoch": 0.29, "label_loss": 0.45943838357925415, "learning_rate": 7.078213098163944e-06, "loss": 0.242, "step": 1632, "teacher_loss": 0.21784795820713043 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.8223768472671509, "learning_rate": 7.082550238542721e-06, "loss": 0.3204, "step": 1633, "teacher_loss": 0.26466691493988037 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.304261177778244, "learning_rate": 7.086887378921498e-06, "loss": 0.2178, "step": 1634, "teacher_loss": 0.208164781332016 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.44871509075164795, "learning_rate": 7.091224519300275e-06, "loss": 0.5954, "step": 1635, "teacher_loss": 0.6117015480995178 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5155469179153442, "learning_rate": 7.095561659679051e-06, "loss": 0.2616, "step": 1636, "teacher_loss": 0.23342445492744446 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5483478307723999, "learning_rate": 7.099898800057829e-06, "loss": 0.2518, "step": 1637, "teacher_loss": 0.21886010468006134 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.29973161220550537, "learning_rate": 7.104235940436606e-06, "loss": 0.2221, "step": 1638, "teacher_loss": 0.21342161297798157 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.3664548993110657, "learning_rate": 7.1085730808153825e-06, "loss": 0.2595, "step": 1639, "teacher_loss": 0.24763160943984985 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.4747645854949951, "learning_rate": 7.11291022119416e-06, "loss": 0.2376, "step": 1640, "teacher_loss": 0.21121986210346222 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.8306114673614502, "learning_rate": 7.117247361572936e-06, "loss": 0.2725, "step": 1641, "teacher_loss": 0.21049359440803528 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.46779969334602356, "learning_rate": 7.121584501951714e-06, "loss": 0.2104, "step": 1642, "teacher_loss": 0.18180415034294128 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.23728707432746887, "learning_rate": 7.1259216423304904e-06, "loss": 0.1373, "step": 1643, "teacher_loss": 0.12619513273239136 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.8424392342567444, "learning_rate": 7.130258782709267e-06, "loss": 0.312, "step": 1644, "teacher_loss": 0.25305604934692383 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.34003889560699463, "learning_rate": 7.134595923088044e-06, "loss": 0.2074, "step": 1645, "teacher_loss": 0.19260868430137634 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.686252772808075, "learning_rate": 7.138933063466822e-06, "loss": 0.3789, "step": 1646, "teacher_loss": 0.3447898328304291 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.45493486523628235, "learning_rate": 7.1432702038455975e-06, "loss": 0.1873, "step": 1647, "teacher_loss": 0.15761220455169678 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.6702474355697632, "learning_rate": 7.147607344224375e-06, "loss": 0.3254, "step": 1648, "teacher_loss": 0.2871015667915344 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.47037121653556824, "learning_rate": 7.151944484603151e-06, "loss": 0.1999, "step": 1649, "teacher_loss": 0.16986367106437683 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.24249157309532166, "learning_rate": 7.156281624981929e-06, "loss": 0.2699, "step": 1650, "teacher_loss": 0.272937536239624 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 1.0796585083007812, "learning_rate": 7.160618765360706e-06, "loss": 0.3372, "step": 1651, "teacher_loss": 0.25466597080230713 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5444917678833008, "learning_rate": 7.164955905739482e-06, "loss": 0.1849, "step": 1652, "teacher_loss": 0.14489763975143433 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.45087337493896484, "learning_rate": 7.16929304611826e-06, "loss": 0.2942, "step": 1653, "teacher_loss": 0.27682676911354065 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.822691023349762, "learning_rate": 7.173630186497037e-06, "loss": 0.3237, "step": 1654, "teacher_loss": 0.26830005645751953 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5252161026000977, "learning_rate": 7.177967326875813e-06, "loss": 0.2053, "step": 1655, "teacher_loss": 0.16980135440826416 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.18071994185447693, "learning_rate": 7.18230446725459e-06, "loss": 0.2537, "step": 1656, "teacher_loss": 0.2617645263671875 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 1.085675835609436, "learning_rate": 7.186641607633368e-06, "loss": 0.3519, "step": 1657, "teacher_loss": 0.2703893184661865 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.39283737540245056, "learning_rate": 7.190978748012144e-06, "loss": 0.2596, "step": 1658, "teacher_loss": 0.24481524527072906 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5701018571853638, "learning_rate": 7.195315888390921e-06, "loss": 0.2201, "step": 1659, "teacher_loss": 0.18121731281280518 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.24561578035354614, "learning_rate": 7.199653028769697e-06, "loss": 0.3179, "step": 1660, "teacher_loss": 0.3259660601615906 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.26406487822532654, "learning_rate": 7.203990169148475e-06, "loss": 0.2402, "step": 1661, "teacher_loss": 0.23756316304206848 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.4176293611526489, "learning_rate": 7.2083273095272525e-06, "loss": 0.2505, "step": 1662, "teacher_loss": 0.2318916618824005 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.3857450485229492, "learning_rate": 7.2126644499060285e-06, "loss": 0.2479, "step": 1663, "teacher_loss": 0.2325943112373352 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.2657570242881775, "learning_rate": 7.217001590284806e-06, "loss": 0.3175, "step": 1664, "teacher_loss": 0.32323789596557617 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5581217408180237, "learning_rate": 7.221338730663583e-06, "loss": 0.2276, "step": 1665, "teacher_loss": 0.19089959561824799 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.49160897731781006, "learning_rate": 7.22567587104236e-06, "loss": 0.3407, "step": 1666, "teacher_loss": 0.323901891708374 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5474512577056885, "learning_rate": 7.230013011421136e-06, "loss": 0.2225, "step": 1667, "teacher_loss": 0.1863606721162796 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.2993767559528351, "learning_rate": 7.234350151799913e-06, "loss": 0.1807, "step": 1668, "teacher_loss": 0.1674700826406479 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.37867045402526855, "learning_rate": 7.23868729217869e-06, "loss": 0.221, "step": 1669, "teacher_loss": 0.20344525575637817 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.31751367449760437, "learning_rate": 7.243024432557468e-06, "loss": 0.1783, "step": 1670, "teacher_loss": 0.16278833150863647 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5946303606033325, "learning_rate": 7.2473615729362435e-06, "loss": 0.264, "step": 1671, "teacher_loss": 0.22730335593223572 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5039011240005493, "learning_rate": 7.251698713315021e-06, "loss": 0.2745, "step": 1672, "teacher_loss": 0.24898861348628998 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.8543486595153809, "learning_rate": 7.256035853693799e-06, "loss": 0.3221, "step": 1673, "teacher_loss": 0.26295357942581177 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.1504620611667633, "learning_rate": 7.260372994072575e-06, "loss": 0.1629, "step": 1674, "teacher_loss": 0.16426241397857666 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.4494657516479492, "learning_rate": 7.264710134451352e-06, "loss": 0.2516, "step": 1675, "teacher_loss": 0.22959056496620178 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.461913526058197, "learning_rate": 7.269047274830129e-06, "loss": 0.2904, "step": 1676, "teacher_loss": 0.2713763117790222 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.2915355861186981, "learning_rate": 7.273384415208906e-06, "loss": 0.2472, "step": 1677, "teacher_loss": 0.24229495227336884 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.5916110277175903, "learning_rate": 7.277721555587683e-06, "loss": 0.2434, "step": 1678, "teacher_loss": 0.20476101338863373 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.3630271255970001, "learning_rate": 7.282058695966459e-06, "loss": 0.2834, "step": 1679, "teacher_loss": 0.27460354566574097 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.4084319770336151, "learning_rate": 7.286395836345236e-06, "loss": 0.2289, "step": 1680, "teacher_loss": 0.2089563012123108 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.11544451862573624, "learning_rate": 7.290732976724014e-06, "loss": 0.1416, "step": 1681, "teacher_loss": 0.14456146955490112 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.9449407458305359, "learning_rate": 7.29507011710279e-06, "loss": 0.3621, "step": 1682, "teacher_loss": 0.29732680320739746 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.33266693353652954, "learning_rate": 7.299407257481567e-06, "loss": 0.2549, "step": 1683, "teacher_loss": 0.24624398350715637 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.22451072931289673, "learning_rate": 7.303744397860345e-06, "loss": 0.1855, "step": 1684, "teacher_loss": 0.1812075823545456 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.6619007587432861, "learning_rate": 7.308081538239121e-06, "loss": 0.456, "step": 1685, "teacher_loss": 0.43316125869750977 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.30011308193206787, "learning_rate": 7.3124186786178985e-06, "loss": 0.1897, "step": 1686, "teacher_loss": 0.17746882140636444 }, { "compression_loss": 0.0, "epoch": 0.3, "label_loss": 0.1647869199514389, "learning_rate": 7.316755818996675e-06, "loss": 0.2197, "step": 1687, "teacher_loss": 0.22582116723060608 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4102597236633301, "learning_rate": 7.321092959375452e-06, "loss": 0.2487, "step": 1688, "teacher_loss": 0.23072564601898193 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4120871424674988, "learning_rate": 7.325430099754229e-06, "loss": 0.2338, "step": 1689, "teacher_loss": 0.21399688720703125 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 1.1790788173675537, "learning_rate": 7.329767240133006e-06, "loss": 0.3521, "step": 1690, "teacher_loss": 0.26023560762405396 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.41928374767303467, "learning_rate": 7.334104380511782e-06, "loss": 0.1827, "step": 1691, "teacher_loss": 0.15641218423843384 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.22762858867645264, "learning_rate": 7.33844152089056e-06, "loss": 0.1659, "step": 1692, "teacher_loss": 0.15905120968818665 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.2536356747150421, "learning_rate": 7.342778661269336e-06, "loss": 0.2055, "step": 1693, "teacher_loss": 0.20010483264923096 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3064706325531006, "learning_rate": 7.3471158016481136e-06, "loss": 0.206, "step": 1694, "teacher_loss": 0.1948208063840866 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5034622550010681, "learning_rate": 7.351452942026891e-06, "loss": 0.2401, "step": 1695, "teacher_loss": 0.21083992719650269 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.38133305311203003, "learning_rate": 7.355790082405667e-06, "loss": 0.1823, "step": 1696, "teacher_loss": 0.16017015278339386 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5878756642341614, "learning_rate": 7.360127222784445e-06, "loss": 0.2908, "step": 1697, "teacher_loss": 0.25783663988113403 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5000163316726685, "learning_rate": 7.364464363163221e-06, "loss": 0.2432, "step": 1698, "teacher_loss": 0.2146349400281906 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.15711775422096252, "learning_rate": 7.368801503541998e-06, "loss": 0.2729, "step": 1699, "teacher_loss": 0.285740464925766 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.2974385619163513, "learning_rate": 7.373138643920775e-06, "loss": 0.2114, "step": 1700, "teacher_loss": 0.20181187987327576 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.48172444105148315, "learning_rate": 7.377475784299552e-06, "loss": 0.3203, "step": 1701, "teacher_loss": 0.30236685276031494 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5313643217086792, "learning_rate": 7.381812924678329e-06, "loss": 0.2297, "step": 1702, "teacher_loss": 0.19612954556941986 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.33326053619384766, "learning_rate": 7.386150065057106e-06, "loss": 0.2524, "step": 1703, "teacher_loss": 0.24336345493793488 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.548973023891449, "learning_rate": 7.390487205435882e-06, "loss": 0.2708, "step": 1704, "teacher_loss": 0.23993246257305145 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.8691943883895874, "learning_rate": 7.39482434581466e-06, "loss": 0.2918, "step": 1705, "teacher_loss": 0.22760280966758728 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3747527599334717, "learning_rate": 7.399161486193437e-06, "loss": 0.177, "step": 1706, "teacher_loss": 0.1550162136554718 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4190804362297058, "learning_rate": 7.403498626572213e-06, "loss": 0.2564, "step": 1707, "teacher_loss": 0.23831962049007416 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3904881179332733, "learning_rate": 7.407835766950991e-06, "loss": 0.2544, "step": 1708, "teacher_loss": 0.239267498254776 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.8016515970230103, "learning_rate": 7.412172907329767e-06, "loss": 0.5098, "step": 1709, "teacher_loss": 0.4773510694503784 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.2168349027633667, "learning_rate": 7.4165100477085445e-06, "loss": 0.1438, "step": 1710, "teacher_loss": 0.13565593957901 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.6603478193283081, "learning_rate": 7.420847188087321e-06, "loss": 0.2855, "step": 1711, "teacher_loss": 0.24385464191436768 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4019721746444702, "learning_rate": 7.425184328466098e-06, "loss": 0.1988, "step": 1712, "teacher_loss": 0.17621468007564545 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.44434458017349243, "learning_rate": 7.429521468844875e-06, "loss": 0.2366, "step": 1713, "teacher_loss": 0.2134873867034912 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.568021833896637, "learning_rate": 7.4338586092236525e-06, "loss": 0.2871, "step": 1714, "teacher_loss": 0.2558918297290802 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.19821667671203613, "learning_rate": 7.438195749602428e-06, "loss": 0.2059, "step": 1715, "teacher_loss": 0.20677441358566284 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.442771852016449, "learning_rate": 7.442532889981206e-06, "loss": 0.2496, "step": 1716, "teacher_loss": 0.2281550019979477 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.22311145067214966, "learning_rate": 7.446870030359983e-06, "loss": 0.2713, "step": 1717, "teacher_loss": 0.27667611837387085 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3049648404121399, "learning_rate": 7.4512071707387596e-06, "loss": 0.2137, "step": 1718, "teacher_loss": 0.2035529762506485 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5022507309913635, "learning_rate": 7.455544311117537e-06, "loss": 0.2142, "step": 1719, "teacher_loss": 0.18224143981933594 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.2592204213142395, "learning_rate": 7.459881451496313e-06, "loss": 0.2881, "step": 1720, "teacher_loss": 0.29136383533477783 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.2386520802974701, "learning_rate": 7.464218591875091e-06, "loss": 0.184, "step": 1721, "teacher_loss": 0.1779354214668274 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.6151055097579956, "learning_rate": 7.4685557322538675e-06, "loss": 0.2947, "step": 1722, "teacher_loss": 0.2590673565864563 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.7037807703018188, "learning_rate": 7.472892872632644e-06, "loss": 0.2442, "step": 1723, "teacher_loss": 0.19308754801750183 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.33382242918014526, "learning_rate": 7.477230013011421e-06, "loss": 0.1606, "step": 1724, "teacher_loss": 0.14139509201049805 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.8106041550636292, "learning_rate": 7.481567153390199e-06, "loss": 0.2856, "step": 1725, "teacher_loss": 0.22725045680999756 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.413419246673584, "learning_rate": 7.485904293768975e-06, "loss": 0.2293, "step": 1726, "teacher_loss": 0.20887622237205505 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3871203064918518, "learning_rate": 7.490241434147752e-06, "loss": 0.2792, "step": 1727, "teacher_loss": 0.26722365617752075 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.32493478059768677, "learning_rate": 7.494578574526529e-06, "loss": 0.2301, "step": 1728, "teacher_loss": 0.2195383608341217 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4425840377807617, "learning_rate": 7.498915714905306e-06, "loss": 0.2911, "step": 1729, "teacher_loss": 0.27426621317863464 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.544608473777771, "learning_rate": 7.5032528552840826e-06, "loss": 0.3097, "step": 1730, "teacher_loss": 0.28356924653053284 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.36008691787719727, "learning_rate": 7.507589995662859e-06, "loss": 0.2092, "step": 1731, "teacher_loss": 0.1923803687095642 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4727802276611328, "learning_rate": 7.511927136041637e-06, "loss": 0.3376, "step": 1732, "teacher_loss": 0.32262080907821655 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.8734699487686157, "learning_rate": 7.516264276420415e-06, "loss": 0.3171, "step": 1733, "teacher_loss": 0.25531288981437683 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.37842100858688354, "learning_rate": 7.52060141679919e-06, "loss": 0.1875, "step": 1734, "teacher_loss": 0.16627779603004456 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.18552684783935547, "learning_rate": 7.524938557177967e-06, "loss": 0.2206, "step": 1735, "teacher_loss": 0.22451084852218628 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4525231122970581, "learning_rate": 7.529275697556745e-06, "loss": 0.2288, "step": 1736, "teacher_loss": 0.2039850652217865 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.31749141216278076, "learning_rate": 7.533612837935522e-06, "loss": 0.2571, "step": 1737, "teacher_loss": 0.250375360250473 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.4055234491825104, "learning_rate": 7.537949978314299e-06, "loss": 0.2859, "step": 1738, "teacher_loss": 0.2725864350795746 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.3373218774795532, "learning_rate": 7.542287118693074e-06, "loss": 0.2262, "step": 1739, "teacher_loss": 0.21390564739704132 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5933299660682678, "learning_rate": 7.546624259071852e-06, "loss": 0.3101, "step": 1740, "teacher_loss": 0.2786233425140381 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.25776606798171997, "learning_rate": 7.55096139945063e-06, "loss": 0.2206, "step": 1741, "teacher_loss": 0.216432124376297 }, { "compression_loss": 0.0, "epoch": 0.31, "label_loss": 0.5019830465316772, "learning_rate": 7.555298539829406e-06, "loss": 0.2842, "step": 1742, "teacher_loss": 0.26005327701568604 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.2376137375831604, "learning_rate": 7.559635680208182e-06, "loss": 0.1845, "step": 1743, "teacher_loss": 0.17865246534347534 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.2858850955963135, "learning_rate": 7.56397282058696e-06, "loss": 0.2729, "step": 1744, "teacher_loss": 0.2714817523956299 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3677661716938019, "learning_rate": 7.568309960965737e-06, "loss": 0.3363, "step": 1745, "teacher_loss": 0.3328477144241333 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.81312495470047, "learning_rate": 7.572647101344514e-06, "loss": 0.2042, "step": 1746, "teacher_loss": 0.13654255867004395 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.15854638814926147, "learning_rate": 7.5769842417232894e-06, "loss": 0.1958, "step": 1747, "teacher_loss": 0.19993603229522705 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.2754839360713959, "learning_rate": 7.581321382102067e-06, "loss": 0.255, "step": 1748, "teacher_loss": 0.2527409791946411 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.6465566158294678, "learning_rate": 7.585658522480845e-06, "loss": 0.271, "step": 1749, "teacher_loss": 0.22926507890224457 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4403068423271179, "learning_rate": 7.5899956628596214e-06, "loss": 0.2639, "step": 1750, "teacher_loss": 0.24433518946170807 }, { "epoch": 0.32, "eval_exact_match": 79.46073793755913, "eval_f1": 87.19280246592186, "step": 1750 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5703877210617065, "learning_rate": 7.594332803238399e-06, "loss": 0.2364, "step": 1751, "teacher_loss": 0.19933678209781647 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.37232139706611633, "learning_rate": 7.598669943617175e-06, "loss": 0.249, "step": 1752, "teacher_loss": 0.2352568507194519 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4878363609313965, "learning_rate": 7.603007083995952e-06, "loss": 0.2832, "step": 1753, "teacher_loss": 0.26045307517051697 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.26372215151786804, "learning_rate": 7.607344224374729e-06, "loss": 0.1783, "step": 1754, "teacher_loss": 0.1687929928302765 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5693401098251343, "learning_rate": 7.611681364753507e-06, "loss": 0.2294, "step": 1755, "teacher_loss": 0.19168388843536377 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4138832986354828, "learning_rate": 7.616018505132282e-06, "loss": 0.22, "step": 1756, "teacher_loss": 0.1984454095363617 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3313694894313812, "learning_rate": 7.62035564551106e-06, "loss": 0.2567, "step": 1757, "teacher_loss": 0.24837106466293335 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.12037936598062515, "learning_rate": 7.6246927858898365e-06, "loss": 0.1517, "step": 1758, "teacher_loss": 0.15522542595863342 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.29638609290122986, "learning_rate": 7.629029926268614e-06, "loss": 0.2482, "step": 1759, "teacher_loss": 0.24282485246658325 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4636688232421875, "learning_rate": 7.633367066647392e-06, "loss": 0.2456, "step": 1760, "teacher_loss": 0.22139650583267212 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.15306410193443298, "learning_rate": 7.637704207026168e-06, "loss": 0.178, "step": 1761, "teacher_loss": 0.18078546226024628 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.46510058641433716, "learning_rate": 7.642041347404944e-06, "loss": 0.2946, "step": 1762, "teacher_loss": 0.27562767267227173 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.31164616346359253, "learning_rate": 7.646378487783721e-06, "loss": 0.2655, "step": 1763, "teacher_loss": 0.2603718638420105 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.34508180618286133, "learning_rate": 7.650715628162499e-06, "loss": 0.2153, "step": 1764, "teacher_loss": 0.20088255405426025 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.43271222710609436, "learning_rate": 7.655052768541275e-06, "loss": 0.2352, "step": 1765, "teacher_loss": 0.21324174106121063 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.48831379413604736, "learning_rate": 7.659389908920052e-06, "loss": 0.2546, "step": 1766, "teacher_loss": 0.22868716716766357 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.6186795234680176, "learning_rate": 7.66372704929883e-06, "loss": 0.2912, "step": 1767, "teacher_loss": 0.25483590364456177 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.6985093355178833, "learning_rate": 7.668064189677606e-06, "loss": 0.2768, "step": 1768, "teacher_loss": 0.22995707392692566 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5289044380187988, "learning_rate": 7.672401330056382e-06, "loss": 0.242, "step": 1769, "teacher_loss": 0.21010245382785797 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.40761590003967285, "learning_rate": 7.67673847043516e-06, "loss": 0.2587, "step": 1770, "teacher_loss": 0.24210332334041595 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3014605939388275, "learning_rate": 7.681075610813937e-06, "loss": 0.2236, "step": 1771, "teacher_loss": 0.21492652595043182 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.13060994446277618, "learning_rate": 7.685412751192715e-06, "loss": 0.2046, "step": 1772, "teacher_loss": 0.21283230185508728 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.29701292514801025, "learning_rate": 7.68974989157149e-06, "loss": 0.2458, "step": 1773, "teacher_loss": 0.24006086587905884 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.9122977256774902, "learning_rate": 7.694087031950267e-06, "loss": 0.4663, "step": 1774, "teacher_loss": 0.4167540371417999 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.36518651247024536, "learning_rate": 7.698424172329044e-06, "loss": 0.198, "step": 1775, "teacher_loss": 0.17946107685565948 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5476127862930298, "learning_rate": 7.702761312707822e-06, "loss": 0.2344, "step": 1776, "teacher_loss": 0.19961652159690857 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.8547636270523071, "learning_rate": 7.7070984530866e-06, "loss": 0.2813, "step": 1777, "teacher_loss": 0.21758529543876648 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.7159478664398193, "learning_rate": 7.711435593465375e-06, "loss": 0.3423, "step": 1778, "teacher_loss": 0.30079466104507446 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5323176383972168, "learning_rate": 7.715772733844151e-06, "loss": 0.3233, "step": 1779, "teacher_loss": 0.3000541925430298 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4683852195739746, "learning_rate": 7.720109874222929e-06, "loss": 0.2345, "step": 1780, "teacher_loss": 0.2085098922252655 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.497215211391449, "learning_rate": 7.724447014601707e-06, "loss": 0.2614, "step": 1781, "teacher_loss": 0.23515203595161438 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.47892889380455017, "learning_rate": 7.728784154980484e-06, "loss": 0.2167, "step": 1782, "teacher_loss": 0.18751531839370728 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.9846535921096802, "learning_rate": 7.73312129535926e-06, "loss": 0.2994, "step": 1783, "teacher_loss": 0.22326920926570892 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.23477408289909363, "learning_rate": 7.737458435738036e-06, "loss": 0.1814, "step": 1784, "teacher_loss": 0.1755068451166153 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3676646947860718, "learning_rate": 7.741795576116814e-06, "loss": 0.2297, "step": 1785, "teacher_loss": 0.21437790989875793 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.5792921781539917, "learning_rate": 7.746132716495591e-06, "loss": 0.3182, "step": 1786, "teacher_loss": 0.2891741991043091 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.32727351784706116, "learning_rate": 7.750469856874367e-06, "loss": 0.2589, "step": 1787, "teacher_loss": 0.25128233432769775 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4121859669685364, "learning_rate": 7.754806997253145e-06, "loss": 0.2848, "step": 1788, "teacher_loss": 0.2706039547920227 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.59428471326828, "learning_rate": 7.759144137631922e-06, "loss": 0.26, "step": 1789, "teacher_loss": 0.222828209400177 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3319006562232971, "learning_rate": 7.763481278010698e-06, "loss": 0.2146, "step": 1790, "teacher_loss": 0.20156118273735046 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.649003267288208, "learning_rate": 7.767818418389474e-06, "loss": 0.2607, "step": 1791, "teacher_loss": 0.21756139397621155 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.6035705804824829, "learning_rate": 7.772155558768252e-06, "loss": 0.2995, "step": 1792, "teacher_loss": 0.2657451629638672 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.2995683550834656, "learning_rate": 7.77649269914703e-06, "loss": 0.2358, "step": 1793, "teacher_loss": 0.22875794768333435 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.6605123281478882, "learning_rate": 7.780829839525807e-06, "loss": 0.3122, "step": 1794, "teacher_loss": 0.2735482156276703 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4632706046104431, "learning_rate": 7.785166979904583e-06, "loss": 0.3876, "step": 1795, "teacher_loss": 0.3792091906070709 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.3336637318134308, "learning_rate": 7.789504120283359e-06, "loss": 0.2412, "step": 1796, "teacher_loss": 0.2309657335281372 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.23243877291679382, "learning_rate": 7.793841260662137e-06, "loss": 0.2041, "step": 1797, "teacher_loss": 0.20097434520721436 }, { "compression_loss": 0.0, "epoch": 0.32, "label_loss": 0.4276742935180664, "learning_rate": 7.798178401040914e-06, "loss": 0.2569, "step": 1798, "teacher_loss": 0.23787957429885864 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4468705654144287, "learning_rate": 7.802515541419692e-06, "loss": 0.2468, "step": 1799, "teacher_loss": 0.22459043562412262 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.30764341354370117, "learning_rate": 7.806852681798468e-06, "loss": 0.1724, "step": 1800, "teacher_loss": 0.15739014744758606 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.6394176483154297, "learning_rate": 7.811189822177244e-06, "loss": 0.2566, "step": 1801, "teacher_loss": 0.2140173316001892 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4904750883579254, "learning_rate": 7.815526962556021e-06, "loss": 0.2335, "step": 1802, "teacher_loss": 0.2049984633922577 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5518819093704224, "learning_rate": 7.819864102934799e-06, "loss": 0.2592, "step": 1803, "teacher_loss": 0.2266334891319275 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.413704514503479, "learning_rate": 7.824201243313577e-06, "loss": 0.2505, "step": 1804, "teacher_loss": 0.23231223225593567 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.13127173483371735, "learning_rate": 7.828538383692353e-06, "loss": 0.1518, "step": 1805, "teacher_loss": 0.1541162133216858 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.35204431414604187, "learning_rate": 7.832875524071128e-06, "loss": 0.1947, "step": 1806, "teacher_loss": 0.17721673846244812 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5221048593521118, "learning_rate": 7.837212664449906e-06, "loss": 0.361, "step": 1807, "teacher_loss": 0.3430839478969574 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.45229244232177734, "learning_rate": 7.841549804828684e-06, "loss": 0.1856, "step": 1808, "teacher_loss": 0.1559458076953888 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.699129045009613, "learning_rate": 7.84588694520746e-06, "loss": 0.2985, "step": 1809, "teacher_loss": 0.25393155217170715 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.40289968252182007, "learning_rate": 7.850224085586237e-06, "loss": 0.1812, "step": 1810, "teacher_loss": 0.1565280258655548 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.3219031095504761, "learning_rate": 7.854561225965013e-06, "loss": 0.2199, "step": 1811, "teacher_loss": 0.208558589220047 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.6861417889595032, "learning_rate": 7.85889836634379e-06, "loss": 0.3468, "step": 1812, "teacher_loss": 0.3090746998786926 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.2918952703475952, "learning_rate": 7.863235506722567e-06, "loss": 0.2331, "step": 1813, "teacher_loss": 0.22654855251312256 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.39952874183654785, "learning_rate": 7.867572647101344e-06, "loss": 0.2108, "step": 1814, "teacher_loss": 0.18978875875473022 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.6358152627944946, "learning_rate": 7.871909787480122e-06, "loss": 0.245, "step": 1815, "teacher_loss": 0.20152145624160767 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.6016253232955933, "learning_rate": 7.8762469278589e-06, "loss": 0.2378, "step": 1816, "teacher_loss": 0.19731970131397247 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5367385149002075, "learning_rate": 7.880584068237676e-06, "loss": 0.2971, "step": 1817, "teacher_loss": 0.2705267071723938 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.36606889963150024, "learning_rate": 7.884921208616451e-06, "loss": 0.1835, "step": 1818, "teacher_loss": 0.1631900668144226 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.32446256279945374, "learning_rate": 7.889258348995229e-06, "loss": 0.2417, "step": 1819, "teacher_loss": 0.23246291279792786 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4756159484386444, "learning_rate": 7.893595489374007e-06, "loss": 0.2695, "step": 1820, "teacher_loss": 0.24654775857925415 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.7762539982795715, "learning_rate": 7.897932629752784e-06, "loss": 0.3051, "step": 1821, "teacher_loss": 0.25277841091156006 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.6988166570663452, "learning_rate": 7.902269770131559e-06, "loss": 0.2872, "step": 1822, "teacher_loss": 0.24142813682556152 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.8394314050674438, "learning_rate": 7.906606910510336e-06, "loss": 0.3168, "step": 1823, "teacher_loss": 0.2587035894393921 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.362871915102005, "learning_rate": 7.910944050889114e-06, "loss": 0.2392, "step": 1824, "teacher_loss": 0.22541531920433044 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.282943457365036, "learning_rate": 7.915281191267891e-06, "loss": 0.238, "step": 1825, "teacher_loss": 0.23297810554504395 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5541413426399231, "learning_rate": 7.919618331646669e-06, "loss": 0.2908, "step": 1826, "teacher_loss": 0.26150596141815186 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.32107317447662354, "learning_rate": 7.923955472025445e-06, "loss": 0.1861, "step": 1827, "teacher_loss": 0.17114490270614624 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.22877249121665955, "learning_rate": 7.928292612404221e-06, "loss": 0.1852, "step": 1828, "teacher_loss": 0.18038858473300934 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.11934243142604828, "learning_rate": 7.932629752782999e-06, "loss": 0.1836, "step": 1829, "teacher_loss": 0.19077277183532715 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.2745369076728821, "learning_rate": 7.936966893161776e-06, "loss": 0.2884, "step": 1830, "teacher_loss": 0.28991490602493286 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.11852400004863739, "learning_rate": 7.941304033540552e-06, "loss": 0.2759, "step": 1831, "teacher_loss": 0.29337313771247864 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.47853589057922363, "learning_rate": 7.94564117391933e-06, "loss": 0.2427, "step": 1832, "teacher_loss": 0.21647757291793823 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5026630759239197, "learning_rate": 7.949978314298106e-06, "loss": 0.2874, "step": 1833, "teacher_loss": 0.2634488344192505 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.21414603292942047, "learning_rate": 7.954315454676883e-06, "loss": 0.3274, "step": 1834, "teacher_loss": 0.33998072147369385 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.7230276465415955, "learning_rate": 7.95865259505566e-06, "loss": 0.3458, "step": 1835, "teacher_loss": 0.30385270714759827 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.37154334783554077, "learning_rate": 7.962989735434437e-06, "loss": 0.2456, "step": 1836, "teacher_loss": 0.23164236545562744 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.47035640478134155, "learning_rate": 7.967326875813214e-06, "loss": 0.227, "step": 1837, "teacher_loss": 0.1999903917312622 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.567501425743103, "learning_rate": 7.971664016191992e-06, "loss": 0.2413, "step": 1838, "teacher_loss": 0.20501494407653809 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.20935213565826416, "learning_rate": 7.976001156570768e-06, "loss": 0.1791, "step": 1839, "teacher_loss": 0.1757459044456482 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.37829095125198364, "learning_rate": 7.980338296949544e-06, "loss": 0.258, "step": 1840, "teacher_loss": 0.24460014700889587 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.5724002718925476, "learning_rate": 7.984675437328322e-06, "loss": 0.2893, "step": 1841, "teacher_loss": 0.2578818202018738 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4348899722099304, "learning_rate": 7.989012577707099e-06, "loss": 0.4818, "step": 1842, "teacher_loss": 0.48706772923469543 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.2693827152252197, "learning_rate": 7.993349718085877e-06, "loss": 0.2129, "step": 1843, "teacher_loss": 0.20658424496650696 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.2475694715976715, "learning_rate": 7.997686858464651e-06, "loss": 0.2334, "step": 1844, "teacher_loss": 0.23186978697776794 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4242836833000183, "learning_rate": 8.002023998843429e-06, "loss": 0.2774, "step": 1845, "teacher_loss": 0.2610628306865692 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.23021869361400604, "learning_rate": 8.006361139222206e-06, "loss": 0.1816, "step": 1846, "teacher_loss": 0.17618069052696228 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.22081930935382843, "learning_rate": 8.010698279600984e-06, "loss": 0.2012, "step": 1847, "teacher_loss": 0.19905588030815125 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 1.1445214748382568, "learning_rate": 8.015035419979761e-06, "loss": 0.2835, "step": 1848, "teacher_loss": 0.18778514862060547 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.4486529231071472, "learning_rate": 8.019372560358537e-06, "loss": 0.2777, "step": 1849, "teacher_loss": 0.2587205469608307 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.3795764148235321, "learning_rate": 8.023709700737313e-06, "loss": 0.2392, "step": 1850, "teacher_loss": 0.22363372147083282 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.46964168548583984, "learning_rate": 8.028046841116091e-06, "loss": 0.2773, "step": 1851, "teacher_loss": 0.25590986013412476 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.38283711671829224, "learning_rate": 8.032383981494869e-06, "loss": 0.2398, "step": 1852, "teacher_loss": 0.22394323348999023 }, { "compression_loss": 0.0, "epoch": 0.33, "label_loss": 0.3926635682582855, "learning_rate": 8.036721121873645e-06, "loss": 0.3141, "step": 1853, "teacher_loss": 0.3053417205810547 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.38047635555267334, "learning_rate": 8.041058262252422e-06, "loss": 0.2538, "step": 1854, "teacher_loss": 0.23969416320323944 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.4139835238456726, "learning_rate": 8.045395402631198e-06, "loss": 0.5321, "step": 1855, "teacher_loss": 0.5452369451522827 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.6319215297698975, "learning_rate": 8.049732543009976e-06, "loss": 0.3191, "step": 1856, "teacher_loss": 0.28429466485977173 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.43632233142852783, "learning_rate": 8.054069683388753e-06, "loss": 0.2113, "step": 1857, "teacher_loss": 0.18629267811775208 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.28551626205444336, "learning_rate": 8.05840682376753e-06, "loss": 0.2541, "step": 1858, "teacher_loss": 0.25059640407562256 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.30213332176208496, "learning_rate": 8.062743964146307e-06, "loss": 0.2116, "step": 1859, "teacher_loss": 0.20150771737098694 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5224485397338867, "learning_rate": 8.067081104525083e-06, "loss": 0.3494, "step": 1860, "teacher_loss": 0.3301740884780884 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.4405520558357239, "learning_rate": 8.07141824490386e-06, "loss": 0.1956, "step": 1861, "teacher_loss": 0.1684151589870453 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.7682600617408752, "learning_rate": 8.075755385282636e-06, "loss": 0.3387, "step": 1862, "teacher_loss": 0.29100501537323 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.4820849597454071, "learning_rate": 8.080092525661414e-06, "loss": 0.2092, "step": 1863, "teacher_loss": 0.17890843749046326 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5034163594245911, "learning_rate": 8.084429666040192e-06, "loss": 0.3517, "step": 1864, "teacher_loss": 0.334837943315506 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.3912999927997589, "learning_rate": 8.08876680641897e-06, "loss": 0.1784, "step": 1865, "teacher_loss": 0.154697448015213 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.47309085726737976, "learning_rate": 8.093103946797743e-06, "loss": 0.28, "step": 1866, "teacher_loss": 0.2585349977016449 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.37852251529693604, "learning_rate": 8.097441087176521e-06, "loss": 0.1871, "step": 1867, "teacher_loss": 0.16582050919532776 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.3048030734062195, "learning_rate": 8.101778227555299e-06, "loss": 0.2125, "step": 1868, "teacher_loss": 0.20228412747383118 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.33019232749938965, "learning_rate": 8.106115367934076e-06, "loss": 0.3206, "step": 1869, "teacher_loss": 0.31949031352996826 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.2795789837837219, "learning_rate": 8.110452508312854e-06, "loss": 0.2815, "step": 1870, "teacher_loss": 0.28176620602607727 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.45445406436920166, "learning_rate": 8.114789648691628e-06, "loss": 0.2391, "step": 1871, "teacher_loss": 0.21512597799301147 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.39225757122039795, "learning_rate": 8.119126789070406e-06, "loss": 0.2256, "step": 1872, "teacher_loss": 0.20707634091377258 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5685315132141113, "learning_rate": 8.123463929449183e-06, "loss": 0.2673, "step": 1873, "teacher_loss": 0.23386883735656738 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.8287670612335205, "learning_rate": 8.127801069827961e-06, "loss": 0.2696, "step": 1874, "teacher_loss": 0.20750975608825684 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5341501235961914, "learning_rate": 8.132138210206737e-06, "loss": 0.2647, "step": 1875, "teacher_loss": 0.23473864793777466 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5260509252548218, "learning_rate": 8.136475350585515e-06, "loss": 0.2295, "step": 1876, "teacher_loss": 0.19658933579921722 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.24463702738285065, "learning_rate": 8.14081249096429e-06, "loss": 0.1556, "step": 1877, "teacher_loss": 0.14567025005817413 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5799046754837036, "learning_rate": 8.145149631343068e-06, "loss": 0.2654, "step": 1878, "teacher_loss": 0.23040470480918884 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.36795681715011597, "learning_rate": 8.149486771721846e-06, "loss": 0.2787, "step": 1879, "teacher_loss": 0.26876747608184814 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.44979578256607056, "learning_rate": 8.153823912100622e-06, "loss": 0.2507, "step": 1880, "teacher_loss": 0.22854046523571014 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.3928729295730591, "learning_rate": 8.1581610524794e-06, "loss": 0.2171, "step": 1881, "teacher_loss": 0.19758939743041992 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.32123103737831116, "learning_rate": 8.162498192858175e-06, "loss": 0.2037, "step": 1882, "teacher_loss": 0.19061529636383057 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.35667696595191956, "learning_rate": 8.166835333236953e-06, "loss": 0.2124, "step": 1883, "teacher_loss": 0.1963464319705963 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.3588745594024658, "learning_rate": 8.171172473615729e-06, "loss": 0.198, "step": 1884, "teacher_loss": 0.1800839900970459 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.2592361569404602, "learning_rate": 8.175509613994506e-06, "loss": 0.1916, "step": 1885, "teacher_loss": 0.1841181218624115 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.7408689260482788, "learning_rate": 8.179846754373284e-06, "loss": 0.2605, "step": 1886, "teacher_loss": 0.20710447430610657 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.9991213083267212, "learning_rate": 8.184183894752062e-06, "loss": 0.5528, "step": 1887, "teacher_loss": 0.5031658411026001 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.6155551671981812, "learning_rate": 8.188521035130836e-06, "loss": 0.2379, "step": 1888, "teacher_loss": 0.19594156742095947 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.43069207668304443, "learning_rate": 8.192858175509614e-06, "loss": 0.2532, "step": 1889, "teacher_loss": 0.23347456753253937 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.24603256583213806, "learning_rate": 8.197195315888391e-06, "loss": 0.1704, "step": 1890, "teacher_loss": 0.1620493233203888 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.38412415981292725, "learning_rate": 8.201532456267169e-06, "loss": 0.2486, "step": 1891, "teacher_loss": 0.23351064324378967 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.22729331254959106, "learning_rate": 8.205869596645946e-06, "loss": 0.207, "step": 1892, "teacher_loss": 0.2047477513551712 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.41279059648513794, "learning_rate": 8.21020673702472e-06, "loss": 0.3292, "step": 1893, "teacher_loss": 0.31993693113327026 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.43090611696243286, "learning_rate": 8.214543877403498e-06, "loss": 0.2179, "step": 1894, "teacher_loss": 0.19422873854637146 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.40354040265083313, "learning_rate": 8.218881017782276e-06, "loss": 0.205, "step": 1895, "teacher_loss": 0.18296034634113312 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.4139193296432495, "learning_rate": 8.223218158161053e-06, "loss": 0.2601, "step": 1896, "teacher_loss": 0.2430555820465088 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5586681365966797, "learning_rate": 8.22755529853983e-06, "loss": 0.2272, "step": 1897, "teacher_loss": 0.19037766754627228 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.11299015581607819, "learning_rate": 8.231892438918607e-06, "loss": 0.1714, "step": 1898, "teacher_loss": 0.17792829871177673 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.9816170334815979, "learning_rate": 8.236229579297383e-06, "loss": 0.2945, "step": 1899, "teacher_loss": 0.21817022562026978 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.48324888944625854, "learning_rate": 8.24056671967616e-06, "loss": 0.2699, "step": 1900, "teacher_loss": 0.24622660875320435 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.7907062768936157, "learning_rate": 8.244903860054938e-06, "loss": 0.314, "step": 1901, "teacher_loss": 0.261014461517334 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5526193976402283, "learning_rate": 8.249241000433714e-06, "loss": 0.2596, "step": 1902, "teacher_loss": 0.2270744889974594 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.49716830253601074, "learning_rate": 8.253578140812492e-06, "loss": 0.2379, "step": 1903, "teacher_loss": 0.2091204673051834 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.5063827037811279, "learning_rate": 8.257915281191268e-06, "loss": 0.3117, "step": 1904, "teacher_loss": 0.2900693714618683 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.49774765968322754, "learning_rate": 8.262252421570045e-06, "loss": 0.2549, "step": 1905, "teacher_loss": 0.22790485620498657 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.07791583985090256, "learning_rate": 8.266589561948821e-06, "loss": 0.1852, "step": 1906, "teacher_loss": 0.19706636667251587 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.3394966721534729, "learning_rate": 8.270926702327599e-06, "loss": 0.1758, "step": 1907, "teacher_loss": 0.1576264500617981 }, { "compression_loss": 0.0, "epoch": 0.34, "label_loss": 0.14728473126888275, "learning_rate": 8.275263842706376e-06, "loss": 0.1815, "step": 1908, "teacher_loss": 0.18535292148590088 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.3011743724346161, "learning_rate": 8.279600983085152e-06, "loss": 0.2428, "step": 1909, "teacher_loss": 0.23629418015480042 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.23928838968276978, "learning_rate": 8.283938123463928e-06, "loss": 0.2582, "step": 1910, "teacher_loss": 0.2603057026863098 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6685923933982849, "learning_rate": 8.288275263842706e-06, "loss": 0.2905, "step": 1911, "teacher_loss": 0.24852657318115234 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2540750801563263, "learning_rate": 8.292612404221484e-06, "loss": 0.2204, "step": 1912, "teacher_loss": 0.21667227149009705 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2171754240989685, "learning_rate": 8.296949544600261e-06, "loss": 0.2504, "step": 1913, "teacher_loss": 0.25403815507888794 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5087028741836548, "learning_rate": 8.301286684979039e-06, "loss": 0.2456, "step": 1914, "teacher_loss": 0.2163279950618744 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.3206031620502472, "learning_rate": 8.305623825357813e-06, "loss": 0.2478, "step": 1915, "teacher_loss": 0.2397019863128662 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.3642832636833191, "learning_rate": 8.30996096573659e-06, "loss": 0.1778, "step": 1916, "teacher_loss": 0.15712422132492065 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.33998531103134155, "learning_rate": 8.314298106115368e-06, "loss": 0.2724, "step": 1917, "teacher_loss": 0.26492470502853394 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.8645210266113281, "learning_rate": 8.318635246494146e-06, "loss": 0.3185, "step": 1918, "teacher_loss": 0.25780972838401794 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5151434540748596, "learning_rate": 8.322972386872922e-06, "loss": 0.2398, "step": 1919, "teacher_loss": 0.2092100828886032 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.7129989266395569, "learning_rate": 8.327309527251698e-06, "loss": 0.3279, "step": 1920, "teacher_loss": 0.2850641906261444 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6337594985961914, "learning_rate": 8.331646667630475e-06, "loss": 0.2171, "step": 1921, "teacher_loss": 0.17081128060817719 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2058110535144806, "learning_rate": 8.335983808009253e-06, "loss": 0.2572, "step": 1922, "teacher_loss": 0.26286107301712036 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5388700366020203, "learning_rate": 8.34032094838803e-06, "loss": 0.2236, "step": 1923, "teacher_loss": 0.18857789039611816 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4667588472366333, "learning_rate": 8.344658088766807e-06, "loss": 0.2655, "step": 1924, "teacher_loss": 0.24310317635536194 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.7177525758743286, "learning_rate": 8.348995229145584e-06, "loss": 0.3799, "step": 1925, "teacher_loss": 0.3423910140991211 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.21521690487861633, "learning_rate": 8.35333236952436e-06, "loss": 0.2158, "step": 1926, "teacher_loss": 0.2158641517162323 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6600738763809204, "learning_rate": 8.357669509903138e-06, "loss": 0.2781, "step": 1927, "teacher_loss": 0.23561862111091614 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 1.013218641281128, "learning_rate": 8.362006650281914e-06, "loss": 0.3579, "step": 1928, "teacher_loss": 0.28506553173065186 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5831384062767029, "learning_rate": 8.366343790660691e-06, "loss": 0.2954, "step": 1929, "teacher_loss": 0.2633988857269287 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5085728168487549, "learning_rate": 8.370680931039469e-06, "loss": 0.2627, "step": 1930, "teacher_loss": 0.23541685938835144 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4259492754936218, "learning_rate": 8.375018071418245e-06, "loss": 0.2922, "step": 1931, "teacher_loss": 0.2773159444332123 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.41211897134780884, "learning_rate": 8.37935521179702e-06, "loss": 0.2966, "step": 1932, "teacher_loss": 0.28374889492988586 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4677123725414276, "learning_rate": 8.383692352175798e-06, "loss": 0.3102, "step": 1933, "teacher_loss": 0.29270684719085693 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4804128408432007, "learning_rate": 8.388029492554576e-06, "loss": 0.2588, "step": 1934, "teacher_loss": 0.23421144485473633 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.5016388297080994, "learning_rate": 8.392366632933354e-06, "loss": 0.1815, "step": 1935, "teacher_loss": 0.14591535925865173 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.19861455261707306, "learning_rate": 8.396703773312131e-06, "loss": 0.202, "step": 1936, "teacher_loss": 0.2024187445640564 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.23110893368721008, "learning_rate": 8.401040913690905e-06, "loss": 0.1938, "step": 1937, "teacher_loss": 0.1896395981311798 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.23265612125396729, "learning_rate": 8.405378054069683e-06, "loss": 0.2173, "step": 1938, "teacher_loss": 0.21555998921394348 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.885893702507019, "learning_rate": 8.40971519444846e-06, "loss": 0.3835, "step": 1939, "teacher_loss": 0.32764244079589844 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.21131856739521027, "learning_rate": 8.414052334827238e-06, "loss": 0.1986, "step": 1940, "teacher_loss": 0.19723433256149292 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.22887814044952393, "learning_rate": 8.418389475206014e-06, "loss": 0.2178, "step": 1941, "teacher_loss": 0.21652166545391083 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2977261245250702, "learning_rate": 8.42272661558479e-06, "loss": 0.2457, "step": 1942, "teacher_loss": 0.23992031812667847 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.49699923396110535, "learning_rate": 8.427063755963568e-06, "loss": 0.3613, "step": 1943, "teacher_loss": 0.34621569514274597 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 1.1092720031738281, "learning_rate": 8.431400896342345e-06, "loss": 0.3127, "step": 1944, "teacher_loss": 0.22424188256263733 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6106966733932495, "learning_rate": 8.435738036721123e-06, "loss": 0.2614, "step": 1945, "teacher_loss": 0.22261390089988708 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4760059714317322, "learning_rate": 8.440075177099899e-06, "loss": 0.2849, "step": 1946, "teacher_loss": 0.2637171745300293 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2203870415687561, "learning_rate": 8.444412317478677e-06, "loss": 0.1845, "step": 1947, "teacher_loss": 0.18046513199806213 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.17986935377120972, "learning_rate": 8.448749457857453e-06, "loss": 0.1808, "step": 1948, "teacher_loss": 0.18089967966079712 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6490846872329712, "learning_rate": 8.45308659823623e-06, "loss": 0.2337, "step": 1949, "teacher_loss": 0.18756935000419617 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.429772287607193, "learning_rate": 8.457423738615006e-06, "loss": 0.2072, "step": 1950, "teacher_loss": 0.1824311465024948 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2201623022556305, "learning_rate": 8.461760878993784e-06, "loss": 0.2901, "step": 1951, "teacher_loss": 0.2978992462158203 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.31017157435417175, "learning_rate": 8.466098019372561e-06, "loss": 0.2671, "step": 1952, "teacher_loss": 0.26235055923461914 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.787773847579956, "learning_rate": 8.470435159751337e-06, "loss": 0.2896, "step": 1953, "teacher_loss": 0.23421788215637207 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.3537115454673767, "learning_rate": 8.474772300130113e-06, "loss": 0.2512, "step": 1954, "teacher_loss": 0.23979242146015167 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.2504391372203827, "learning_rate": 8.47910944050889e-06, "loss": 0.25, "step": 1955, "teacher_loss": 0.2500060498714447 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.49551188945770264, "learning_rate": 8.483446580887668e-06, "loss": 0.2184, "step": 1956, "teacher_loss": 0.18759498000144958 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6004061102867126, "learning_rate": 8.487783721266446e-06, "loss": 0.2364, "step": 1957, "teacher_loss": 0.19599968194961548 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.3101799488067627, "learning_rate": 8.492120861645222e-06, "loss": 0.2937, "step": 1958, "teacher_loss": 0.2918395698070526 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.6094840168952942, "learning_rate": 8.496458002023998e-06, "loss": 0.3062, "step": 1959, "teacher_loss": 0.2724979519844055 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.9341248273849487, "learning_rate": 8.500795142402776e-06, "loss": 0.2208, "step": 1960, "teacher_loss": 0.14158010482788086 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.8260982036590576, "learning_rate": 8.505132282781553e-06, "loss": 0.2861, "step": 1961, "teacher_loss": 0.22607558965682983 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4312358498573303, "learning_rate": 8.50946942316033e-06, "loss": 0.2948, "step": 1962, "teacher_loss": 0.2796017825603485 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4646150469779968, "learning_rate": 8.513806563539107e-06, "loss": 0.2254, "step": 1963, "teacher_loss": 0.19886037707328796 }, { "compression_loss": 0.0, "epoch": 0.35, "label_loss": 0.4066265821456909, "learning_rate": 8.518143703917883e-06, "loss": 0.223, "step": 1964, "teacher_loss": 0.20255476236343384 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3543008863925934, "learning_rate": 8.52248084429666e-06, "loss": 0.1826, "step": 1965, "teacher_loss": 0.1635093241930008 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.273688405752182, "learning_rate": 8.526817984675438e-06, "loss": 0.1896, "step": 1966, "teacher_loss": 0.18022316694259644 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.4326523542404175, "learning_rate": 8.531155125054216e-06, "loss": 0.3011, "step": 1967, "teacher_loss": 0.28644561767578125 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.30893221497535706, "learning_rate": 8.535492265432991e-06, "loss": 0.2587, "step": 1968, "teacher_loss": 0.25310301780700684 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.22304552793502808, "learning_rate": 8.539829405811767e-06, "loss": 0.2132, "step": 1969, "teacher_loss": 0.2121340036392212 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.20896759629249573, "learning_rate": 8.544166546190545e-06, "loss": 0.2105, "step": 1970, "teacher_loss": 0.21067768335342407 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5303832292556763, "learning_rate": 8.548503686569323e-06, "loss": 0.196, "step": 1971, "teacher_loss": 0.15886105597019196 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3733620047569275, "learning_rate": 8.552840826948099e-06, "loss": 0.1734, "step": 1972, "teacher_loss": 0.15118274092674255 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.4080936908721924, "learning_rate": 8.557177967326876e-06, "loss": 0.1998, "step": 1973, "teacher_loss": 0.17666301131248474 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.8268827795982361, "learning_rate": 8.561515107705654e-06, "loss": 0.2386, "step": 1974, "teacher_loss": 0.17326869070529938 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.18118637800216675, "learning_rate": 8.56585224808443e-06, "loss": 0.189, "step": 1975, "teacher_loss": 0.18990099430084229 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.7398730516433716, "learning_rate": 8.570189388463206e-06, "loss": 0.272, "step": 1976, "teacher_loss": 0.22005236148834229 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.15803629159927368, "learning_rate": 8.574526528841983e-06, "loss": 0.1875, "step": 1977, "teacher_loss": 0.19074061512947083 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.2643776535987854, "learning_rate": 8.578863669220761e-06, "loss": 0.1901, "step": 1978, "teacher_loss": 0.18189160525798798 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.2945954501628876, "learning_rate": 8.583200809599539e-06, "loss": 0.2082, "step": 1979, "teacher_loss": 0.19857361912727356 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.30913832783699036, "learning_rate": 8.587537949978314e-06, "loss": 0.2251, "step": 1980, "teacher_loss": 0.2157401144504547 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.558434247970581, "learning_rate": 8.59187509035709e-06, "loss": 0.2544, "step": 1981, "teacher_loss": 0.22056470811367035 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.20248591899871826, "learning_rate": 8.596212230735868e-06, "loss": 0.1903, "step": 1982, "teacher_loss": 0.18894410133361816 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.37931349873542786, "learning_rate": 8.600549371114646e-06, "loss": 0.2783, "step": 1983, "teacher_loss": 0.2670256793498993 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.381530225276947, "learning_rate": 8.604886511493423e-06, "loss": 0.1666, "step": 1984, "teacher_loss": 0.14266520738601685 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.31294405460357666, "learning_rate": 8.6092236518722e-06, "loss": 0.2126, "step": 1985, "teacher_loss": 0.20144245028495789 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.318248987197876, "learning_rate": 8.613560792250975e-06, "loss": 0.2728, "step": 1986, "teacher_loss": 0.2677832245826721 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.39165812730789185, "learning_rate": 8.617897932629753e-06, "loss": 0.254, "step": 1987, "teacher_loss": 0.238724485039711 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3828313946723938, "learning_rate": 8.62223507300853e-06, "loss": 0.2183, "step": 1988, "teacher_loss": 0.20003418624401093 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5963172912597656, "learning_rate": 8.626572213387308e-06, "loss": 0.3147, "step": 1989, "teacher_loss": 0.28335729241371155 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.18338504433631897, "learning_rate": 8.630909353766084e-06, "loss": 0.174, "step": 1990, "teacher_loss": 0.17292268574237823 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.7385209798812866, "learning_rate": 8.63524649414486e-06, "loss": 0.3269, "step": 1991, "teacher_loss": 0.28118351101875305 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5545350313186646, "learning_rate": 8.639583634523637e-06, "loss": 0.2669, "step": 1992, "teacher_loss": 0.23492911458015442 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.2846753001213074, "learning_rate": 8.643920774902415e-06, "loss": 0.1898, "step": 1993, "teacher_loss": 0.17925618588924408 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.6343709826469421, "learning_rate": 8.648257915281191e-06, "loss": 0.2883, "step": 1994, "teacher_loss": 0.24989870190620422 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5843238830566406, "learning_rate": 8.652595055659969e-06, "loss": 0.3305, "step": 1995, "teacher_loss": 0.3022594451904297 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.23642893135547638, "learning_rate": 8.656932196038746e-06, "loss": 0.228, "step": 1996, "teacher_loss": 0.2270338088274002 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3431861102581024, "learning_rate": 8.661269336417522e-06, "loss": 0.232, "step": 1997, "teacher_loss": 0.2196808159351349 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.41902440786361694, "learning_rate": 8.665606476796298e-06, "loss": 0.2062, "step": 1998, "teacher_loss": 0.18254628777503967 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.25876468420028687, "learning_rate": 8.669943617175076e-06, "loss": 0.2687, "step": 1999, "teacher_loss": 0.2698257267475128 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.7336583137512207, "learning_rate": 8.674280757553853e-06, "loss": 0.3261, "step": 2000, "teacher_loss": 0.2807842493057251 }, { "epoch": 0.36, "eval_exact_match": 79.9526963103122, "eval_f1": 87.32404141032497, "step": 2000 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.588871955871582, "learning_rate": 8.678617897932631e-06, "loss": 0.2493, "step": 2001, "teacher_loss": 0.2115837037563324 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.7952429056167603, "learning_rate": 8.682955038311407e-06, "loss": 0.2277, "step": 2002, "teacher_loss": 0.1646772027015686 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5486610531806946, "learning_rate": 8.687292178690183e-06, "loss": 0.2259, "step": 2003, "teacher_loss": 0.19000765681266785 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 1.0395715236663818, "learning_rate": 8.69162931906896e-06, "loss": 0.2831, "step": 2004, "teacher_loss": 0.1990422010421753 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.2756597697734833, "learning_rate": 8.695966459447738e-06, "loss": 0.1824, "step": 2005, "teacher_loss": 0.17209115624427795 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.33922529220581055, "learning_rate": 8.700303599826516e-06, "loss": 0.2085, "step": 2006, "teacher_loss": 0.19396401941776276 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.33040666580200195, "learning_rate": 8.704640740205292e-06, "loss": 0.241, "step": 2007, "teacher_loss": 0.2310769110918045 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3931776285171509, "learning_rate": 8.708977880584068e-06, "loss": 0.2487, "step": 2008, "teacher_loss": 0.23267048597335815 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.2084324061870575, "learning_rate": 8.713315020962845e-06, "loss": 0.2092, "step": 2009, "teacher_loss": 0.20931166410446167 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.5330890417098999, "learning_rate": 8.717652161341623e-06, "loss": 0.2161, "step": 2010, "teacher_loss": 0.18085786700248718 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.288196325302124, "learning_rate": 8.7219893017204e-06, "loss": 0.2688, "step": 2011, "teacher_loss": 0.2666383385658264 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.25869935750961304, "learning_rate": 8.726326442099176e-06, "loss": 0.1881, "step": 2012, "teacher_loss": 0.18026980757713318 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.7456955909729004, "learning_rate": 8.730663582477952e-06, "loss": 0.2768, "step": 2013, "teacher_loss": 0.22473645210266113 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.39600855112075806, "learning_rate": 8.73500072285673e-06, "loss": 0.2457, "step": 2014, "teacher_loss": 0.22903785109519958 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3784370422363281, "learning_rate": 8.739337863235507e-06, "loss": 0.1831, "step": 2015, "teacher_loss": 0.16134443879127502 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.3339955806732178, "learning_rate": 8.743675003614283e-06, "loss": 0.1901, "step": 2016, "teacher_loss": 0.17406892776489258 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.19792324304580688, "learning_rate": 8.748012143993061e-06, "loss": 0.1823, "step": 2017, "teacher_loss": 0.1805633008480072 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.25825729966163635, "learning_rate": 8.752349284371837e-06, "loss": 0.2273, "step": 2018, "teacher_loss": 0.22387373447418213 }, { "compression_loss": 0.0, "epoch": 0.36, "label_loss": 0.48217809200286865, "learning_rate": 8.756686424750615e-06, "loss": 0.2582, "step": 2019, "teacher_loss": 0.23334653675556183 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.21896736323833466, "learning_rate": 8.761023565129392e-06, "loss": 0.1825, "step": 2020, "teacher_loss": 0.1784682273864746 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6242147088050842, "learning_rate": 8.765360705508168e-06, "loss": 0.2849, "step": 2021, "teacher_loss": 0.24722039699554443 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6768083572387695, "learning_rate": 8.769697845886946e-06, "loss": 0.3201, "step": 2022, "teacher_loss": 0.2804555892944336 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.7817792892456055, "learning_rate": 8.774034986265723e-06, "loss": 0.2801, "step": 2023, "teacher_loss": 0.22440074384212494 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.756580114364624, "learning_rate": 8.7783721266445e-06, "loss": 0.2759, "step": 2024, "teacher_loss": 0.2225157916545868 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5330575108528137, "learning_rate": 8.782709267023275e-06, "loss": 0.2556, "step": 2025, "teacher_loss": 0.22478607296943665 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.7903412580490112, "learning_rate": 8.787046407402053e-06, "loss": 0.2729, "step": 2026, "teacher_loss": 0.21535338461399078 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5399750471115112, "learning_rate": 8.79138354778083e-06, "loss": 0.4124, "step": 2027, "teacher_loss": 0.398262083530426 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.2639715075492859, "learning_rate": 8.795720688159608e-06, "loss": 0.1791, "step": 2028, "teacher_loss": 0.1696719527244568 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.2311077117919922, "learning_rate": 8.800057828538382e-06, "loss": 0.1831, "step": 2029, "teacher_loss": 0.17779403924942017 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.4688766300678253, "learning_rate": 8.80439496891716e-06, "loss": 0.2569, "step": 2030, "teacher_loss": 0.2333940714597702 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.2640064060688019, "learning_rate": 8.808732109295938e-06, "loss": 0.1591, "step": 2031, "teacher_loss": 0.14747260510921478 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6614512801170349, "learning_rate": 8.813069249674715e-06, "loss": 0.2616, "step": 2032, "teacher_loss": 0.21721000969409943 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3999093174934387, "learning_rate": 8.817406390053493e-06, "loss": 0.1848, "step": 2033, "teacher_loss": 0.16086967289447784 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.7947626113891602, "learning_rate": 8.821743530432269e-06, "loss": 0.301, "step": 2034, "teacher_loss": 0.24614980816841125 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5450666546821594, "learning_rate": 8.826080670811045e-06, "loss": 0.2814, "step": 2035, "teacher_loss": 0.2520662248134613 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5424337387084961, "learning_rate": 8.830417811189822e-06, "loss": 0.3092, "step": 2036, "teacher_loss": 0.2832494378089905 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6603044271469116, "learning_rate": 8.8347549515686e-06, "loss": 0.4978, "step": 2037, "teacher_loss": 0.47970038652420044 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.370551735162735, "learning_rate": 8.839092091947376e-06, "loss": 0.2222, "step": 2038, "teacher_loss": 0.2057531476020813 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.41036537289619446, "learning_rate": 8.843429232326153e-06, "loss": 0.2172, "step": 2039, "teacher_loss": 0.19574546813964844 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.36894387006759644, "learning_rate": 8.84776637270493e-06, "loss": 0.2151, "step": 2040, "teacher_loss": 0.19801267981529236 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.7365674376487732, "learning_rate": 8.852103513083707e-06, "loss": 0.2903, "step": 2041, "teacher_loss": 0.2407400757074356 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3448624908924103, "learning_rate": 8.856440653462485e-06, "loss": 0.1811, "step": 2042, "teacher_loss": 0.16288888454437256 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.24986150860786438, "learning_rate": 8.86077779384126e-06, "loss": 0.2289, "step": 2043, "teacher_loss": 0.22653119266033173 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.31847259402275085, "learning_rate": 8.865114934220038e-06, "loss": 0.291, "step": 2044, "teacher_loss": 0.288002610206604 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.49197614192962646, "learning_rate": 8.869452074598816e-06, "loss": 0.2302, "step": 2045, "teacher_loss": 0.20107513666152954 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.702581524848938, "learning_rate": 8.873789214977592e-06, "loss": 0.2921, "step": 2046, "teacher_loss": 0.24647703766822815 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5075963139533997, "learning_rate": 8.878126355356368e-06, "loss": 0.2328, "step": 2047, "teacher_loss": 0.20224273204803467 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6403321623802185, "learning_rate": 8.882463495735145e-06, "loss": 0.2642, "step": 2048, "teacher_loss": 0.22235816717147827 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.35267117619514465, "learning_rate": 8.886800636113923e-06, "loss": 0.235, "step": 2049, "teacher_loss": 0.2218785583972931 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.12514732778072357, "learning_rate": 8.8911377764927e-06, "loss": 0.1653, "step": 2050, "teacher_loss": 0.1697188913822174 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3797406852245331, "learning_rate": 8.895474916871475e-06, "loss": 0.2321, "step": 2051, "teacher_loss": 0.21572750806808472 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.6932750940322876, "learning_rate": 8.899812057250252e-06, "loss": 0.342, "step": 2052, "teacher_loss": 0.3029642105102539 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.27024197578430176, "learning_rate": 8.90414919762903e-06, "loss": 0.2503, "step": 2053, "teacher_loss": 0.2481074333190918 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.4812772274017334, "learning_rate": 8.908486338007808e-06, "loss": 0.3006, "step": 2054, "teacher_loss": 0.2804809808731079 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.13784107565879822, "learning_rate": 8.912823478386585e-06, "loss": 0.1788, "step": 2055, "teacher_loss": 0.1833113133907318 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.38942182064056396, "learning_rate": 8.917160618765361e-06, "loss": 0.2152, "step": 2056, "teacher_loss": 0.195870041847229 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.35631370544433594, "learning_rate": 8.921497759144137e-06, "loss": 0.2804, "step": 2057, "teacher_loss": 0.27196890115737915 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.21285969018936157, "learning_rate": 8.925834899522915e-06, "loss": 0.2008, "step": 2058, "teacher_loss": 0.19941532611846924 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.450714111328125, "learning_rate": 8.930172039901692e-06, "loss": 0.2127, "step": 2059, "teacher_loss": 0.18625584244728088 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3550308346748352, "learning_rate": 8.934509180280468e-06, "loss": 0.2122, "step": 2060, "teacher_loss": 0.19637781381607056 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3289303779602051, "learning_rate": 8.938846320659246e-06, "loss": 0.2421, "step": 2061, "teacher_loss": 0.23245888948440552 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5442341566085815, "learning_rate": 8.943183461038022e-06, "loss": 0.2404, "step": 2062, "teacher_loss": 0.20665675401687622 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5283471941947937, "learning_rate": 8.9475206014168e-06, "loss": 0.2716, "step": 2063, "teacher_loss": 0.24312719702720642 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3343338668346405, "learning_rate": 8.951857741795577e-06, "loss": 0.1686, "step": 2064, "teacher_loss": 0.15023337304592133 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.39035654067993164, "learning_rate": 8.956194882174353e-06, "loss": 0.1989, "step": 2065, "teacher_loss": 0.17762787640094757 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.3479209542274475, "learning_rate": 8.96053202255313e-06, "loss": 0.2541, "step": 2066, "teacher_loss": 0.24369065463542938 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.20199933648109436, "learning_rate": 8.964869162931907e-06, "loss": 0.2247, "step": 2067, "teacher_loss": 0.22719204425811768 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5131199955940247, "learning_rate": 8.969206303310684e-06, "loss": 0.2447, "step": 2068, "teacher_loss": 0.2149207890033722 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.37718072533607483, "learning_rate": 8.97354344368946e-06, "loss": 0.2398, "step": 2069, "teacher_loss": 0.224510058760643 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.24448087811470032, "learning_rate": 8.977880584068238e-06, "loss": 0.1891, "step": 2070, "teacher_loss": 0.182974711060524 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.7908239960670471, "learning_rate": 8.982217724447015e-06, "loss": 0.3191, "step": 2071, "teacher_loss": 0.26663511991500854 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5636007189750671, "learning_rate": 8.986554864825793e-06, "loss": 0.2119, "step": 2072, "teacher_loss": 0.172852024435997 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5297435522079468, "learning_rate": 8.990892005204567e-06, "loss": 0.2453, "step": 2073, "teacher_loss": 0.213679701089859 }, { "compression_loss": 0.0, "epoch": 0.37, "label_loss": 0.5025248527526855, "learning_rate": 8.995229145583345e-06, "loss": 0.2351, "step": 2074, "teacher_loss": 0.20539703965187073 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4148525595664978, "learning_rate": 8.999566285962122e-06, "loss": 0.2804, "step": 2075, "teacher_loss": 0.26550984382629395 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.594307005405426, "learning_rate": 9.0039034263409e-06, "loss": 0.2604, "step": 2076, "teacher_loss": 0.22331345081329346 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4579019546508789, "learning_rate": 9.008240566719678e-06, "loss": 0.1722, "step": 2077, "teacher_loss": 0.140457421541214 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.41960811614990234, "learning_rate": 9.012577707098452e-06, "loss": 0.1757, "step": 2078, "teacher_loss": 0.14856143295764923 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.2651267647743225, "learning_rate": 9.01691484747723e-06, "loss": 0.2083, "step": 2079, "teacher_loss": 0.2020222246646881 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.6848872900009155, "learning_rate": 9.021251987856007e-06, "loss": 0.2707, "step": 2080, "teacher_loss": 0.224684938788414 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.7914320230484009, "learning_rate": 9.025589128234785e-06, "loss": 0.2626, "step": 2081, "teacher_loss": 0.2038527876138687 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.296835720539093, "learning_rate": 9.02992626861356e-06, "loss": 0.1991, "step": 2082, "teacher_loss": 0.1882028877735138 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.48113179206848145, "learning_rate": 9.034263408992338e-06, "loss": 0.2101, "step": 2083, "teacher_loss": 0.17998534440994263 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.37829673290252686, "learning_rate": 9.038600549371114e-06, "loss": 0.2522, "step": 2084, "teacher_loss": 0.23822768032550812 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.2498432695865631, "learning_rate": 9.042937689749892e-06, "loss": 0.2035, "step": 2085, "teacher_loss": 0.19834960997104645 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.5646398067474365, "learning_rate": 9.04727483012867e-06, "loss": 0.2589, "step": 2086, "teacher_loss": 0.2249540537595749 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.6695016026496887, "learning_rate": 9.051611970507445e-06, "loss": 0.2765, "step": 2087, "teacher_loss": 0.23279020190238953 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.20475949347019196, "learning_rate": 9.055949110886223e-06, "loss": 0.2163, "step": 2088, "teacher_loss": 0.21759286522865295 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.7687006592750549, "learning_rate": 9.060286251264999e-06, "loss": 0.2746, "step": 2089, "teacher_loss": 0.21971040964126587 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.5942267179489136, "learning_rate": 9.064623391643777e-06, "loss": 0.2956, "step": 2090, "teacher_loss": 0.2624244689941406 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.25686657428741455, "learning_rate": 9.068960532022553e-06, "loss": 0.2123, "step": 2091, "teacher_loss": 0.20739948749542236 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.39781856536865234, "learning_rate": 9.07329767240133e-06, "loss": 0.4133, "step": 2092, "teacher_loss": 0.4150695502758026 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.9219763278961182, "learning_rate": 9.077634812780108e-06, "loss": 0.2123, "step": 2093, "teacher_loss": 0.13346675038337708 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4803623557090759, "learning_rate": 9.081971953158885e-06, "loss": 0.2548, "step": 2094, "teacher_loss": 0.2297501564025879 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.46216902136802673, "learning_rate": 9.08630909353766e-06, "loss": 0.2183, "step": 2095, "teacher_loss": 0.19125859439373016 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.3731536865234375, "learning_rate": 9.090646233916437e-06, "loss": 0.2482, "step": 2096, "teacher_loss": 0.23431020975112915 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.3863866925239563, "learning_rate": 9.094983374295215e-06, "loss": 0.2634, "step": 2097, "teacher_loss": 0.24969086050987244 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.8510243892669678, "learning_rate": 9.099320514673993e-06, "loss": 0.3111, "step": 2098, "teacher_loss": 0.2511082887649536 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4815167188644409, "learning_rate": 9.10365765505277e-06, "loss": 0.253, "step": 2099, "teacher_loss": 0.22756710648536682 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.3032773733139038, "learning_rate": 9.107994795431544e-06, "loss": 0.2387, "step": 2100, "teacher_loss": 0.23156434297561646 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.8672282099723816, "learning_rate": 9.112331935810322e-06, "loss": 0.4178, "step": 2101, "teacher_loss": 0.3678891360759735 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.7124062776565552, "learning_rate": 9.1166690761891e-06, "loss": 0.2583, "step": 2102, "teacher_loss": 0.20782314240932465 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4278835654258728, "learning_rate": 9.121006216567877e-06, "loss": 0.2713, "step": 2103, "teacher_loss": 0.2538614869117737 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.18787714838981628, "learning_rate": 9.125343356946653e-06, "loss": 0.1931, "step": 2104, "teacher_loss": 0.1937095820903778 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4821511507034302, "learning_rate": 9.12968049732543e-06, "loss": 0.291, "step": 2105, "teacher_loss": 0.2697063088417053 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4224971830844879, "learning_rate": 9.134017637704207e-06, "loss": 0.2987, "step": 2106, "teacher_loss": 0.2849319577217102 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4728459119796753, "learning_rate": 9.138354778082984e-06, "loss": 0.3212, "step": 2107, "teacher_loss": 0.30432137846946716 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4405709505081177, "learning_rate": 9.142691918461762e-06, "loss": 0.3043, "step": 2108, "teacher_loss": 0.2891288995742798 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.42375272512435913, "learning_rate": 9.147029058840538e-06, "loss": 0.2275, "step": 2109, "teacher_loss": 0.20569898188114166 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.1682019829750061, "learning_rate": 9.151366199219316e-06, "loss": 0.2193, "step": 2110, "teacher_loss": 0.2249731421470642 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4222942292690277, "learning_rate": 9.155703339598091e-06, "loss": 0.2325, "step": 2111, "teacher_loss": 0.21136754751205444 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4456612467765808, "learning_rate": 9.160040479976869e-06, "loss": 0.3435, "step": 2112, "teacher_loss": 0.332125723361969 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.48074495792388916, "learning_rate": 9.164377620355645e-06, "loss": 0.2321, "step": 2113, "teacher_loss": 0.20442691445350647 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.19290198385715485, "learning_rate": 9.168714760734423e-06, "loss": 0.2297, "step": 2114, "teacher_loss": 0.23380103707313538 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4083178639411926, "learning_rate": 9.1730519011132e-06, "loss": 0.209, "step": 2115, "teacher_loss": 0.18685731291770935 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.47407251596450806, "learning_rate": 9.177389041491976e-06, "loss": 0.3237, "step": 2116, "teacher_loss": 0.30699652433395386 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.26519811153411865, "learning_rate": 9.181726181870752e-06, "loss": 0.1737, "step": 2117, "teacher_loss": 0.16349007189273834 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.37804514169692993, "learning_rate": 9.18606332224953e-06, "loss": 0.2048, "step": 2118, "teacher_loss": 0.18558034300804138 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 1.0541452169418335, "learning_rate": 9.190400462628307e-06, "loss": 0.2747, "step": 2119, "teacher_loss": 0.18807856738567352 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.5357826948165894, "learning_rate": 9.194737603007085e-06, "loss": 0.2789, "step": 2120, "teacher_loss": 0.25034138560295105 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.3532499670982361, "learning_rate": 9.199074743385863e-06, "loss": 0.296, "step": 2121, "teacher_loss": 0.2896440029144287 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.3230995237827301, "learning_rate": 9.203411883764637e-06, "loss": 0.2296, "step": 2122, "teacher_loss": 0.2192222774028778 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.9647735953330994, "learning_rate": 9.207749024143414e-06, "loss": 0.2812, "step": 2123, "teacher_loss": 0.20519641041755676 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4289681911468506, "learning_rate": 9.212086164522192e-06, "loss": 0.2993, "step": 2124, "teacher_loss": 0.28487497568130493 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4380072355270386, "learning_rate": 9.21642330490097e-06, "loss": 0.2316, "step": 2125, "teacher_loss": 0.20871500670909882 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.6278570890426636, "learning_rate": 9.220760445279746e-06, "loss": 0.2618, "step": 2126, "teacher_loss": 0.2211454212665558 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.41738349199295044, "learning_rate": 9.225097585658522e-06, "loss": 0.2602, "step": 2127, "teacher_loss": 0.24276286363601685 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4858693480491638, "learning_rate": 9.2294347260373e-06, "loss": 0.2281, "step": 2128, "teacher_loss": 0.19951000809669495 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.4537910223007202, "learning_rate": 9.233771866416077e-06, "loss": 0.2073, "step": 2129, "teacher_loss": 0.17989768087863922 }, { "compression_loss": 0.0, "epoch": 0.38, "label_loss": 0.23246467113494873, "learning_rate": 9.238109006794854e-06, "loss": 0.2513, "step": 2130, "teacher_loss": 0.25334692001342773 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4051157236099243, "learning_rate": 9.24244614717363e-06, "loss": 0.2203, "step": 2131, "teacher_loss": 0.199751615524292 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.28404003381729126, "learning_rate": 9.246783287552408e-06, "loss": 0.2217, "step": 2132, "teacher_loss": 0.21472257375717163 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.17972180247306824, "learning_rate": 9.251120427931184e-06, "loss": 0.1692, "step": 2133, "teacher_loss": 0.16801142692565918 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3178696036338806, "learning_rate": 9.255457568309962e-06, "loss": 0.2196, "step": 2134, "teacher_loss": 0.20871925354003906 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4049859642982483, "learning_rate": 9.259794708688737e-06, "loss": 0.2821, "step": 2135, "teacher_loss": 0.268497496843338 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.7567098140716553, "learning_rate": 9.264131849067515e-06, "loss": 0.2024, "step": 2136, "teacher_loss": 0.14081138372421265 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4760478138923645, "learning_rate": 9.268468989446293e-06, "loss": 0.5551, "step": 2137, "teacher_loss": 0.5638923645019531 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3158319592475891, "learning_rate": 9.272806129825069e-06, "loss": 0.1546, "step": 2138, "teacher_loss": 0.1366778165102005 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4463692307472229, "learning_rate": 9.277143270203845e-06, "loss": 0.2482, "step": 2139, "teacher_loss": 0.22623416781425476 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.31751370429992676, "learning_rate": 9.281480410582622e-06, "loss": 0.1651, "step": 2140, "teacher_loss": 0.1481410562992096 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.2751868963241577, "learning_rate": 9.2858175509614e-06, "loss": 0.2523, "step": 2141, "teacher_loss": 0.24979335069656372 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.2521328628063202, "learning_rate": 9.290154691340177e-06, "loss": 0.2432, "step": 2142, "teacher_loss": 0.24226175248622894 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5611305236816406, "learning_rate": 9.294491831718955e-06, "loss": 0.2541, "step": 2143, "teacher_loss": 0.22002173960208893 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 1.1910628080368042, "learning_rate": 9.29882897209773e-06, "loss": 0.3634, "step": 2144, "teacher_loss": 0.27148547768592834 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5105026364326477, "learning_rate": 9.303166112476507e-06, "loss": 0.2313, "step": 2145, "teacher_loss": 0.20025435090065002 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 1.1379374265670776, "learning_rate": 9.307503252855285e-06, "loss": 0.358, "step": 2146, "teacher_loss": 0.2713821530342102 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.6692630052566528, "learning_rate": 9.311840393234062e-06, "loss": 0.2763, "step": 2147, "teacher_loss": 0.23267696797847748 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.34099072217941284, "learning_rate": 9.316177533612838e-06, "loss": 0.2496, "step": 2148, "teacher_loss": 0.23947863280773163 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5257954597473145, "learning_rate": 9.320514673991614e-06, "loss": 0.2321, "step": 2149, "teacher_loss": 0.1994883418083191 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.42799150943756104, "learning_rate": 9.324851814370392e-06, "loss": 0.2667, "step": 2150, "teacher_loss": 0.24874289333820343 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.41456642746925354, "learning_rate": 9.32918895474917e-06, "loss": 0.2828, "step": 2151, "teacher_loss": 0.2681078314781189 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3473452627658844, "learning_rate": 9.333526095127947e-06, "loss": 0.2366, "step": 2152, "teacher_loss": 0.22431805729866028 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5971699953079224, "learning_rate": 9.337863235506723e-06, "loss": 0.2208, "step": 2153, "teacher_loss": 0.17901864647865295 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.23845408856868744, "learning_rate": 9.3422003758855e-06, "loss": 0.1879, "step": 2154, "teacher_loss": 0.1823098361492157 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3232029676437378, "learning_rate": 9.346537516264276e-06, "loss": 0.1809, "step": 2155, "teacher_loss": 0.1651291698217392 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.7645639181137085, "learning_rate": 9.350874656643054e-06, "loss": 0.3224, "step": 2156, "teacher_loss": 0.2732837200164795 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.30576765537261963, "learning_rate": 9.35521179702183e-06, "loss": 0.2027, "step": 2157, "teacher_loss": 0.1912125200033188 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.13728633522987366, "learning_rate": 9.359548937400607e-06, "loss": 0.1575, "step": 2158, "teacher_loss": 0.15970264375209808 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.38085559010505676, "learning_rate": 9.363886077779385e-06, "loss": 0.292, "step": 2159, "teacher_loss": 0.28213202953338623 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.15269820392131805, "learning_rate": 9.368223218158161e-06, "loss": 0.1957, "step": 2160, "teacher_loss": 0.20049670338630676 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.8320853114128113, "learning_rate": 9.372560358536939e-06, "loss": 0.3104, "step": 2161, "teacher_loss": 0.2524350881576538 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4620468318462372, "learning_rate": 9.376897498915715e-06, "loss": 0.3223, "step": 2162, "teacher_loss": 0.3067595362663269 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5319443345069885, "learning_rate": 9.381234639294492e-06, "loss": 0.3103, "step": 2163, "teacher_loss": 0.2856678366661072 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.6011531949043274, "learning_rate": 9.38557177967327e-06, "loss": 0.2303, "step": 2164, "teacher_loss": 0.189103901386261 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.7799822092056274, "learning_rate": 9.389908920052046e-06, "loss": 0.2928, "step": 2165, "teacher_loss": 0.2386912852525711 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.8331639766693115, "learning_rate": 9.394246060430822e-06, "loss": 0.2792, "step": 2166, "teacher_loss": 0.21768295764923096 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.8568414449691772, "learning_rate": 9.3985832008096e-06, "loss": 0.2889, "step": 2167, "teacher_loss": 0.225747212767601 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5447222590446472, "learning_rate": 9.402920341188377e-06, "loss": 0.2724, "step": 2168, "teacher_loss": 0.24210651218891144 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.4856337308883667, "learning_rate": 9.407257481567155e-06, "loss": 0.3182, "step": 2169, "teacher_loss": 0.2995622754096985 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.6067987084388733, "learning_rate": 9.41159462194593e-06, "loss": 0.3291, "step": 2170, "teacher_loss": 0.29819437861442566 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.2784542143344879, "learning_rate": 9.415931762324706e-06, "loss": 0.2455, "step": 2171, "teacher_loss": 0.24187399446964264 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5377089977264404, "learning_rate": 9.420268902703484e-06, "loss": 0.3191, "step": 2172, "teacher_loss": 0.2947670519351959 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.07679194957017899, "learning_rate": 9.424606043082262e-06, "loss": 0.2144, "step": 2173, "teacher_loss": 0.22972631454467773 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.1682363599538803, "learning_rate": 9.42894318346104e-06, "loss": 0.2643, "step": 2174, "teacher_loss": 0.27501484751701355 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.813317060470581, "learning_rate": 9.433280323839815e-06, "loss": 0.3818, "step": 2175, "teacher_loss": 0.3338812291622162 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.2971251606941223, "learning_rate": 9.437617464218591e-06, "loss": 0.2549, "step": 2176, "teacher_loss": 0.2501808702945709 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.7232875823974609, "learning_rate": 9.441954604597369e-06, "loss": 0.3495, "step": 2177, "teacher_loss": 0.30794596672058105 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3615860044956207, "learning_rate": 9.446291744976146e-06, "loss": 0.2588, "step": 2178, "teacher_loss": 0.24740543961524963 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5762007832527161, "learning_rate": 9.450628885354922e-06, "loss": 0.3085, "step": 2179, "teacher_loss": 0.2787438631057739 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.28633952140808105, "learning_rate": 9.4549660257337e-06, "loss": 0.257, "step": 2180, "teacher_loss": 0.2537161707878113 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.23427675664424896, "learning_rate": 9.459303166112478e-06, "loss": 0.1641, "step": 2181, "teacher_loss": 0.15630245208740234 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.3803035616874695, "learning_rate": 9.463640306491253e-06, "loss": 0.2538, "step": 2182, "teacher_loss": 0.2397821992635727 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.5253793001174927, "learning_rate": 9.467977446870031e-06, "loss": 0.3326, "step": 2183, "teacher_loss": 0.3111928403377533 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.6766204833984375, "learning_rate": 9.472314587248807e-06, "loss": 0.3705, "step": 2184, "teacher_loss": 0.3365001678466797 }, { "compression_loss": 0.0, "epoch": 0.39, "label_loss": 0.36411163210868835, "learning_rate": 9.476651727627585e-06, "loss": 0.2854, "step": 2185, "teacher_loss": 0.2767032980918884 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4328285753726959, "learning_rate": 9.480988868006362e-06, "loss": 0.2208, "step": 2186, "teacher_loss": 0.1972917765378952 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4181402325630188, "learning_rate": 9.485326008385138e-06, "loss": 0.1978, "step": 2187, "teacher_loss": 0.17331382632255554 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.48330575227737427, "learning_rate": 9.489663148763914e-06, "loss": 0.2788, "step": 2188, "teacher_loss": 0.25611740350723267 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.2704477906227112, "learning_rate": 9.494000289142692e-06, "loss": 0.2156, "step": 2189, "teacher_loss": 0.209501713514328 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.8168417811393738, "learning_rate": 9.49833742952147e-06, "loss": 0.3547, "step": 2190, "teacher_loss": 0.3033197224140167 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.7836443781852722, "learning_rate": 9.502674569900247e-06, "loss": 0.2576, "step": 2191, "teacher_loss": 0.19912272691726685 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.3452082574367523, "learning_rate": 9.507011710279023e-06, "loss": 0.2214, "step": 2192, "teacher_loss": 0.20760349929332733 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.7372835874557495, "learning_rate": 9.511348850657799e-06, "loss": 0.2297, "step": 2193, "teacher_loss": 0.17330431938171387 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.23263344168663025, "learning_rate": 9.515685991036576e-06, "loss": 0.2469, "step": 2194, "teacher_loss": 0.24846260249614716 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.30855706334114075, "learning_rate": 9.520023131415354e-06, "loss": 0.1961, "step": 2195, "teacher_loss": 0.18356117606163025 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.3313453197479248, "learning_rate": 9.524360271794132e-06, "loss": 0.2574, "step": 2196, "teacher_loss": 0.24912987649440765 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.3201009929180145, "learning_rate": 9.528697412172908e-06, "loss": 0.2066, "step": 2197, "teacher_loss": 0.19396552443504333 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5197339653968811, "learning_rate": 9.533034552551684e-06, "loss": 0.2215, "step": 2198, "teacher_loss": 0.18833063542842865 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.3336051106452942, "learning_rate": 9.537371692930461e-06, "loss": 0.3164, "step": 2199, "teacher_loss": 0.31451624631881714 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.2260984182357788, "learning_rate": 9.541708833309239e-06, "loss": 0.1653, "step": 2200, "teacher_loss": 0.15855246782302856 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.40012457966804504, "learning_rate": 9.546045973688015e-06, "loss": 0.2205, "step": 2201, "teacher_loss": 0.20049473643302917 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4544990658760071, "learning_rate": 9.550383114066792e-06, "loss": 0.2793, "step": 2202, "teacher_loss": 0.25987643003463745 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5559662580490112, "learning_rate": 9.55472025444557e-06, "loss": 0.2435, "step": 2203, "teacher_loss": 0.20883101224899292 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4598582983016968, "learning_rate": 9.559057394824346e-06, "loss": 0.2208, "step": 2204, "teacher_loss": 0.1942415088415146 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.32681959867477417, "learning_rate": 9.563394535203124e-06, "loss": 0.2221, "step": 2205, "teacher_loss": 0.21041376888751984 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.49676692485809326, "learning_rate": 9.5677316755819e-06, "loss": 0.2572, "step": 2206, "teacher_loss": 0.23056307435035706 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.0893811360001564, "learning_rate": 9.572068815960677e-06, "loss": 0.1314, "step": 2207, "teacher_loss": 0.13603034615516663 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.14873647689819336, "learning_rate": 9.576405956339455e-06, "loss": 0.1928, "step": 2208, "teacher_loss": 0.1977020800113678 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5860141515731812, "learning_rate": 9.58074309671823e-06, "loss": 0.3024, "step": 2209, "teacher_loss": 0.27087458968162537 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.20724236965179443, "learning_rate": 9.585080237097007e-06, "loss": 0.2204, "step": 2210, "teacher_loss": 0.22189565002918243 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.26069575548171997, "learning_rate": 9.589417377475784e-06, "loss": 0.2963, "step": 2211, "teacher_loss": 0.3002605438232422 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5191426873207092, "learning_rate": 9.593754517854562e-06, "loss": 0.2987, "step": 2212, "teacher_loss": 0.2741590738296509 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.3210410475730896, "learning_rate": 9.59809165823334e-06, "loss": 0.3276, "step": 2213, "teacher_loss": 0.32833173871040344 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4151713252067566, "learning_rate": 9.602428798612115e-06, "loss": 0.1755, "step": 2214, "teacher_loss": 0.14887070655822754 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.48839229345321655, "learning_rate": 9.606765938990891e-06, "loss": 0.2418, "step": 2215, "teacher_loss": 0.21436454355716705 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.45806974172592163, "learning_rate": 9.611103079369669e-06, "loss": 0.2144, "step": 2216, "teacher_loss": 0.18735596537590027 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.14502759277820587, "learning_rate": 9.615440219748447e-06, "loss": 0.222, "step": 2217, "teacher_loss": 0.23054593801498413 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5173236727714539, "learning_rate": 9.619777360127224e-06, "loss": 0.2951, "step": 2218, "teacher_loss": 0.2704623341560364 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5228928327560425, "learning_rate": 9.624114500506e-06, "loss": 0.2223, "step": 2219, "teacher_loss": 0.18892785906791687 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5419129729270935, "learning_rate": 9.628451640884776e-06, "loss": 0.2425, "step": 2220, "teacher_loss": 0.2092212289571762 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.19005830585956573, "learning_rate": 9.632788781263554e-06, "loss": 0.1725, "step": 2221, "teacher_loss": 0.17049893736839294 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.5575596690177917, "learning_rate": 9.637125921642331e-06, "loss": 0.3116, "step": 2222, "teacher_loss": 0.2842558026313782 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.28845590353012085, "learning_rate": 9.641463062021107e-06, "loss": 0.1955, "step": 2223, "teacher_loss": 0.18520215153694153 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.24566662311553955, "learning_rate": 9.645800202399885e-06, "loss": 0.1872, "step": 2224, "teacher_loss": 0.18067467212677002 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.20837059617042542, "learning_rate": 9.65013734277866e-06, "loss": 0.1957, "step": 2225, "teacher_loss": 0.1943013072013855 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.6324210166931152, "learning_rate": 9.654474483157438e-06, "loss": 0.3344, "step": 2226, "teacher_loss": 0.30128300189971924 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.4252736568450928, "learning_rate": 9.658811623536216e-06, "loss": 0.2594, "step": 2227, "teacher_loss": 0.24094724655151367 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.47234320640563965, "learning_rate": 9.663148763914992e-06, "loss": 0.278, "step": 2228, "teacher_loss": 0.2563807964324951 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.1902233362197876, "learning_rate": 9.66748590429377e-06, "loss": 0.2068, "step": 2229, "teacher_loss": 0.20864424109458923 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.6113001108169556, "learning_rate": 9.671823044672547e-06, "loss": 0.2365, "step": 2230, "teacher_loss": 0.1949041187763214 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.504298746585846, "learning_rate": 9.676160185051323e-06, "loss": 0.2368, "step": 2231, "teacher_loss": 0.2070726901292801 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.7816634178161621, "learning_rate": 9.680497325430099e-06, "loss": 0.3517, "step": 2232, "teacher_loss": 0.30388006567955017 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.24324238300323486, "learning_rate": 9.684834465808877e-06, "loss": 0.2083, "step": 2233, "teacher_loss": 0.20443210005760193 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.2713974118232727, "learning_rate": 9.689171606187654e-06, "loss": 0.1972, "step": 2234, "teacher_loss": 0.1889331042766571 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.39800143241882324, "learning_rate": 9.693508746566432e-06, "loss": 0.2472, "step": 2235, "teacher_loss": 0.23048971593379974 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.32909536361694336, "learning_rate": 9.697845886945206e-06, "loss": 0.2349, "step": 2236, "teacher_loss": 0.22447431087493896 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.13109725713729858, "learning_rate": 9.702183027323984e-06, "loss": 0.1523, "step": 2237, "teacher_loss": 0.15464109182357788 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.9299656748771667, "learning_rate": 9.706520167702761e-06, "loss": 0.3251, "step": 2238, "teacher_loss": 0.2579048275947571 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.629909873008728, "learning_rate": 9.710857308081539e-06, "loss": 0.2106, "step": 2239, "teacher_loss": 0.16398456692695618 }, { "compression_loss": 0.0, "epoch": 0.4, "label_loss": 0.8239361047744751, "learning_rate": 9.715194448460317e-06, "loss": 0.2567, "step": 2240, "teacher_loss": 0.1936299204826355 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.463945209980011, "learning_rate": 9.719531588839093e-06, "loss": 0.4089, "step": 2241, "teacher_loss": 0.40281015634536743 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.4012555480003357, "learning_rate": 9.723868729217868e-06, "loss": 0.2647, "step": 2242, "teacher_loss": 0.24955996870994568 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.197137251496315, "learning_rate": 9.728205869596646e-06, "loss": 0.1611, "step": 2243, "teacher_loss": 0.15712295472621918 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6228808164596558, "learning_rate": 9.732543009975424e-06, "loss": 0.3091, "step": 2244, "teacher_loss": 0.274278849363327 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5808538198471069, "learning_rate": 9.7368801503542e-06, "loss": 0.313, "step": 2245, "teacher_loss": 0.28319019079208374 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.4658641219139099, "learning_rate": 9.741217290732977e-06, "loss": 0.3236, "step": 2246, "teacher_loss": 0.3077549934387207 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.7658321857452393, "learning_rate": 9.745554431111753e-06, "loss": 0.2756, "step": 2247, "teacher_loss": 0.22112470865249634 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.9507397413253784, "learning_rate": 9.74989157149053e-06, "loss": 0.2563, "step": 2248, "teacher_loss": 0.1791505515575409 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.8397104740142822, "learning_rate": 9.754228711869308e-06, "loss": 0.2558, "step": 2249, "teacher_loss": 0.19092419743537903 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.45005905628204346, "learning_rate": 9.758565852248084e-06, "loss": 0.2413, "step": 2250, "teacher_loss": 0.21810144186019897 }, { "epoch": 0.41, "eval_exact_match": 79.5364238410596, "eval_f1": 87.11622431744448, "step": 2250 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6465145945549011, "learning_rate": 9.762902992626862e-06, "loss": 0.2887, "step": 2251, "teacher_loss": 0.24898266792297363 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.833666205406189, "learning_rate": 9.76724013300564e-06, "loss": 0.3373, "step": 2252, "teacher_loss": 0.28210878372192383 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.7266898155212402, "learning_rate": 9.771577273384416e-06, "loss": 0.2551, "step": 2253, "teacher_loss": 0.20272672176361084 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.273129403591156, "learning_rate": 9.775914413763191e-06, "loss": 0.1758, "step": 2254, "teacher_loss": 0.16501866281032562 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.9625400304794312, "learning_rate": 9.780251554141969e-06, "loss": 0.3584, "step": 2255, "teacher_loss": 0.29128509759902954 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6064280271530151, "learning_rate": 9.784588694520747e-06, "loss": 0.2198, "step": 2256, "teacher_loss": 0.17681677639484406 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.716954231262207, "learning_rate": 9.788925834899524e-06, "loss": 0.2849, "step": 2257, "teacher_loss": 0.2369404435157776 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.9100842475891113, "learning_rate": 9.793262975278299e-06, "loss": 0.3052, "step": 2258, "teacher_loss": 0.23797696828842163 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.29342323541641235, "learning_rate": 9.797600115657076e-06, "loss": 0.2229, "step": 2259, "teacher_loss": 0.21510908007621765 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6890444755554199, "learning_rate": 9.801937256035854e-06, "loss": 0.2278, "step": 2260, "teacher_loss": 0.17649900913238525 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6942050457000732, "learning_rate": 9.806274396414631e-06, "loss": 0.3363, "step": 2261, "teacher_loss": 0.2965131402015686 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.3415362238883972, "learning_rate": 9.810611536793409e-06, "loss": 0.1996, "step": 2262, "teacher_loss": 0.18387725949287415 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5318688154220581, "learning_rate": 9.814948677172185e-06, "loss": 0.2522, "step": 2263, "teacher_loss": 0.22111092507839203 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6075310707092285, "learning_rate": 9.819285817550961e-06, "loss": 0.3005, "step": 2264, "teacher_loss": 0.26643550395965576 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.3764263987541199, "learning_rate": 9.823622957929739e-06, "loss": 0.2415, "step": 2265, "teacher_loss": 0.22651365399360657 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.34776681661605835, "learning_rate": 9.827960098308516e-06, "loss": 0.1894, "step": 2266, "teacher_loss": 0.1718285083770752 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.292323499917984, "learning_rate": 9.832297238687292e-06, "loss": 0.2334, "step": 2267, "teacher_loss": 0.22689521312713623 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.34436583518981934, "learning_rate": 9.83663437906607e-06, "loss": 0.1959, "step": 2268, "teacher_loss": 0.17939028143882751 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.44654616713523865, "learning_rate": 9.840971519444846e-06, "loss": 0.2089, "step": 2269, "teacher_loss": 0.18249574303627014 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5569769144058228, "learning_rate": 9.845308659823623e-06, "loss": 0.2609, "step": 2270, "teacher_loss": 0.2279849797487259 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.2435835897922516, "learning_rate": 9.849645800202401e-06, "loss": 0.2641, "step": 2271, "teacher_loss": 0.26636117696762085 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5169653296470642, "learning_rate": 9.853982940581177e-06, "loss": 0.238, "step": 2272, "teacher_loss": 0.20704606175422668 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.769324779510498, "learning_rate": 9.858320080959954e-06, "loss": 0.3828, "step": 2273, "teacher_loss": 0.33986395597457886 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.24644087255001068, "learning_rate": 9.86265722133873e-06, "loss": 0.2132, "step": 2274, "teacher_loss": 0.20946389436721802 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.7297289371490479, "learning_rate": 9.866994361717508e-06, "loss": 0.3247, "step": 2275, "teacher_loss": 0.2797221541404724 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.44911664724349976, "learning_rate": 9.871331502096284e-06, "loss": 0.2705, "step": 2276, "teacher_loss": 0.25062471628189087 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5729313492774963, "learning_rate": 9.875668642475062e-06, "loss": 0.4894, "step": 2277, "teacher_loss": 0.48017197847366333 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.2987278401851654, "learning_rate": 9.880005782853839e-06, "loss": 0.1885, "step": 2278, "teacher_loss": 0.17620965838432312 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.3366811275482178, "learning_rate": 9.884342923232617e-06, "loss": 0.2544, "step": 2279, "teacher_loss": 0.24530529975891113 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6210167407989502, "learning_rate": 9.888680063611391e-06, "loss": 0.2479, "step": 2280, "teacher_loss": 0.2064325362443924 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.48328113555908203, "learning_rate": 9.893017203990169e-06, "loss": 0.3367, "step": 2281, "teacher_loss": 0.32045167684555054 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.47786957025527954, "learning_rate": 9.897354344368946e-06, "loss": 0.2611, "step": 2282, "teacher_loss": 0.237041175365448 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.3577849268913269, "learning_rate": 9.901691484747724e-06, "loss": 0.2212, "step": 2283, "teacher_loss": 0.2059764266014099 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.20109067857265472, "learning_rate": 9.906028625126501e-06, "loss": 0.2015, "step": 2284, "teacher_loss": 0.20152871310710907 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.20129795372486115, "learning_rate": 9.910365765505276e-06, "loss": 0.2715, "step": 2285, "teacher_loss": 0.2792593836784363 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.302428275346756, "learning_rate": 9.914702905884053e-06, "loss": 0.1749, "step": 2286, "teacher_loss": 0.16077542304992676 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.48621752858161926, "learning_rate": 9.919040046262831e-06, "loss": 0.2539, "step": 2287, "teacher_loss": 0.22805029153823853 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5985076427459717, "learning_rate": 9.923377186641609e-06, "loss": 0.2641, "step": 2288, "teacher_loss": 0.22693976759910583 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.49067115783691406, "learning_rate": 9.927714327020385e-06, "loss": 0.2899, "step": 2289, "teacher_loss": 0.2675774097442627 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.6574618816375732, "learning_rate": 9.932051467399162e-06, "loss": 0.2771, "step": 2290, "teacher_loss": 0.23481649160385132 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.5394272804260254, "learning_rate": 9.936388607777938e-06, "loss": 0.2282, "step": 2291, "teacher_loss": 0.1935638040304184 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.2152406871318817, "learning_rate": 9.940725748156716e-06, "loss": 0.1851, "step": 2292, "teacher_loss": 0.18172720074653625 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.34301191568374634, "learning_rate": 9.945062888535493e-06, "loss": 0.2145, "step": 2293, "teacher_loss": 0.20020702481269836 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.3301433324813843, "learning_rate": 9.94940002891427e-06, "loss": 0.2282, "step": 2294, "teacher_loss": 0.21684956550598145 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.45296117663383484, "learning_rate": 9.953737169293047e-06, "loss": 0.289, "step": 2295, "teacher_loss": 0.27076005935668945 }, { "compression_loss": 0.0, "epoch": 0.41, "label_loss": 0.19504909217357635, "learning_rate": 9.958074309671823e-06, "loss": 0.1652, "step": 2296, "teacher_loss": 0.16192524135112762 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.23968197405338287, "learning_rate": 9.9624114500506e-06, "loss": 0.1992, "step": 2297, "teacher_loss": 0.19464752078056335 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.24263378977775574, "learning_rate": 9.966748590429376e-06, "loss": 0.1575, "step": 2298, "teacher_loss": 0.14803577959537506 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.2428988814353943, "learning_rate": 9.971085730808154e-06, "loss": 0.2096, "step": 2299, "teacher_loss": 0.20585303008556366 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.15833738446235657, "learning_rate": 9.975422871186932e-06, "loss": 0.1511, "step": 2300, "teacher_loss": 0.15024270117282867 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5531819462776184, "learning_rate": 9.97976001156571e-06, "loss": 0.2749, "step": 2301, "teacher_loss": 0.2439463883638382 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.375693678855896, "learning_rate": 9.984097151944483e-06, "loss": 0.1803, "step": 2302, "teacher_loss": 0.15855905413627625 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.4576565623283386, "learning_rate": 9.988434292323261e-06, "loss": 0.2523, "step": 2303, "teacher_loss": 0.22948497533798218 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.43955957889556885, "learning_rate": 9.992771432702039e-06, "loss": 0.211, "step": 2304, "teacher_loss": 0.18558531999588013 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.553405225276947, "learning_rate": 9.997108573080816e-06, "loss": 0.2798, "step": 2305, "teacher_loss": 0.2493765652179718 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.47034305334091187, "learning_rate": 1.0001445713459594e-05, "loss": 0.2776, "step": 2306, "teacher_loss": 0.2562023997306824 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5617996454238892, "learning_rate": 1.0005782853838368e-05, "loss": 0.3638, "step": 2307, "teacher_loss": 0.3418330252170563 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6733913421630859, "learning_rate": 1.0010119994217146e-05, "loss": 0.2621, "step": 2308, "teacher_loss": 0.21637627482414246 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.8181602954864502, "learning_rate": 1.0014457134595923e-05, "loss": 0.3101, "step": 2309, "teacher_loss": 0.25363266468048096 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.20042556524276733, "learning_rate": 1.0018794274974701e-05, "loss": 0.228, "step": 2310, "teacher_loss": 0.23111391067504883 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5272350311279297, "learning_rate": 1.0023131415353477e-05, "loss": 0.2672, "step": 2311, "teacher_loss": 0.23825430870056152 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5850472450256348, "learning_rate": 1.0027468555732255e-05, "loss": 0.2469, "step": 2312, "teacher_loss": 0.2092863917350769 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3368449807167053, "learning_rate": 1.003180569611103e-05, "loss": 0.2562, "step": 2313, "teacher_loss": 0.2472047507762909 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.22068515419960022, "learning_rate": 1.0036142836489808e-05, "loss": 0.1698, "step": 2314, "teacher_loss": 0.1641535460948944 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.2691343128681183, "learning_rate": 1.0040479976868586e-05, "loss": 0.1812, "step": 2315, "teacher_loss": 0.17144426703453064 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.18616539239883423, "learning_rate": 1.0044817117247362e-05, "loss": 0.1978, "step": 2316, "teacher_loss": 0.19905070960521698 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3410555124282837, "learning_rate": 1.004915425762614e-05, "loss": 0.1889, "step": 2317, "teacher_loss": 0.1720198094844818 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5155552625656128, "learning_rate": 1.0053491398004915e-05, "loss": 0.3477, "step": 2318, "teacher_loss": 0.3290276825428009 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.39196711778640747, "learning_rate": 1.0057828538383693e-05, "loss": 0.3081, "step": 2319, "teacher_loss": 0.29880768060684204 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6837252378463745, "learning_rate": 1.0062165678762469e-05, "loss": 0.2913, "step": 2320, "teacher_loss": 0.2476527988910675 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6306057572364807, "learning_rate": 1.0066502819141246e-05, "loss": 0.3729, "step": 2321, "teacher_loss": 0.344268262386322 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.31750744581222534, "learning_rate": 1.0070839959520024e-05, "loss": 0.2029, "step": 2322, "teacher_loss": 0.19020652770996094 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.28623390197753906, "learning_rate": 1.00751770998988e-05, "loss": 0.2095, "step": 2323, "teacher_loss": 0.2009831964969635 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6174214482307434, "learning_rate": 1.0079514240277578e-05, "loss": 0.26, "step": 2324, "teacher_loss": 0.22029903531074524 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.07627439498901367, "learning_rate": 1.0083851380656353e-05, "loss": 0.1481, "step": 2325, "teacher_loss": 0.15602877736091614 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3895377814769745, "learning_rate": 1.0088188521035131e-05, "loss": 0.2325, "step": 2326, "teacher_loss": 0.21507856249809265 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.4977247714996338, "learning_rate": 1.0092525661413909e-05, "loss": 0.2148, "step": 2327, "teacher_loss": 0.18338578939437866 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6924000382423401, "learning_rate": 1.0096862801792686e-05, "loss": 0.3048, "step": 2328, "teacher_loss": 0.2617051601409912 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.7958387136459351, "learning_rate": 1.010119994217146e-05, "loss": 0.3015, "step": 2329, "teacher_loss": 0.24660885334014893 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.51271653175354, "learning_rate": 1.0105537082550238e-05, "loss": 0.2054, "step": 2330, "teacher_loss": 0.17126289010047913 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3208063840866089, "learning_rate": 1.0109874222929016e-05, "loss": 0.207, "step": 2331, "teacher_loss": 0.19439736008644104 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.6714671850204468, "learning_rate": 1.0114211363307793e-05, "loss": 0.3493, "step": 2332, "teacher_loss": 0.3135136067867279 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3434569835662842, "learning_rate": 1.011854850368657e-05, "loss": 0.2983, "step": 2333, "teacher_loss": 0.2933364510536194 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.34067946672439575, "learning_rate": 1.0122885644065345e-05, "loss": 0.1844, "step": 2334, "teacher_loss": 0.16701556742191315 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5197765827178955, "learning_rate": 1.0127222784444123e-05, "loss": 0.2599, "step": 2335, "teacher_loss": 0.23100724816322327 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.42884141206741333, "learning_rate": 1.01315599248229e-05, "loss": 0.2935, "step": 2336, "teacher_loss": 0.2784738540649414 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.803558349609375, "learning_rate": 1.0135897065201678e-05, "loss": 0.2865, "step": 2337, "teacher_loss": 0.22906732559204102 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.7363027334213257, "learning_rate": 1.0140234205580454e-05, "loss": 0.2867, "step": 2338, "teacher_loss": 0.23679344356060028 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.1508190929889679, "learning_rate": 1.0144571345959232e-05, "loss": 0.1517, "step": 2339, "teacher_loss": 0.15184976160526276 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3046949803829193, "learning_rate": 1.0148908486338008e-05, "loss": 0.1679, "step": 2340, "teacher_loss": 0.1527009904384613 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.15535122156143188, "learning_rate": 1.0153245626716785e-05, "loss": 0.1496, "step": 2341, "teacher_loss": 0.14897367358207703 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.27468621730804443, "learning_rate": 1.0157582767095561e-05, "loss": 0.1957, "step": 2342, "teacher_loss": 0.1869387924671173 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3823353350162506, "learning_rate": 1.0161919907474339e-05, "loss": 0.2366, "step": 2343, "teacher_loss": 0.22037062048912048 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.33034995198249817, "learning_rate": 1.0166257047853116e-05, "loss": 0.1946, "step": 2344, "teacher_loss": 0.17950767278671265 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.8185216188430786, "learning_rate": 1.0170594188231892e-05, "loss": 0.3126, "step": 2345, "teacher_loss": 0.25640130043029785 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 1.0245083570480347, "learning_rate": 1.017493132861067e-05, "loss": 0.3125, "step": 2346, "teacher_loss": 0.23335415124893188 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3670728802680969, "learning_rate": 1.0179268468989446e-05, "loss": 0.2592, "step": 2347, "teacher_loss": 0.24723802506923676 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.5001082420349121, "learning_rate": 1.0183605609368224e-05, "loss": 0.2641, "step": 2348, "teacher_loss": 0.23785510659217834 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.7036440372467041, "learning_rate": 1.0187942749747001e-05, "loss": 0.2698, "step": 2349, "teacher_loss": 0.22159487009048462 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.3120863437652588, "learning_rate": 1.0192279890125779e-05, "loss": 0.3208, "step": 2350, "teacher_loss": 0.32175183296203613 }, { "compression_loss": 0.0, "epoch": 0.42, "label_loss": 0.319937139749527, "learning_rate": 1.0196617030504553e-05, "loss": 0.3222, "step": 2351, "teacher_loss": 0.32242417335510254 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.42182475328445435, "learning_rate": 1.020095417088333e-05, "loss": 0.1954, "step": 2352, "teacher_loss": 0.17026779055595398 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.9801952838897705, "learning_rate": 1.0205291311262108e-05, "loss": 0.3076, "step": 2353, "teacher_loss": 0.2328852415084839 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.16557687520980835, "learning_rate": 1.0209628451640886e-05, "loss": 0.1648, "step": 2354, "teacher_loss": 0.1647290587425232 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.5737104415893555, "learning_rate": 1.0213965592019662e-05, "loss": 0.2421, "step": 2355, "teacher_loss": 0.20520544052124023 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.25342780351638794, "learning_rate": 1.0218302732398438e-05, "loss": 0.2138, "step": 2356, "teacher_loss": 0.2093610316514969 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.5346795916557312, "learning_rate": 1.0222639872777215e-05, "loss": 0.573, "step": 2357, "teacher_loss": 0.5773087739944458 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.307625412940979, "learning_rate": 1.0226977013155993e-05, "loss": 0.2187, "step": 2358, "teacher_loss": 0.20886112749576569 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.6142769455909729, "learning_rate": 1.023131415353477e-05, "loss": 0.4483, "step": 2359, "teacher_loss": 0.429845929145813 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.629026472568512, "learning_rate": 1.0235651293913547e-05, "loss": 0.2345, "step": 2360, "teacher_loss": 0.19061236083507538 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.45466238260269165, "learning_rate": 1.0239988434292324e-05, "loss": 0.2111, "step": 2361, "teacher_loss": 0.18409281969070435 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3808279037475586, "learning_rate": 1.02443255746711e-05, "loss": 0.2319, "step": 2362, "teacher_loss": 0.21537208557128906 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.44422000646591187, "learning_rate": 1.0248662715049878e-05, "loss": 0.2135, "step": 2363, "teacher_loss": 0.18791040778160095 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.2515581250190735, "learning_rate": 1.0252999855428654e-05, "loss": 0.1596, "step": 2364, "teacher_loss": 0.1494198888540268 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.2316003441810608, "learning_rate": 1.0257336995807431e-05, "loss": 0.2103, "step": 2365, "teacher_loss": 0.20790261030197144 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.2647305727005005, "learning_rate": 1.0261674136186209e-05, "loss": 0.2122, "step": 2366, "teacher_loss": 0.20640867948532104 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.7372491359710693, "learning_rate": 1.0266011276564985e-05, "loss": 0.2394, "step": 2367, "teacher_loss": 0.18407300114631653 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.41440093517303467, "learning_rate": 1.0270348416943762e-05, "loss": 0.2798, "step": 2368, "teacher_loss": 0.2648549973964691 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.17522266507148743, "learning_rate": 1.0274685557322538e-05, "loss": 0.1932, "step": 2369, "teacher_loss": 0.19521096348762512 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 1.035237431526184, "learning_rate": 1.0279022697701316e-05, "loss": 0.3951, "step": 2370, "teacher_loss": 0.3239811062812805 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.45606887340545654, "learning_rate": 1.0283359838080094e-05, "loss": 0.3314, "step": 2371, "teacher_loss": 0.3175421357154846 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.47557497024536133, "learning_rate": 1.028769697845887e-05, "loss": 0.2263, "step": 2372, "teacher_loss": 0.19856590032577515 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.16883325576782227, "learning_rate": 1.0292034118837645e-05, "loss": 0.2022, "step": 2373, "teacher_loss": 0.2058539092540741 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.42074912786483765, "learning_rate": 1.0296371259216423e-05, "loss": 0.4154, "step": 2374, "teacher_loss": 0.41480326652526855 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.5496131181716919, "learning_rate": 1.03007083995952e-05, "loss": 0.2345, "step": 2375, "teacher_loss": 0.1994635909795761 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.1385919153690338, "learning_rate": 1.0305045539973978e-05, "loss": 0.232, "step": 2376, "teacher_loss": 0.24233722686767578 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.4555704593658447, "learning_rate": 1.0309382680352754e-05, "loss": 0.2456, "step": 2377, "teacher_loss": 0.22230252623558044 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.5253335237503052, "learning_rate": 1.031371982073153e-05, "loss": 0.2521, "step": 2378, "teacher_loss": 0.22170299291610718 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.6658607721328735, "learning_rate": 1.0318056961110308e-05, "loss": 0.3161, "step": 2379, "teacher_loss": 0.2772500514984131 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3261749744415283, "learning_rate": 1.0322394101489085e-05, "loss": 0.2044, "step": 2380, "teacher_loss": 0.1908426135778427 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.29901862144470215, "learning_rate": 1.0326731241867863e-05, "loss": 0.2229, "step": 2381, "teacher_loss": 0.2143876701593399 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.6709582805633545, "learning_rate": 1.0331068382246639e-05, "loss": 0.2748, "step": 2382, "teacher_loss": 0.2308310568332672 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.9853832721710205, "learning_rate": 1.0335405522625415e-05, "loss": 0.2763, "step": 2383, "teacher_loss": 0.19752341508865356 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.4647062420845032, "learning_rate": 1.0339742663004193e-05, "loss": 0.255, "step": 2384, "teacher_loss": 0.23172365128993988 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 1.1897125244140625, "learning_rate": 1.034407980338297e-05, "loss": 0.6817, "step": 2385, "teacher_loss": 0.6252492666244507 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.7053435444831848, "learning_rate": 1.0348416943761746e-05, "loss": 0.2584, "step": 2386, "teacher_loss": 0.20875926315784454 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.2810845971107483, "learning_rate": 1.0352754084140524e-05, "loss": 0.1777, "step": 2387, "teacher_loss": 0.1661744862794876 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.7917479872703552, "learning_rate": 1.0357091224519301e-05, "loss": 0.2732, "step": 2388, "teacher_loss": 0.21561607718467712 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3023416996002197, "learning_rate": 1.0361428364898077e-05, "loss": 0.1887, "step": 2389, "teacher_loss": 0.17606337368488312 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.22276663780212402, "learning_rate": 1.0365765505276855e-05, "loss": 0.1666, "step": 2390, "teacher_loss": 0.16031357645988464 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3278570771217346, "learning_rate": 1.037010264565563e-05, "loss": 0.1639, "step": 2391, "teacher_loss": 0.14565476775169373 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 1.2403373718261719, "learning_rate": 1.0374439786034408e-05, "loss": 0.3944, "step": 2392, "teacher_loss": 0.30045682191848755 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3178249001502991, "learning_rate": 1.0378776926413186e-05, "loss": 0.1812, "step": 2393, "teacher_loss": 0.1659734845161438 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.16195733845233917, "learning_rate": 1.0383114066791962e-05, "loss": 0.2266, "step": 2394, "teacher_loss": 0.23380282521247864 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3827659487724304, "learning_rate": 1.0387451207170738e-05, "loss": 0.2067, "step": 2395, "teacher_loss": 0.1870955228805542 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.5561103224754333, "learning_rate": 1.0391788347549516e-05, "loss": 0.2373, "step": 2396, "teacher_loss": 0.20193053781986237 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.19197705388069153, "learning_rate": 1.0396125487928293e-05, "loss": 0.2192, "step": 2397, "teacher_loss": 0.22227761149406433 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.8447026014328003, "learning_rate": 1.040046262830707e-05, "loss": 0.2801, "step": 2398, "teacher_loss": 0.21741461753845215 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.24189075827598572, "learning_rate": 1.0404799768685847e-05, "loss": 0.2338, "step": 2399, "teacher_loss": 0.23292356729507446 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.40745067596435547, "learning_rate": 1.0409136909064623e-05, "loss": 0.1841, "step": 2400, "teacher_loss": 0.159266859292984 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.3060387372970581, "learning_rate": 1.04134740494434e-05, "loss": 0.1903, "step": 2401, "teacher_loss": 0.17744530737400055 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.2830800414085388, "learning_rate": 1.0417811189822178e-05, "loss": 0.2044, "step": 2402, "teacher_loss": 0.19561263918876648 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.34571605920791626, "learning_rate": 1.0422148330200956e-05, "loss": 0.3041, "step": 2403, "teacher_loss": 0.2994650602340698 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 1.0207722187042236, "learning_rate": 1.0426485470579731e-05, "loss": 0.3004, "step": 2404, "teacher_loss": 0.2203269749879837 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.49686408042907715, "learning_rate": 1.0430822610958507e-05, "loss": 0.2912, "step": 2405, "teacher_loss": 0.2683042883872986 }, { "compression_loss": 0.0, "epoch": 0.43, "label_loss": 0.633056640625, "learning_rate": 1.0435159751337285e-05, "loss": 0.2433, "step": 2406, "teacher_loss": 0.19998416304588318 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.9559853076934814, "learning_rate": 1.0439496891716063e-05, "loss": 0.2451, "step": 2407, "teacher_loss": 0.16610883176326752 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.28321364521980286, "learning_rate": 1.0443834032094839e-05, "loss": 0.19, "step": 2408, "teacher_loss": 0.17967134714126587 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.2944931387901306, "learning_rate": 1.0448171172473616e-05, "loss": 0.2064, "step": 2409, "teacher_loss": 0.19664905965328217 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3292686641216278, "learning_rate": 1.0452508312852394e-05, "loss": 0.2291, "step": 2410, "teacher_loss": 0.21791525185108185 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3860967755317688, "learning_rate": 1.045684545323117e-05, "loss": 0.2801, "step": 2411, "teacher_loss": 0.26829662919044495 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3558676242828369, "learning_rate": 1.0461182593609947e-05, "loss": 0.2191, "step": 2412, "teacher_loss": 0.20393508672714233 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.4674742817878723, "learning_rate": 1.0465519733988723e-05, "loss": 0.1898, "step": 2413, "teacher_loss": 0.15898266434669495 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.9602545499801636, "learning_rate": 1.0469856874367501e-05, "loss": 0.3149, "step": 2414, "teacher_loss": 0.243166983127594 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5890560150146484, "learning_rate": 1.0474194014746278e-05, "loss": 0.2299, "step": 2415, "teacher_loss": 0.19004853069782257 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.2755882143974304, "learning_rate": 1.0478531155125054e-05, "loss": 0.2126, "step": 2416, "teacher_loss": 0.20558002591133118 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.39748287200927734, "learning_rate": 1.048286829550383e-05, "loss": 0.2097, "step": 2417, "teacher_loss": 0.1887979656457901 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5701079964637756, "learning_rate": 1.0487205435882608e-05, "loss": 0.2016, "step": 2418, "teacher_loss": 0.1606331169605255 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3085557818412781, "learning_rate": 1.0491542576261386e-05, "loss": 0.2555, "step": 2419, "teacher_loss": 0.249592125415802 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.39123088121414185, "learning_rate": 1.0495879716640163e-05, "loss": 0.3359, "step": 2420, "teacher_loss": 0.32972100377082825 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.349899023771286, "learning_rate": 1.0500216857018937e-05, "loss": 0.1667, "step": 2421, "teacher_loss": 0.14635542035102844 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3001956343650818, "learning_rate": 1.0504553997397715e-05, "loss": 0.1906, "step": 2422, "teacher_loss": 0.178460955619812 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.7973374128341675, "learning_rate": 1.0508891137776493e-05, "loss": 0.275, "step": 2423, "teacher_loss": 0.21696211397647858 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.4588952660560608, "learning_rate": 1.051322827815527e-05, "loss": 0.2801, "step": 2424, "teacher_loss": 0.2601942718029022 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 1.0183534622192383, "learning_rate": 1.0517565418534048e-05, "loss": 0.2498, "step": 2425, "teacher_loss": 0.16445884108543396 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.8354297280311584, "learning_rate": 1.0521902558912824e-05, "loss": 0.2826, "step": 2426, "teacher_loss": 0.22111913561820984 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.2762755751609802, "learning_rate": 1.05262396992916e-05, "loss": 0.3158, "step": 2427, "teacher_loss": 0.32019728422164917 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.22240827977657318, "learning_rate": 1.0530576839670377e-05, "loss": 0.2396, "step": 2428, "teacher_loss": 0.2414652407169342 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.2177400290966034, "learning_rate": 1.0534913980049155e-05, "loss": 0.1792, "step": 2429, "teacher_loss": 0.17497298121452332 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5535277128219604, "learning_rate": 1.0539251120427931e-05, "loss": 0.3143, "step": 2430, "teacher_loss": 0.28777188062667847 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.41109567880630493, "learning_rate": 1.0543588260806709e-05, "loss": 0.2533, "step": 2431, "teacher_loss": 0.23573818802833557 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.357093870639801, "learning_rate": 1.0547925401185485e-05, "loss": 0.2618, "step": 2432, "teacher_loss": 0.25119879841804504 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5321446657180786, "learning_rate": 1.0552262541564262e-05, "loss": 0.2393, "step": 2433, "teacher_loss": 0.20674863457679749 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.48428842425346375, "learning_rate": 1.055659968194304e-05, "loss": 0.2548, "step": 2434, "teacher_loss": 0.22925138473510742 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5370008945465088, "learning_rate": 1.0560936822321816e-05, "loss": 0.2635, "step": 2435, "teacher_loss": 0.23310929536819458 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.18650224804878235, "learning_rate": 1.0565273962700593e-05, "loss": 0.1878, "step": 2436, "teacher_loss": 0.1879117339849472 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5419920682907104, "learning_rate": 1.0569611103079371e-05, "loss": 0.2414, "step": 2437, "teacher_loss": 0.20802277326583862 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.33632010221481323, "learning_rate": 1.0573948243458147e-05, "loss": 0.1999, "step": 2438, "teacher_loss": 0.18478095531463623 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.21605004370212555, "learning_rate": 1.0578285383836923e-05, "loss": 0.2109, "step": 2439, "teacher_loss": 0.2103685438632965 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.31071263551712036, "learning_rate": 1.05826225242157e-05, "loss": 0.2232, "step": 2440, "teacher_loss": 0.2135113924741745 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.30834901332855225, "learning_rate": 1.0586959664594478e-05, "loss": 0.2553, "step": 2441, "teacher_loss": 0.24945297837257385 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.45771241188049316, "learning_rate": 1.0591296804973256e-05, "loss": 0.2743, "step": 2442, "teacher_loss": 0.25395581126213074 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.4475412964820862, "learning_rate": 1.059563394535203e-05, "loss": 0.2855, "step": 2443, "teacher_loss": 0.2674804627895355 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.47804325819015503, "learning_rate": 1.0599971085730808e-05, "loss": 0.2948, "step": 2444, "teacher_loss": 0.27439579367637634 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.1676841676235199, "learning_rate": 1.0604308226109585e-05, "loss": 0.189, "step": 2445, "teacher_loss": 0.1913500279188156 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.6251732707023621, "learning_rate": 1.0608645366488363e-05, "loss": 0.266, "step": 2446, "teacher_loss": 0.22612860798835754 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5512125492095947, "learning_rate": 1.061298250686714e-05, "loss": 0.2804, "step": 2447, "teacher_loss": 0.2502940595149994 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.6812999844551086, "learning_rate": 1.0617319647245916e-05, "loss": 0.254, "step": 2448, "teacher_loss": 0.20654049515724182 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.45413699746131897, "learning_rate": 1.0621656787624692e-05, "loss": 0.2726, "step": 2449, "teacher_loss": 0.2523787021636963 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.7652422189712524, "learning_rate": 1.062599392800347e-05, "loss": 0.2743, "step": 2450, "teacher_loss": 0.2197152078151703 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.8214482069015503, "learning_rate": 1.0630331068382247e-05, "loss": 0.4236, "step": 2451, "teacher_loss": 0.3794212341308594 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.7018332481384277, "learning_rate": 1.0634668208761023e-05, "loss": 0.2579, "step": 2452, "teacher_loss": 0.20857229828834534 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.258870005607605, "learning_rate": 1.0639005349139801e-05, "loss": 0.2538, "step": 2453, "teacher_loss": 0.2532517910003662 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.723617434501648, "learning_rate": 1.0643342489518577e-05, "loss": 0.2927, "step": 2454, "teacher_loss": 0.24477502703666687 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.32321298122406006, "learning_rate": 1.0647679629897355e-05, "loss": 0.2652, "step": 2455, "teacher_loss": 0.2587481737136841 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.47916722297668457, "learning_rate": 1.0652016770276132e-05, "loss": 0.3125, "step": 2456, "teacher_loss": 0.2940249443054199 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.3623248338699341, "learning_rate": 1.0656353910654908e-05, "loss": 0.2185, "step": 2457, "teacher_loss": 0.20257121324539185 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.49092942476272583, "learning_rate": 1.0660691051033686e-05, "loss": 0.2399, "step": 2458, "teacher_loss": 0.21204935014247894 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.5602243542671204, "learning_rate": 1.0665028191412463e-05, "loss": 0.2323, "step": 2459, "teacher_loss": 0.19585971534252167 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.7578895092010498, "learning_rate": 1.066936533179124e-05, "loss": 0.3163, "step": 2460, "teacher_loss": 0.2672047019004822 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.2102113962173462, "learning_rate": 1.0673702472170015e-05, "loss": 0.1865, "step": 2461, "teacher_loss": 0.183834508061409 }, { "compression_loss": 0.0, "epoch": 0.44, "label_loss": 0.28813832998275757, "learning_rate": 1.0678039612548793e-05, "loss": 0.2235, "step": 2462, "teacher_loss": 0.2163233608007431 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.6058992147445679, "learning_rate": 1.068237675292757e-05, "loss": 0.2699, "step": 2463, "teacher_loss": 0.232540100812912 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.6213823556900024, "learning_rate": 1.0686713893306348e-05, "loss": 0.2252, "step": 2464, "teacher_loss": 0.18116098642349243 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.46115273237228394, "learning_rate": 1.0691051033685124e-05, "loss": 0.2202, "step": 2465, "teacher_loss": 0.19341117143630981 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.4507609009742737, "learning_rate": 1.06953881740639e-05, "loss": 0.2426, "step": 2466, "teacher_loss": 0.21950627863407135 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.7430602312088013, "learning_rate": 1.0699725314442678e-05, "loss": 0.3182, "step": 2467, "teacher_loss": 0.27104824781417847 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.43340665102005005, "learning_rate": 1.0704062454821455e-05, "loss": 0.217, "step": 2468, "teacher_loss": 0.19291651248931885 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.7246847152709961, "learning_rate": 1.0708399595200233e-05, "loss": 0.3134, "step": 2469, "teacher_loss": 0.26768508553504944 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.41216230392456055, "learning_rate": 1.0712736735579009e-05, "loss": 0.2966, "step": 2470, "teacher_loss": 0.28374308347702026 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.28492599725723267, "learning_rate": 1.0717073875957785e-05, "loss": 0.156, "step": 2471, "teacher_loss": 0.14169326424598694 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5045020580291748, "learning_rate": 1.0721411016336562e-05, "loss": 0.2281, "step": 2472, "teacher_loss": 0.19744394719600677 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.18070977926254272, "learning_rate": 1.072574815671534e-05, "loss": 0.1358, "step": 2473, "teacher_loss": 0.13079451024532318 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5173914432525635, "learning_rate": 1.0730085297094116e-05, "loss": 0.2517, "step": 2474, "teacher_loss": 0.22215279936790466 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3792479634284973, "learning_rate": 1.0734422437472893e-05, "loss": 0.3118, "step": 2475, "teacher_loss": 0.3043276369571686 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2878420054912567, "learning_rate": 1.073875957785167e-05, "loss": 0.2172, "step": 2476, "teacher_loss": 0.2093481570482254 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.9661229848861694, "learning_rate": 1.0743096718230447e-05, "loss": 0.3143, "step": 2477, "teacher_loss": 0.24186795949935913 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5042422413825989, "learning_rate": 1.0747433858609225e-05, "loss": 0.2528, "step": 2478, "teacher_loss": 0.22481946647167206 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.26766276359558105, "learning_rate": 1.0751770998988e-05, "loss": 0.2115, "step": 2479, "teacher_loss": 0.20520789921283722 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.1566394567489624, "learning_rate": 1.0756108139366778e-05, "loss": 0.1784, "step": 2480, "teacher_loss": 0.18076878786087036 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.19514954090118408, "learning_rate": 1.0760445279745554e-05, "loss": 0.1976, "step": 2481, "teacher_loss": 0.19785045087337494 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.6062769889831543, "learning_rate": 1.0764782420124332e-05, "loss": 0.2444, "step": 2482, "teacher_loss": 0.20415878295898438 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5608301758766174, "learning_rate": 1.0769119560503108e-05, "loss": 0.4011, "step": 2483, "teacher_loss": 0.3833540081977844 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.34917008876800537, "learning_rate": 1.0773456700881885e-05, "loss": 0.2107, "step": 2484, "teacher_loss": 0.1952686607837677 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.4104021191596985, "learning_rate": 1.0777793841260663e-05, "loss": 0.305, "step": 2485, "teacher_loss": 0.2932960093021393 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3379254937171936, "learning_rate": 1.078213098163944e-05, "loss": 0.2786, "step": 2486, "teacher_loss": 0.271992564201355 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5497634410858154, "learning_rate": 1.0786468122018216e-05, "loss": 0.3201, "step": 2487, "teacher_loss": 0.2945822477340698 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2738702893257141, "learning_rate": 1.0790805262396992e-05, "loss": 0.1955, "step": 2488, "teacher_loss": 0.18674317002296448 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.4531628489494324, "learning_rate": 1.079514240277577e-05, "loss": 0.3489, "step": 2489, "teacher_loss": 0.33734965324401855 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.22863608598709106, "learning_rate": 1.0799479543154548e-05, "loss": 0.1606, "step": 2490, "teacher_loss": 0.1529940664768219 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.4543974697589874, "learning_rate": 1.0803816683533325e-05, "loss": 0.2489, "step": 2491, "teacher_loss": 0.22604787349700928 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.45465630292892456, "learning_rate": 1.08081538239121e-05, "loss": 0.2871, "step": 2492, "teacher_loss": 0.26848796010017395 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.19923967123031616, "learning_rate": 1.0812490964290877e-05, "loss": 0.215, "step": 2493, "teacher_loss": 0.21674197912216187 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.6403992176055908, "learning_rate": 1.0816828104669655e-05, "loss": 0.2242, "step": 2494, "teacher_loss": 0.17793838679790497 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2759147882461548, "learning_rate": 1.0821165245048432e-05, "loss": 0.1929, "step": 2495, "teacher_loss": 0.18373069167137146 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5355908870697021, "learning_rate": 1.0825502385427208e-05, "loss": 0.2315, "step": 2496, "teacher_loss": 0.19766712188720703 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.38285648822784424, "learning_rate": 1.0829839525805986e-05, "loss": 0.2511, "step": 2497, "teacher_loss": 0.23642978072166443 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 1.0085573196411133, "learning_rate": 1.0834176666184762e-05, "loss": 0.3613, "step": 2498, "teacher_loss": 0.2893427610397339 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2714253067970276, "learning_rate": 1.083851380656354e-05, "loss": 0.1885, "step": 2499, "teacher_loss": 0.17929911613464355 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.4236676096916199, "learning_rate": 1.0842850946942317e-05, "loss": 0.1919, "step": 2500, "teacher_loss": 0.16618189215660095 }, { "epoch": 0.45, "eval_exact_match": 79.96215704824976, "eval_f1": 87.18487043071096, "step": 2500 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3454571068286896, "learning_rate": 1.0847188087321093e-05, "loss": 0.1868, "step": 2501, "teacher_loss": 0.16914451122283936 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.8027348518371582, "learning_rate": 1.085152522769987e-05, "loss": 0.3536, "step": 2502, "teacher_loss": 0.3037194013595581 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.28169554471969604, "learning_rate": 1.0855862368078647e-05, "loss": 0.1936, "step": 2503, "teacher_loss": 0.18384036421775818 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.5043920874595642, "learning_rate": 1.0860199508457424e-05, "loss": 0.2612, "step": 2504, "teacher_loss": 0.23421043157577515 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.7622601985931396, "learning_rate": 1.08645366488362e-05, "loss": 0.2907, "step": 2505, "teacher_loss": 0.2383057177066803 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3897848129272461, "learning_rate": 1.0868873789214978e-05, "loss": 0.2278, "step": 2506, "teacher_loss": 0.2097935527563095 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2639317810535431, "learning_rate": 1.0873210929593755e-05, "loss": 0.2033, "step": 2507, "teacher_loss": 0.19652585685253143 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.21258127689361572, "learning_rate": 1.0877548069972533e-05, "loss": 0.1947, "step": 2508, "teacher_loss": 0.19266745448112488 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.2877371311187744, "learning_rate": 1.0881885210351309e-05, "loss": 0.2368, "step": 2509, "teacher_loss": 0.23108959197998047 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.6338034272193909, "learning_rate": 1.0886222350730085e-05, "loss": 0.3323, "step": 2510, "teacher_loss": 0.2987942099571228 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.1302526593208313, "learning_rate": 1.0890559491108862e-05, "loss": 0.1784, "step": 2511, "teacher_loss": 0.18375495076179504 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 1.029805064201355, "learning_rate": 1.089489663148764e-05, "loss": 0.3266, "step": 2512, "teacher_loss": 0.24846431612968445 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3524097204208374, "learning_rate": 1.0899233771866418e-05, "loss": 0.1903, "step": 2513, "teacher_loss": 0.17223826050758362 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.485371470451355, "learning_rate": 1.0903570912245192e-05, "loss": 0.2336, "step": 2514, "teacher_loss": 0.2055875062942505 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.16797083616256714, "learning_rate": 1.090790805262397e-05, "loss": 0.1802, "step": 2515, "teacher_loss": 0.18154458701610565 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.32138270139694214, "learning_rate": 1.0912245193002747e-05, "loss": 0.2608, "step": 2516, "teacher_loss": 0.254066526889801 }, { "compression_loss": 0.0, "epoch": 0.45, "label_loss": 0.3390832543373108, "learning_rate": 1.0916582333381525e-05, "loss": 0.1711, "step": 2517, "teacher_loss": 0.15241624414920807 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.6732155084609985, "learning_rate": 1.09209194737603e-05, "loss": 0.2799, "step": 2518, "teacher_loss": 0.23619751632213593 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.44451266527175903, "learning_rate": 1.0925256614139078e-05, "loss": 0.2279, "step": 2519, "teacher_loss": 0.20377777516841888 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.18079423904418945, "learning_rate": 1.0929593754517854e-05, "loss": 0.2439, "step": 2520, "teacher_loss": 0.25092098116874695 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3356236219406128, "learning_rate": 1.0933930894896632e-05, "loss": 0.2946, "step": 2521, "teacher_loss": 0.29003196954727173 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.6910123825073242, "learning_rate": 1.093826803527541e-05, "loss": 0.4239, "step": 2522, "teacher_loss": 0.3942108750343323 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5024522542953491, "learning_rate": 1.0942605175654185e-05, "loss": 0.2779, "step": 2523, "teacher_loss": 0.25294816493988037 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.4282335638999939, "learning_rate": 1.0946942316032963e-05, "loss": 0.2195, "step": 2524, "teacher_loss": 0.19629724323749542 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3950645327568054, "learning_rate": 1.0951279456411739e-05, "loss": 0.1897, "step": 2525, "teacher_loss": 0.16693225502967834 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3842361569404602, "learning_rate": 1.0955616596790517e-05, "loss": 0.2112, "step": 2526, "teacher_loss": 0.19197949767112732 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.26759836077690125, "learning_rate": 1.0959953737169293e-05, "loss": 0.1839, "step": 2527, "teacher_loss": 0.17459891736507416 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3099031448364258, "learning_rate": 1.096429087754807e-05, "loss": 0.2601, "step": 2528, "teacher_loss": 0.25451797246932983 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.6155750155448914, "learning_rate": 1.0968628017926848e-05, "loss": 0.327, "step": 2529, "teacher_loss": 0.29488617181777954 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.34697088599205017, "learning_rate": 1.0972965158305624e-05, "loss": 0.2829, "step": 2530, "teacher_loss": 0.27577322721481323 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.8503072261810303, "learning_rate": 1.0977302298684401e-05, "loss": 0.2882, "step": 2531, "teacher_loss": 0.22579072415828705 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.6570554971694946, "learning_rate": 1.0981639439063177e-05, "loss": 0.3597, "step": 2532, "teacher_loss": 0.3266124129295349 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.430215060710907, "learning_rate": 1.0985976579441955e-05, "loss": 0.3421, "step": 2533, "teacher_loss": 0.3322986364364624 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.7646737098693848, "learning_rate": 1.0990313719820733e-05, "loss": 0.2996, "step": 2534, "teacher_loss": 0.247940793633461 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.36907637119293213, "learning_rate": 1.099465086019951e-05, "loss": 0.2151, "step": 2535, "teacher_loss": 0.19801479578018188 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.2139626145362854, "learning_rate": 1.0998988000578284e-05, "loss": 0.1942, "step": 2536, "teacher_loss": 0.19205938279628754 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.47275546193122864, "learning_rate": 1.1003325140957062e-05, "loss": 0.1999, "step": 2537, "teacher_loss": 0.16956260800361633 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.43362492322921753, "learning_rate": 1.100766228133584e-05, "loss": 0.187, "step": 2538, "teacher_loss": 0.1595967561006546 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5260012149810791, "learning_rate": 1.1011999421714617e-05, "loss": 0.2439, "step": 2539, "teacher_loss": 0.21258725225925446 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.26398134231567383, "learning_rate": 1.1016336562093393e-05, "loss": 0.2086, "step": 2540, "teacher_loss": 0.202442467212677 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.32088086009025574, "learning_rate": 1.1020673702472169e-05, "loss": 0.1942, "step": 2541, "teacher_loss": 0.18017607927322388 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5509706735610962, "learning_rate": 1.1025010842850947e-05, "loss": 0.2594, "step": 2542, "teacher_loss": 0.2270534485578537 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.35418152809143066, "learning_rate": 1.1029347983229724e-05, "loss": 0.1638, "step": 2543, "teacher_loss": 0.14259693026542664 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.43902963399887085, "learning_rate": 1.1033685123608502e-05, "loss": 0.2449, "step": 2544, "teacher_loss": 0.22338388860225677 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5425081253051758, "learning_rate": 1.1038022263987278e-05, "loss": 0.2706, "step": 2545, "teacher_loss": 0.24037402868270874 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.15332895517349243, "learning_rate": 1.1042359404366056e-05, "loss": 0.2692, "step": 2546, "teacher_loss": 0.28209415078163147 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.1199747622013092, "learning_rate": 1.1046696544744831e-05, "loss": 0.1982, "step": 2547, "teacher_loss": 0.20692431926727295 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3821457028388977, "learning_rate": 1.1051033685123609e-05, "loss": 0.1952, "step": 2548, "teacher_loss": 0.1744241714477539 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.4765229821205139, "learning_rate": 1.1055370825502385e-05, "loss": 0.2758, "step": 2549, "teacher_loss": 0.25352945923805237 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.47685506939888, "learning_rate": 1.1059707965881163e-05, "loss": 0.2363, "step": 2550, "teacher_loss": 0.20954486727714539 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.47066396474838257, "learning_rate": 1.106404510625994e-05, "loss": 0.2794, "step": 2551, "teacher_loss": 0.2581639587879181 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.7032426595687866, "learning_rate": 1.1068382246638716e-05, "loss": 0.2748, "step": 2552, "teacher_loss": 0.22718043625354767 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.48337581753730774, "learning_rate": 1.1072719387017494e-05, "loss": 0.2654, "step": 2553, "teacher_loss": 0.24115249514579773 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.21241718530654907, "learning_rate": 1.107705652739627e-05, "loss": 0.208, "step": 2554, "teacher_loss": 0.20746438205242157 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5338148474693298, "learning_rate": 1.1081393667775047e-05, "loss": 0.305, "step": 2555, "teacher_loss": 0.2795740067958832 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.9185878038406372, "learning_rate": 1.1085730808153825e-05, "loss": 0.2797, "step": 2556, "teacher_loss": 0.20875728130340576 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.34692180156707764, "learning_rate": 1.1090067948532603e-05, "loss": 0.2462, "step": 2557, "teacher_loss": 0.23505136370658875 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.4488905668258667, "learning_rate": 1.1094405088911377e-05, "loss": 0.202, "step": 2558, "teacher_loss": 0.1745334267616272 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.46294406056404114, "learning_rate": 1.1098742229290154e-05, "loss": 0.2405, "step": 2559, "teacher_loss": 0.2158115804195404 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3270619511604309, "learning_rate": 1.1103079369668932e-05, "loss": 0.2787, "step": 2560, "teacher_loss": 0.2733554244041443 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3540976047515869, "learning_rate": 1.110741651004771e-05, "loss": 0.2012, "step": 2561, "teacher_loss": 0.184186190366745 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.5566182136535645, "learning_rate": 1.1111753650426486e-05, "loss": 0.2533, "step": 2562, "teacher_loss": 0.21956832706928253 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.3199962079524994, "learning_rate": 1.1116090790805262e-05, "loss": 0.2473, "step": 2563, "teacher_loss": 0.23919281363487244 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.4552381634712219, "learning_rate": 1.112042793118404e-05, "loss": 0.2483, "step": 2564, "teacher_loss": 0.22526150941848755 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 1.0423669815063477, "learning_rate": 1.1124765071562817e-05, "loss": 0.3261, "step": 2565, "teacher_loss": 0.24647323787212372 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.30001139640808105, "learning_rate": 1.1129102211941594e-05, "loss": 0.2329, "step": 2566, "teacher_loss": 0.22545361518859863 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.4931596517562866, "learning_rate": 1.113343935232037e-05, "loss": 0.2552, "step": 2567, "teacher_loss": 0.22871464490890503 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.44307535886764526, "learning_rate": 1.1137776492699148e-05, "loss": 0.3035, "step": 2568, "teacher_loss": 0.2880330979824066 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.34950628876686096, "learning_rate": 1.1142113633077924e-05, "loss": 0.2213, "step": 2569, "teacher_loss": 0.20708659291267395 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.2891450524330139, "learning_rate": 1.1146450773456701e-05, "loss": 0.2305, "step": 2570, "teacher_loss": 0.22394594550132751 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.6095059514045715, "learning_rate": 1.1150787913835477e-05, "loss": 0.397, "step": 2571, "teacher_loss": 0.3733842968940735 }, { "compression_loss": 0.0, "epoch": 0.46, "label_loss": 0.7332524061203003, "learning_rate": 1.1155125054214255e-05, "loss": 0.3006, "step": 2572, "teacher_loss": 0.2524861693382263 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.6859009265899658, "learning_rate": 1.1159462194593033e-05, "loss": 0.2499, "step": 2573, "teacher_loss": 0.20145851373672485 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3341946005821228, "learning_rate": 1.1163799334971809e-05, "loss": 0.1832, "step": 2574, "teacher_loss": 0.16647052764892578 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.44110292196273804, "learning_rate": 1.1168136475350586e-05, "loss": 0.264, "step": 2575, "teacher_loss": 0.24430416524410248 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.36559349298477173, "learning_rate": 1.1172473615729362e-05, "loss": 0.266, "step": 2576, "teacher_loss": 0.25488996505737305 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5404074192047119, "learning_rate": 1.117681075610814e-05, "loss": 0.2309, "step": 2577, "teacher_loss": 0.19647076725959778 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.24006101489067078, "learning_rate": 1.1181147896486917e-05, "loss": 0.214, "step": 2578, "teacher_loss": 0.2111242711544037 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5765140056610107, "learning_rate": 1.1185485036865693e-05, "loss": 0.2699, "step": 2579, "teacher_loss": 0.2358786016702652 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.36410245299339294, "learning_rate": 1.118982217724447e-05, "loss": 0.2084, "step": 2580, "teacher_loss": 0.19108179211616516 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.2288976013660431, "learning_rate": 1.1194159317623247e-05, "loss": 0.1497, "step": 2581, "teacher_loss": 0.1408894807100296 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.1544944941997528, "learning_rate": 1.1198496458002024e-05, "loss": 0.2286, "step": 2582, "teacher_loss": 0.2368241548538208 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.22837793827056885, "learning_rate": 1.1202833598380802e-05, "loss": 0.2089, "step": 2583, "teacher_loss": 0.20677350461483002 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5781692862510681, "learning_rate": 1.1207170738759578e-05, "loss": 0.3754, "step": 2584, "teacher_loss": 0.35281580686569214 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.45877158641815186, "learning_rate": 1.1211507879138354e-05, "loss": 0.2545, "step": 2585, "teacher_loss": 0.23182399570941925 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.7806944847106934, "learning_rate": 1.1215845019517132e-05, "loss": 0.2556, "step": 2586, "teacher_loss": 0.19729870557785034 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.24409300088882446, "learning_rate": 1.122018215989591e-05, "loss": 0.2414, "step": 2587, "teacher_loss": 0.2411501407623291 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.20185744762420654, "learning_rate": 1.1224519300274687e-05, "loss": 0.2223, "step": 2588, "teacher_loss": 0.22458413243293762 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.27556174993515015, "learning_rate": 1.1228856440653463e-05, "loss": 0.1771, "step": 2589, "teacher_loss": 0.16619546711444855 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4446481466293335, "learning_rate": 1.1233193581032239e-05, "loss": 0.3707, "step": 2590, "teacher_loss": 0.36245495080947876 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3170129656791687, "learning_rate": 1.1237530721411016e-05, "loss": 0.2897, "step": 2591, "teacher_loss": 0.2866743206977844 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4113917052745819, "learning_rate": 1.1241867861789794e-05, "loss": 0.238, "step": 2592, "teacher_loss": 0.21871866285800934 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3163455128669739, "learning_rate": 1.124620500216857e-05, "loss": 0.1706, "step": 2593, "teacher_loss": 0.15442919731140137 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.27670609951019287, "learning_rate": 1.1250542142547347e-05, "loss": 0.1626, "step": 2594, "teacher_loss": 0.14989374577999115 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.25696122646331787, "learning_rate": 1.1254879282926125e-05, "loss": 0.1715, "step": 2595, "teacher_loss": 0.16200119256973267 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.49533218145370483, "learning_rate": 1.1259216423304901e-05, "loss": 0.2376, "step": 2596, "teacher_loss": 0.2089582234621048 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.17777703702449799, "learning_rate": 1.1263553563683679e-05, "loss": 0.2038, "step": 2597, "teacher_loss": 0.20671939849853516 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5646034479141235, "learning_rate": 1.1267890704062455e-05, "loss": 0.209, "step": 2598, "teacher_loss": 0.16949300467967987 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.735980749130249, "learning_rate": 1.1272227844441232e-05, "loss": 0.3186, "step": 2599, "teacher_loss": 0.27217650413513184 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5757689476013184, "learning_rate": 1.127656498482001e-05, "loss": 0.3043, "step": 2600, "teacher_loss": 0.27409130334854126 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3667204976081848, "learning_rate": 1.1280902125198786e-05, "loss": 0.2139, "step": 2601, "teacher_loss": 0.19693458080291748 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4980495572090149, "learning_rate": 1.1285239265577562e-05, "loss": 0.2086, "step": 2602, "teacher_loss": 0.17644032835960388 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.6358938217163086, "learning_rate": 1.128957640595634e-05, "loss": 0.3808, "step": 2603, "teacher_loss": 0.35240083932876587 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.49157604575157166, "learning_rate": 1.1293913546335117e-05, "loss": 0.2527, "step": 2604, "teacher_loss": 0.22621144354343414 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.20347315073013306, "learning_rate": 1.1298250686713895e-05, "loss": 0.1968, "step": 2605, "teacher_loss": 0.19608746469020844 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.9442662000656128, "learning_rate": 1.130258782709267e-05, "loss": 0.336, "step": 2606, "teacher_loss": 0.2683694660663605 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4317672550678253, "learning_rate": 1.1306924967471446e-05, "loss": 0.247, "step": 2607, "teacher_loss": 0.22647657990455627 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5235751867294312, "learning_rate": 1.1311262107850224e-05, "loss": 0.3508, "step": 2608, "teacher_loss": 0.3316184878349304 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3959978222846985, "learning_rate": 1.1315599248229002e-05, "loss": 0.2556, "step": 2609, "teacher_loss": 0.2399551272392273 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.2884896397590637, "learning_rate": 1.131993638860778e-05, "loss": 0.1707, "step": 2610, "teacher_loss": 0.1576024293899536 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.20990833640098572, "learning_rate": 1.1324273528986555e-05, "loss": 0.2038, "step": 2611, "teacher_loss": 0.2031690627336502 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4589312672615051, "learning_rate": 1.1328610669365331e-05, "loss": 0.2272, "step": 2612, "teacher_loss": 0.20147913694381714 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5243818759918213, "learning_rate": 1.1332947809744109e-05, "loss": 0.2805, "step": 2613, "teacher_loss": 0.2533561885356903 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.26924628019332886, "learning_rate": 1.1337284950122886e-05, "loss": 0.1736, "step": 2614, "teacher_loss": 0.16292704641819 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.6231783628463745, "learning_rate": 1.1341622090501662e-05, "loss": 0.2336, "step": 2615, "teacher_loss": 0.1902596652507782 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.7980999946594238, "learning_rate": 1.134595923088044e-05, "loss": 0.3567, "step": 2616, "teacher_loss": 0.3076779842376709 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4810009002685547, "learning_rate": 1.1350296371259218e-05, "loss": 0.3618, "step": 2617, "teacher_loss": 0.34852278232574463 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.4672296345233917, "learning_rate": 1.1354633511637993e-05, "loss": 0.3082, "step": 2618, "teacher_loss": 0.29047930240631104 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.1778792291879654, "learning_rate": 1.1358970652016771e-05, "loss": 0.2226, "step": 2619, "teacher_loss": 0.22753119468688965 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5313377380371094, "learning_rate": 1.1363307792395547e-05, "loss": 0.296, "step": 2620, "teacher_loss": 0.2698304355144501 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.838951826095581, "learning_rate": 1.1367644932774325e-05, "loss": 0.3165, "step": 2621, "teacher_loss": 0.25843414664268494 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5401074886322021, "learning_rate": 1.1371982073153102e-05, "loss": 0.3942, "step": 2622, "teacher_loss": 0.3780317008495331 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.32501643896102905, "learning_rate": 1.1376319213531878e-05, "loss": 0.225, "step": 2623, "teacher_loss": 0.21384525299072266 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.5073922872543335, "learning_rate": 1.1380656353910654e-05, "loss": 0.3492, "step": 2624, "teacher_loss": 0.33159786462783813 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.3377583920955658, "learning_rate": 1.1384993494289432e-05, "loss": 0.2441, "step": 2625, "teacher_loss": 0.23374885320663452 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.26270586252212524, "learning_rate": 1.138933063466821e-05, "loss": 0.1704, "step": 2626, "teacher_loss": 0.16010406613349915 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.721676230430603, "learning_rate": 1.1393667775046987e-05, "loss": 0.2794, "step": 2627, "teacher_loss": 0.23020729422569275 }, { "compression_loss": 0.0, "epoch": 0.47, "label_loss": 0.1762390434741974, "learning_rate": 1.1398004915425763e-05, "loss": 0.1806, "step": 2628, "teacher_loss": 0.1810353845357895 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.34777331352233887, "learning_rate": 1.1402342055804539e-05, "loss": 0.2001, "step": 2629, "teacher_loss": 0.18364128470420837 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.301580548286438, "learning_rate": 1.1406679196183316e-05, "loss": 0.1589, "step": 2630, "teacher_loss": 0.1430257111787796 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.17474818229675293, "learning_rate": 1.1411016336562094e-05, "loss": 0.1951, "step": 2631, "teacher_loss": 0.19736558198928833 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3953016400337219, "learning_rate": 1.1415353476940872e-05, "loss": 0.3168, "step": 2632, "teacher_loss": 0.3080419898033142 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.39360880851745605, "learning_rate": 1.1419690617319648e-05, "loss": 0.2275, "step": 2633, "teacher_loss": 0.20904362201690674 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.6607809662818909, "learning_rate": 1.1424027757698424e-05, "loss": 0.2853, "step": 2634, "teacher_loss": 0.24353361129760742 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3955228328704834, "learning_rate": 1.1428364898077201e-05, "loss": 0.1799, "step": 2635, "teacher_loss": 0.15595200657844543 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.23248809576034546, "learning_rate": 1.1432702038455979e-05, "loss": 0.1844, "step": 2636, "teacher_loss": 0.1790345311164856 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.23448440432548523, "learning_rate": 1.1437039178834755e-05, "loss": 0.1965, "step": 2637, "teacher_loss": 0.19231267273426056 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.927783727645874, "learning_rate": 1.1441376319213532e-05, "loss": 0.2794, "step": 2638, "teacher_loss": 0.20730894804000854 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3491996228694916, "learning_rate": 1.1445713459592308e-05, "loss": 0.2736, "step": 2639, "teacher_loss": 0.26519787311553955 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.571537435054779, "learning_rate": 1.1450050599971086e-05, "loss": 0.3666, "step": 2640, "teacher_loss": 0.3438640832901001 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.25649961829185486, "learning_rate": 1.1454387740349864e-05, "loss": 0.2054, "step": 2641, "teacher_loss": 0.19977012276649475 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.7252275347709656, "learning_rate": 1.145872488072864e-05, "loss": 0.3431, "step": 2642, "teacher_loss": 0.30058836936950684 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.30321455001831055, "learning_rate": 1.1463062021107417e-05, "loss": 0.1684, "step": 2643, "teacher_loss": 0.15338864922523499 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.6212984323501587, "learning_rate": 1.1467399161486195e-05, "loss": 0.3577, "step": 2644, "teacher_loss": 0.328414648771286 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.634611964225769, "learning_rate": 1.147173630186497e-05, "loss": 0.2898, "step": 2645, "teacher_loss": 0.25150930881500244 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3808760643005371, "learning_rate": 1.1476073442243747e-05, "loss": 0.2287, "step": 2646, "teacher_loss": 0.21176062524318695 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.13941128551959991, "learning_rate": 1.1480410582622524e-05, "loss": 0.1538, "step": 2647, "teacher_loss": 0.1554056853055954 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5635295510292053, "learning_rate": 1.1484747723001302e-05, "loss": 0.2888, "step": 2648, "teacher_loss": 0.2582892179489136 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.6621547937393188, "learning_rate": 1.148908486338008e-05, "loss": 0.2942, "step": 2649, "teacher_loss": 0.2532985210418701 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.36099833250045776, "learning_rate": 1.1493422003758855e-05, "loss": 0.2278, "step": 2650, "teacher_loss": 0.2129751443862915 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.44811612367630005, "learning_rate": 1.1497759144137631e-05, "loss": 0.2468, "step": 2651, "teacher_loss": 0.22444146871566772 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.40702179074287415, "learning_rate": 1.1502096284516409e-05, "loss": 0.2322, "step": 2652, "teacher_loss": 0.21280883252620697 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.9552428722381592, "learning_rate": 1.1506433424895187e-05, "loss": 0.3319, "step": 2653, "teacher_loss": 0.2626475691795349 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.34105437994003296, "learning_rate": 1.1510770565273964e-05, "loss": 0.2581, "step": 2654, "teacher_loss": 0.24885720014572144 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5169386863708496, "learning_rate": 1.151510770565274e-05, "loss": 0.2379, "step": 2655, "teacher_loss": 0.20691420137882233 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 1.1421351432800293, "learning_rate": 1.1519444846031516e-05, "loss": 0.3482, "step": 2656, "teacher_loss": 0.26002037525177 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.2524987459182739, "learning_rate": 1.1523781986410294e-05, "loss": 0.2361, "step": 2657, "teacher_loss": 0.23427344858646393 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5148970484733582, "learning_rate": 1.1528119126789071e-05, "loss": 0.2578, "step": 2658, "teacher_loss": 0.22926665842533112 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3486313819885254, "learning_rate": 1.1532456267167847e-05, "loss": 0.1845, "step": 2659, "teacher_loss": 0.16624927520751953 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.6609458923339844, "learning_rate": 1.1536793407546625e-05, "loss": 0.386, "step": 2660, "teacher_loss": 0.35549598932266235 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.1997494399547577, "learning_rate": 1.15411305479254e-05, "loss": 0.169, "step": 2661, "teacher_loss": 0.16560979187488556 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.40461117029190063, "learning_rate": 1.1545467688304178e-05, "loss": 0.2221, "step": 2662, "teacher_loss": 0.2018614113330841 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.2932624816894531, "learning_rate": 1.1549804828682956e-05, "loss": 0.1852, "step": 2663, "teacher_loss": 0.17317649722099304 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.4580557942390442, "learning_rate": 1.1554141969061732e-05, "loss": 0.223, "step": 2664, "teacher_loss": 0.19686487317085266 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5219885110855103, "learning_rate": 1.155847910944051e-05, "loss": 0.25, "step": 2665, "teacher_loss": 0.21975529193878174 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 1.0087976455688477, "learning_rate": 1.1562816249819287e-05, "loss": 0.4456, "step": 2666, "teacher_loss": 0.3830302953720093 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.589967668056488, "learning_rate": 1.1567153390198063e-05, "loss": 0.2979, "step": 2667, "teacher_loss": 0.2654666602611542 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.2949758470058441, "learning_rate": 1.1571490530576839e-05, "loss": 0.1934, "step": 2668, "teacher_loss": 0.1821596920490265 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3422999978065491, "learning_rate": 1.1575827670955617e-05, "loss": 0.214, "step": 2669, "teacher_loss": 0.19974404573440552 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.7723867893218994, "learning_rate": 1.1580164811334394e-05, "loss": 0.4085, "step": 2670, "teacher_loss": 0.36803698539733887 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.3261106610298157, "learning_rate": 1.1584501951713172e-05, "loss": 0.2088, "step": 2671, "teacher_loss": 0.1957629770040512 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 1.0239346027374268, "learning_rate": 1.1588839092091948e-05, "loss": 0.3255, "step": 2672, "teacher_loss": 0.24789631366729736 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.8380393981933594, "learning_rate": 1.1593176232470724e-05, "loss": 0.7181, "step": 2673, "teacher_loss": 0.7047662734985352 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.29857906699180603, "learning_rate": 1.1597513372849501e-05, "loss": 0.165, "step": 2674, "teacher_loss": 0.15017318725585938 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.23178820312023163, "learning_rate": 1.1601850513228279e-05, "loss": 0.1591, "step": 2675, "teacher_loss": 0.15098103880882263 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5880321264266968, "learning_rate": 1.1606187653607057e-05, "loss": 0.2887, "step": 2676, "teacher_loss": 0.2553853988647461 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.4277018904685974, "learning_rate": 1.161052479398583e-05, "loss": 0.1933, "step": 2677, "teacher_loss": 0.16729441285133362 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.29130446910858154, "learning_rate": 1.1614861934364608e-05, "loss": 0.2135, "step": 2678, "teacher_loss": 0.20483243465423584 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.7780278921127319, "learning_rate": 1.1619199074743386e-05, "loss": 0.4369, "step": 2679, "teacher_loss": 0.3989725708961487 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.38825103640556335, "learning_rate": 1.1623536215122164e-05, "loss": 0.1983, "step": 2680, "teacher_loss": 0.1772429198026657 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.5059837102890015, "learning_rate": 1.162787335550094e-05, "loss": 0.2789, "step": 2681, "teacher_loss": 0.2536166310310364 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.7126432657241821, "learning_rate": 1.1632210495879717e-05, "loss": 0.236, "step": 2682, "teacher_loss": 0.18306750059127808 }, { "compression_loss": 0.0, "epoch": 0.48, "label_loss": 0.8429287075996399, "learning_rate": 1.1636547636258493e-05, "loss": 0.2516, "step": 2683, "teacher_loss": 0.18587175011634827 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7280197739601135, "learning_rate": 1.164088477663727e-05, "loss": 0.331, "step": 2684, "teacher_loss": 0.2868611216545105 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.38181179761886597, "learning_rate": 1.1645221917016048e-05, "loss": 0.2322, "step": 2685, "teacher_loss": 0.21557992696762085 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.32834070920944214, "learning_rate": 1.1649559057394824e-05, "loss": 0.2372, "step": 2686, "teacher_loss": 0.22710910439491272 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.29126954078674316, "learning_rate": 1.1653896197773602e-05, "loss": 0.223, "step": 2687, "teacher_loss": 0.21542009711265564 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.15271346271038055, "learning_rate": 1.1658233338152378e-05, "loss": 0.2518, "step": 2688, "teacher_loss": 0.26280081272125244 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6026004552841187, "learning_rate": 1.1662570478531156e-05, "loss": 0.2289, "step": 2689, "teacher_loss": 0.18735647201538086 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.2639501690864563, "learning_rate": 1.1666907618909931e-05, "loss": 0.2053, "step": 2690, "teacher_loss": 0.19873586297035217 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6021695137023926, "learning_rate": 1.1671244759288709e-05, "loss": 0.3207, "step": 2691, "teacher_loss": 0.2894771993160248 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.4192838668823242, "learning_rate": 1.1675581899667487e-05, "loss": 0.1879, "step": 2692, "teacher_loss": 0.16221432387828827 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7256275415420532, "learning_rate": 1.1679919040046264e-05, "loss": 0.3064, "step": 2693, "teacher_loss": 0.2597653269767761 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.33181193470954895, "learning_rate": 1.168425618042504e-05, "loss": 0.2421, "step": 2694, "teacher_loss": 0.23215770721435547 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.1506984829902649, "learning_rate": 1.1688593320803816e-05, "loss": 0.2146, "step": 2695, "teacher_loss": 0.22171252965927124 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.5970140695571899, "learning_rate": 1.1692930461182594e-05, "loss": 0.336, "step": 2696, "teacher_loss": 0.3069695234298706 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.47001200914382935, "learning_rate": 1.1697267601561371e-05, "loss": 0.2536, "step": 2697, "teacher_loss": 0.2296069860458374 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.23714056611061096, "learning_rate": 1.1701604741940149e-05, "loss": 0.2411, "step": 2698, "teacher_loss": 0.24155963957309723 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.37077924609184265, "learning_rate": 1.1705941882318923e-05, "loss": 0.2114, "step": 2699, "teacher_loss": 0.19367915391921997 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7926560640335083, "learning_rate": 1.1710279022697701e-05, "loss": 0.3557, "step": 2700, "teacher_loss": 0.3071998059749603 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.49158933758735657, "learning_rate": 1.1714616163076479e-05, "loss": 0.3216, "step": 2701, "teacher_loss": 0.3027087450027466 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6287871599197388, "learning_rate": 1.1718953303455256e-05, "loss": 0.2506, "step": 2702, "teacher_loss": 0.20857484638690948 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.3568631112575531, "learning_rate": 1.1723290443834032e-05, "loss": 0.2215, "step": 2703, "teacher_loss": 0.206405371427536 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.3900279998779297, "learning_rate": 1.172762758421281e-05, "loss": 0.265, "step": 2704, "teacher_loss": 0.251122385263443 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.4751322865486145, "learning_rate": 1.1731964724591586e-05, "loss": 0.2354, "step": 2705, "teacher_loss": 0.20875263214111328 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.2733660936355591, "learning_rate": 1.1736301864970363e-05, "loss": 0.1903, "step": 2706, "teacher_loss": 0.18105006217956543 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7769244909286499, "learning_rate": 1.1740639005349141e-05, "loss": 0.3563, "step": 2707, "teacher_loss": 0.3095134496688843 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.49802684783935547, "learning_rate": 1.1744976145727917e-05, "loss": 0.2292, "step": 2708, "teacher_loss": 0.19931241869926453 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.43429094552993774, "learning_rate": 1.1749313286106694e-05, "loss": 0.264, "step": 2709, "teacher_loss": 0.24503526091575623 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.34477633237838745, "learning_rate": 1.175365042648547e-05, "loss": 0.2031, "step": 2710, "teacher_loss": 0.1873832494020462 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.17174574732780457, "learning_rate": 1.1757987566864248e-05, "loss": 0.1983, "step": 2711, "teacher_loss": 0.20125645399093628 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.372773140668869, "learning_rate": 1.1762324707243024e-05, "loss": 0.2701, "step": 2712, "teacher_loss": 0.2586411237716675 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.4282851219177246, "learning_rate": 1.1766661847621802e-05, "loss": 0.2253, "step": 2713, "teacher_loss": 0.20271286368370056 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7138041853904724, "learning_rate": 1.1770998988000579e-05, "loss": 0.3552, "step": 2714, "teacher_loss": 0.31534916162490845 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.8002606630325317, "learning_rate": 1.1775336128379357e-05, "loss": 0.3493, "step": 2715, "teacher_loss": 0.2991919219493866 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.48987025022506714, "learning_rate": 1.1779673268758133e-05, "loss": 0.3607, "step": 2716, "teacher_loss": 0.34633442759513855 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.29670393466949463, "learning_rate": 1.1784010409136909e-05, "loss": 0.223, "step": 2717, "teacher_loss": 0.21485163271427155 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.33022040128707886, "learning_rate": 1.1788347549515686e-05, "loss": 0.203, "step": 2718, "teacher_loss": 0.18881940841674805 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6116946935653687, "learning_rate": 1.1792684689894464e-05, "loss": 0.2316, "step": 2719, "teacher_loss": 0.18934962153434753 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.1104709655046463, "learning_rate": 1.1797021830273241e-05, "loss": 0.2309, "step": 2720, "teacher_loss": 0.24428671598434448 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.2969475984573364, "learning_rate": 1.1801358970652016e-05, "loss": 0.2652, "step": 2721, "teacher_loss": 0.2617086172103882 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.19045990705490112, "learning_rate": 1.1805696111030793e-05, "loss": 0.1925, "step": 2722, "teacher_loss": 0.19274374842643738 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.22433248162269592, "learning_rate": 1.1810033251409571e-05, "loss": 0.2379, "step": 2723, "teacher_loss": 0.2394241988658905 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6566988229751587, "learning_rate": 1.1814370391788349e-05, "loss": 0.2636, "step": 2724, "teacher_loss": 0.21995803713798523 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.7349234819412231, "learning_rate": 1.1818707532167125e-05, "loss": 0.3091, "step": 2725, "teacher_loss": 0.2618390917778015 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.34041017293930054, "learning_rate": 1.18230446725459e-05, "loss": 0.279, "step": 2726, "teacher_loss": 0.2722034156322479 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.12734031677246094, "learning_rate": 1.1827381812924678e-05, "loss": 0.1456, "step": 2727, "teacher_loss": 0.14766442775726318 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.3510662317276001, "learning_rate": 1.1831718953303456e-05, "loss": 0.2419, "step": 2728, "teacher_loss": 0.2297602742910385 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.41019418835639954, "learning_rate": 1.1836056093682233e-05, "loss": 0.2705, "step": 2729, "teacher_loss": 0.25496602058410645 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.35969462990760803, "learning_rate": 1.184039323406101e-05, "loss": 0.223, "step": 2730, "teacher_loss": 0.20785868167877197 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.45235681533813477, "learning_rate": 1.1844730374439787e-05, "loss": 0.2942, "step": 2731, "teacher_loss": 0.2765870690345764 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.4447060823440552, "learning_rate": 1.1849067514818563e-05, "loss": 0.1987, "step": 2732, "teacher_loss": 0.17140191793441772 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.6314231157302856, "learning_rate": 1.185340465519734e-05, "loss": 0.256, "step": 2733, "teacher_loss": 0.21425361931324005 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.44326549768447876, "learning_rate": 1.1857741795576116e-05, "loss": 0.2518, "step": 2734, "teacher_loss": 0.23052552342414856 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.18477675318717957, "learning_rate": 1.1862078935954894e-05, "loss": 0.2282, "step": 2735, "teacher_loss": 0.23301461338996887 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.14530345797538757, "learning_rate": 1.1866416076333672e-05, "loss": 0.1712, "step": 2736, "teacher_loss": 0.17407536506652832 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.4104914665222168, "learning_rate": 1.1870753216712447e-05, "loss": 0.2197, "step": 2737, "teacher_loss": 0.19850321114063263 }, { "compression_loss": 0.0, "epoch": 0.49, "label_loss": 0.3709258437156677, "learning_rate": 1.1875090357091225e-05, "loss": 0.1987, "step": 2738, "teacher_loss": 0.17951494455337524 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3927757143974304, "learning_rate": 1.1879427497470001e-05, "loss": 0.2122, "step": 2739, "teacher_loss": 0.19213169813156128 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3682246804237366, "learning_rate": 1.1883764637848779e-05, "loss": 0.1874, "step": 2740, "teacher_loss": 0.16731923818588257 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4583500027656555, "learning_rate": 1.1888101778227556e-05, "loss": 0.2639, "step": 2741, "teacher_loss": 0.24230840802192688 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.6776990294456482, "learning_rate": 1.1892438918606334e-05, "loss": 0.289, "step": 2742, "teacher_loss": 0.24575796723365784 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.6546613574028015, "learning_rate": 1.1896776058985108e-05, "loss": 0.2725, "step": 2743, "teacher_loss": 0.23004180192947388 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5748443007469177, "learning_rate": 1.1901113199363886e-05, "loss": 0.3352, "step": 2744, "teacher_loss": 0.3085193634033203 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.7293906807899475, "learning_rate": 1.1905450339742663e-05, "loss": 0.2612, "step": 2745, "teacher_loss": 0.2091558575630188 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5433912873268127, "learning_rate": 1.1909787480121441e-05, "loss": 0.2396, "step": 2746, "teacher_loss": 0.2058178037405014 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.46515753865242004, "learning_rate": 1.1914124620500217e-05, "loss": 0.2542, "step": 2747, "teacher_loss": 0.2307864874601364 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.1727769672870636, "learning_rate": 1.1918461760878993e-05, "loss": 0.1949, "step": 2748, "teacher_loss": 0.19736497104167938 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.392741322517395, "learning_rate": 1.192279890125777e-05, "loss": 0.2164, "step": 2749, "teacher_loss": 0.19679558277130127 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5824330449104309, "learning_rate": 1.1927136041636548e-05, "loss": 0.3114, "step": 2750, "teacher_loss": 0.2812670171260834 }, { "epoch": 0.5, "eval_exact_match": 79.54588457899716, "eval_f1": 87.13278090917339, "step": 2750 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3725287914276123, "learning_rate": 1.1931473182015326e-05, "loss": 0.2371, "step": 2751, "teacher_loss": 0.22203272581100464 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.26836833357810974, "learning_rate": 1.1935810322394102e-05, "loss": 0.2585, "step": 2752, "teacher_loss": 0.25743967294692993 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.24945861101150513, "learning_rate": 1.194014746277288e-05, "loss": 0.1526, "step": 2753, "teacher_loss": 0.14184768497943878 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5122396945953369, "learning_rate": 1.1944484603151655e-05, "loss": 0.3449, "step": 2754, "teacher_loss": 0.3262713551521301 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.9176746606826782, "learning_rate": 1.1948821743530433e-05, "loss": 0.3953, "step": 2755, "teacher_loss": 0.33729344606399536 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.34939056634902954, "learning_rate": 1.1953158883909209e-05, "loss": 0.2283, "step": 2756, "teacher_loss": 0.21482336521148682 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.39029139280319214, "learning_rate": 1.1957496024287986e-05, "loss": 0.2044, "step": 2757, "teacher_loss": 0.18375341594219208 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.30850085616111755, "learning_rate": 1.1961833164666764e-05, "loss": 0.1769, "step": 2758, "teacher_loss": 0.16227969527244568 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3603702187538147, "learning_rate": 1.196617030504554e-05, "loss": 0.2305, "step": 2759, "teacher_loss": 0.21602925658226013 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5798477530479431, "learning_rate": 1.1970507445424318e-05, "loss": 0.2892, "step": 2760, "teacher_loss": 0.25690627098083496 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.6092903017997742, "learning_rate": 1.1974844585803093e-05, "loss": 0.2672, "step": 2761, "teacher_loss": 0.22922618687152863 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4217924177646637, "learning_rate": 1.1979181726181871e-05, "loss": 0.3656, "step": 2762, "teacher_loss": 0.3594071567058563 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4085831046104431, "learning_rate": 1.1983518866560649e-05, "loss": 0.2294, "step": 2763, "teacher_loss": 0.20952939987182617 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5528672933578491, "learning_rate": 1.1987856006939426e-05, "loss": 0.3168, "step": 2764, "teacher_loss": 0.29051852226257324 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5818487405776978, "learning_rate": 1.19921931473182e-05, "loss": 0.258, "step": 2765, "teacher_loss": 0.22200913727283478 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.7699317932128906, "learning_rate": 1.1996530287696978e-05, "loss": 0.3074, "step": 2766, "teacher_loss": 0.2559676468372345 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.7366092801094055, "learning_rate": 1.2000867428075756e-05, "loss": 0.2849, "step": 2767, "teacher_loss": 0.2347569465637207 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.8108692765235901, "learning_rate": 1.2005204568454533e-05, "loss": 0.3502, "step": 2768, "teacher_loss": 0.29904770851135254 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.627205491065979, "learning_rate": 1.2009541708833311e-05, "loss": 0.3901, "step": 2769, "teacher_loss": 0.3637816607952118 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4472658932209015, "learning_rate": 1.2013878849212085e-05, "loss": 0.4401, "step": 2770, "teacher_loss": 0.4392518401145935 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.31642910838127136, "learning_rate": 1.2018215989590863e-05, "loss": 0.221, "step": 2771, "teacher_loss": 0.21039330959320068 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.39622044563293457, "learning_rate": 1.202255312996964e-05, "loss": 0.2684, "step": 2772, "teacher_loss": 0.25414392352104187 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5222471952438354, "learning_rate": 1.2026890270348418e-05, "loss": 0.2168, "step": 2773, "teacher_loss": 0.18284811079502106 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3399708867073059, "learning_rate": 1.2031227410727194e-05, "loss": 0.2381, "step": 2774, "teacher_loss": 0.226749449968338 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3524761497974396, "learning_rate": 1.203556455110597e-05, "loss": 0.2597, "step": 2775, "teacher_loss": 0.24944472312927246 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.175907164812088, "learning_rate": 1.2039901691484748e-05, "loss": 0.167, "step": 2776, "teacher_loss": 0.1660478711128235 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5861552357673645, "learning_rate": 1.2044238831863525e-05, "loss": 0.2065, "step": 2777, "teacher_loss": 0.16427116096019745 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3690280318260193, "learning_rate": 1.2048575972242301e-05, "loss": 0.2138, "step": 2778, "teacher_loss": 0.19651058316230774 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.18774858117103577, "learning_rate": 1.2052913112621079e-05, "loss": 0.1682, "step": 2779, "teacher_loss": 0.16606751084327698 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3397512435913086, "learning_rate": 1.2057250252999856e-05, "loss": 0.2046, "step": 2780, "teacher_loss": 0.18961921334266663 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.2792900502681732, "learning_rate": 1.2061587393378632e-05, "loss": 0.1867, "step": 2781, "teacher_loss": 0.17643892765045166 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.42095446586608887, "learning_rate": 1.206592453375741e-05, "loss": 0.1754, "step": 2782, "teacher_loss": 0.14810076355934143 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.6333006620407104, "learning_rate": 1.2070261674136186e-05, "loss": 0.2506, "step": 2783, "teacher_loss": 0.2080870270729065 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.3065766394138336, "learning_rate": 1.2074598814514964e-05, "loss": 0.2101, "step": 2784, "teacher_loss": 0.19934040307998657 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4391058385372162, "learning_rate": 1.2078935954893741e-05, "loss": 0.2594, "step": 2785, "teacher_loss": 0.23942387104034424 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.2367694079875946, "learning_rate": 1.2083273095272517e-05, "loss": 0.2004, "step": 2786, "teacher_loss": 0.19637976586818695 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.21591030061244965, "learning_rate": 1.2087610235651293e-05, "loss": 0.2412, "step": 2787, "teacher_loss": 0.24399667978286743 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.2285538911819458, "learning_rate": 1.209194737603007e-05, "loss": 0.1792, "step": 2788, "teacher_loss": 0.17375943064689636 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.5220904350280762, "learning_rate": 1.2096284516408848e-05, "loss": 0.3024, "step": 2789, "teacher_loss": 0.27803170680999756 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.14029623568058014, "learning_rate": 1.2100621656787626e-05, "loss": 0.1604, "step": 2790, "teacher_loss": 0.16259488463401794 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4635617136955261, "learning_rate": 1.2104958797166404e-05, "loss": 0.2824, "step": 2791, "teacher_loss": 0.2622499167919159 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.4710789620876312, "learning_rate": 1.2109295937545178e-05, "loss": 0.2666, "step": 2792, "teacher_loss": 0.2439209222793579 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.7224783301353455, "learning_rate": 1.2113633077923955e-05, "loss": 0.3356, "step": 2793, "teacher_loss": 0.29260414838790894 }, { "compression_loss": 0.0, "epoch": 0.5, "label_loss": 0.553899884223938, "learning_rate": 1.2117970218302733e-05, "loss": 0.3377, "step": 2794, "teacher_loss": 0.31364181637763977 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4261709153652191, "learning_rate": 1.212230735868151e-05, "loss": 0.2447, "step": 2795, "teacher_loss": 0.2245543897151947 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.8256220817565918, "learning_rate": 1.2126644499060287e-05, "loss": 0.4943, "step": 2796, "teacher_loss": 0.45752960443496704 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.599109411239624, "learning_rate": 1.2130981639439062e-05, "loss": 0.2988, "step": 2797, "teacher_loss": 0.2654431164264679 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4407704174518585, "learning_rate": 1.213531877981784e-05, "loss": 0.2365, "step": 2798, "teacher_loss": 0.21375274658203125 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.356095552444458, "learning_rate": 1.2139655920196618e-05, "loss": 0.2606, "step": 2799, "teacher_loss": 0.24996507167816162 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4790748357772827, "learning_rate": 1.2143993060575394e-05, "loss": 0.2325, "step": 2800, "teacher_loss": 0.2051496058702469 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.23424725234508514, "learning_rate": 1.2148330200954171e-05, "loss": 0.1821, "step": 2801, "teacher_loss": 0.17631107568740845 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.25061652064323425, "learning_rate": 1.2152667341332949e-05, "loss": 0.1778, "step": 2802, "teacher_loss": 0.16966739296913147 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.8287709951400757, "learning_rate": 1.2157004481711725e-05, "loss": 0.3683, "step": 2803, "teacher_loss": 0.3171241283416748 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.10736843198537827, "learning_rate": 1.2161341622090502e-05, "loss": 0.1957, "step": 2804, "teacher_loss": 0.20553088188171387 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.2934303879737854, "learning_rate": 1.2165678762469278e-05, "loss": 0.2333, "step": 2805, "teacher_loss": 0.22658444941043854 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4178912043571472, "learning_rate": 1.2170015902848056e-05, "loss": 0.2739, "step": 2806, "teacher_loss": 0.257956326007843 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3434050679206848, "learning_rate": 1.2174353043226834e-05, "loss": 0.2075, "step": 2807, "teacher_loss": 0.19240860641002655 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.6782447695732117, "learning_rate": 1.217869018360561e-05, "loss": 0.2411, "step": 2808, "teacher_loss": 0.19255954027175903 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3788807988166809, "learning_rate": 1.2183027323984385e-05, "loss": 0.306, "step": 2809, "teacher_loss": 0.2979472279548645 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7416969537734985, "learning_rate": 1.2187364464363163e-05, "loss": 0.2785, "step": 2810, "teacher_loss": 0.22706541419029236 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4763883948326111, "learning_rate": 1.219170160474194e-05, "loss": 0.2749, "step": 2811, "teacher_loss": 0.2525593638420105 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.08716742694377899, "learning_rate": 1.2196038745120718e-05, "loss": 0.1903, "step": 2812, "teacher_loss": 0.20171219110488892 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.2611193060874939, "learning_rate": 1.2200375885499496e-05, "loss": 0.2601, "step": 2813, "teacher_loss": 0.25993168354034424 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.5312067866325378, "learning_rate": 1.220471302587827e-05, "loss": 0.1794, "step": 2814, "teacher_loss": 0.1403425931930542 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.24882356822490692, "learning_rate": 1.2209050166257048e-05, "loss": 0.1695, "step": 2815, "teacher_loss": 0.16065910458564758 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.6479905843734741, "learning_rate": 1.2213387306635825e-05, "loss": 0.238, "step": 2816, "teacher_loss": 0.1924542933702469 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3875870108604431, "learning_rate": 1.2217724447014603e-05, "loss": 0.1816, "step": 2817, "teacher_loss": 0.1587332785129547 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7681722640991211, "learning_rate": 1.2222061587393379e-05, "loss": 0.3172, "step": 2818, "teacher_loss": 0.2670750916004181 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.2610691487789154, "learning_rate": 1.2226398727772155e-05, "loss": 0.2649, "step": 2819, "teacher_loss": 0.2653735876083374 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7264211773872375, "learning_rate": 1.2230735868150933e-05, "loss": 0.2708, "step": 2820, "teacher_loss": 0.2201305776834488 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.8096187114715576, "learning_rate": 1.223507300852971e-05, "loss": 0.3187, "step": 2821, "teacher_loss": 0.2640998363494873 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7291936874389648, "learning_rate": 1.2239410148908486e-05, "loss": 0.2545, "step": 2822, "teacher_loss": 0.20177412033081055 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.6875884532928467, "learning_rate": 1.2243747289287264e-05, "loss": 0.3436, "step": 2823, "teacher_loss": 0.30543211102485657 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.24918492138385773, "learning_rate": 1.224808442966604e-05, "loss": 0.173, "step": 2824, "teacher_loss": 0.16451023519039154 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.33140647411346436, "learning_rate": 1.2252421570044817e-05, "loss": 0.2441, "step": 2825, "teacher_loss": 0.23441873490810394 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.6007271409034729, "learning_rate": 1.2256758710423595e-05, "loss": 0.2107, "step": 2826, "teacher_loss": 0.167380690574646 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3942256569862366, "learning_rate": 1.226109585080237e-05, "loss": 0.2235, "step": 2827, "teacher_loss": 0.20448797941207886 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.5017439126968384, "learning_rate": 1.2265432991181148e-05, "loss": 0.191, "step": 2828, "teacher_loss": 0.15643101930618286 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4072628319263458, "learning_rate": 1.2269770131559926e-05, "loss": 0.2955, "step": 2829, "teacher_loss": 0.2830515503883362 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.372791588306427, "learning_rate": 1.2274107271938702e-05, "loss": 0.3129, "step": 2830, "teacher_loss": 0.30627113580703735 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.32183337211608887, "learning_rate": 1.2278444412317478e-05, "loss": 0.2056, "step": 2831, "teacher_loss": 0.19263172149658203 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.26824823021888733, "learning_rate": 1.2282781552696256e-05, "loss": 0.2484, "step": 2832, "teacher_loss": 0.24618060886859894 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.9093698263168335, "learning_rate": 1.2287118693075033e-05, "loss": 0.4284, "step": 2833, "teacher_loss": 0.3749205470085144 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.32723698019981384, "learning_rate": 1.229145583345381e-05, "loss": 0.2176, "step": 2834, "teacher_loss": 0.20544511079788208 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3240583837032318, "learning_rate": 1.2295792973832587e-05, "loss": 0.1969, "step": 2835, "teacher_loss": 0.18272244930267334 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.47969186305999756, "learning_rate": 1.2300130114211363e-05, "loss": 0.228, "step": 2836, "teacher_loss": 0.20006148517131805 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.655357301235199, "learning_rate": 1.230446725459014e-05, "loss": 0.2366, "step": 2837, "teacher_loss": 0.19008678197860718 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4674513339996338, "learning_rate": 1.2308804394968918e-05, "loss": 0.1721, "step": 2838, "teacher_loss": 0.13925771415233612 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7312817573547363, "learning_rate": 1.2313141535347695e-05, "loss": 0.3999, "step": 2839, "teacher_loss": 0.3630879819393158 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.3362359404563904, "learning_rate": 1.2317478675726471e-05, "loss": 0.2156, "step": 2840, "teacher_loss": 0.20220975577831268 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.2592926025390625, "learning_rate": 1.2321815816105247e-05, "loss": 0.2117, "step": 2841, "teacher_loss": 0.2064572423696518 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.28975388407707214, "learning_rate": 1.2326152956484025e-05, "loss": 0.2361, "step": 2842, "teacher_loss": 0.2301521897315979 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.21224454045295715, "learning_rate": 1.2330490096862803e-05, "loss": 0.1825, "step": 2843, "teacher_loss": 0.1792450249195099 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.33605241775512695, "learning_rate": 1.2334827237241579e-05, "loss": 0.2524, "step": 2844, "teacher_loss": 0.24305102229118347 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.4312984347343445, "learning_rate": 1.2339164377620356e-05, "loss": 0.3095, "step": 2845, "teacher_loss": 0.2959633469581604 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.2551267445087433, "learning_rate": 1.2343501517999132e-05, "loss": 0.3258, "step": 2846, "teacher_loss": 0.3336018919944763 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.5234556794166565, "learning_rate": 1.234783865837791e-05, "loss": 0.3254, "step": 2847, "teacher_loss": 0.3033400774002075 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.45214471220970154, "learning_rate": 1.2352175798756687e-05, "loss": 0.2217, "step": 2848, "teacher_loss": 0.19613017141819 }, { "compression_loss": 0.0, "epoch": 0.51, "label_loss": 0.7057472467422485, "learning_rate": 1.2356512939135463e-05, "loss": 0.493, "step": 2849, "teacher_loss": 0.46934348344802856 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.4267593026161194, "learning_rate": 1.2360850079514241e-05, "loss": 0.2242, "step": 2850, "teacher_loss": 0.20168596506118774 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.33162248134613037, "learning_rate": 1.2365187219893018e-05, "loss": 0.2405, "step": 2851, "teacher_loss": 0.2303842306137085 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5279541015625, "learning_rate": 1.2369524360271794e-05, "loss": 0.2799, "step": 2852, "teacher_loss": 0.25236034393310547 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.6938744783401489, "learning_rate": 1.237386150065057e-05, "loss": 0.3351, "step": 2853, "teacher_loss": 0.2952754497528076 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.3975841999053955, "learning_rate": 1.2378198641029348e-05, "loss": 0.1912, "step": 2854, "teacher_loss": 0.16821298003196716 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.6715163588523865, "learning_rate": 1.2382535781408126e-05, "loss": 0.379, "step": 2855, "teacher_loss": 0.34648776054382324 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.446830153465271, "learning_rate": 1.2386872921786903e-05, "loss": 0.3289, "step": 2856, "teacher_loss": 0.3158435821533203 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.28678110241889954, "learning_rate": 1.2391210062165679e-05, "loss": 0.1815, "step": 2857, "teacher_loss": 0.16984233260154724 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5985437035560608, "learning_rate": 1.2395547202544455e-05, "loss": 0.2799, "step": 2858, "teacher_loss": 0.244448721408844 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.39948201179504395, "learning_rate": 1.2399884342923233e-05, "loss": 0.2913, "step": 2859, "teacher_loss": 0.27922794222831726 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5931471586227417, "learning_rate": 1.240422148330201e-05, "loss": 0.3072, "step": 2860, "teacher_loss": 0.27547332644462585 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.3496328294277191, "learning_rate": 1.2408558623680788e-05, "loss": 0.2097, "step": 2861, "teacher_loss": 0.19419899582862854 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5771419405937195, "learning_rate": 1.2412895764059564e-05, "loss": 0.2271, "step": 2862, "teacher_loss": 0.188198983669281 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.42289865016937256, "learning_rate": 1.241723290443834e-05, "loss": 0.1963, "step": 2863, "teacher_loss": 0.17114368081092834 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5555446147918701, "learning_rate": 1.2421570044817117e-05, "loss": 0.2872, "step": 2864, "teacher_loss": 0.2573981285095215 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.411462664604187, "learning_rate": 1.2425907185195895e-05, "loss": 0.2659, "step": 2865, "teacher_loss": 0.24976889789104462 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.21053755283355713, "learning_rate": 1.2430244325574671e-05, "loss": 0.1975, "step": 2866, "teacher_loss": 0.19603992998600006 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5680697560310364, "learning_rate": 1.2434581465953449e-05, "loss": 0.2807, "step": 2867, "teacher_loss": 0.24880953133106232 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2706186771392822, "learning_rate": 1.2438918606332225e-05, "loss": 0.2031, "step": 2868, "teacher_loss": 0.19558212161064148 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.4105105996131897, "learning_rate": 1.2443255746711002e-05, "loss": 0.1986, "step": 2869, "teacher_loss": 0.17500299215316772 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.398613840341568, "learning_rate": 1.244759288708978e-05, "loss": 0.2584, "step": 2870, "teacher_loss": 0.24283260107040405 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5273966193199158, "learning_rate": 1.2451930027468556e-05, "loss": 0.2526, "step": 2871, "teacher_loss": 0.22201509773731232 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.6459747552871704, "learning_rate": 1.2456267167847333e-05, "loss": 0.2373, "step": 2872, "teacher_loss": 0.19186076521873474 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.31774747371673584, "learning_rate": 1.246060430822611e-05, "loss": 0.2708, "step": 2873, "teacher_loss": 0.2656227648258209 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2946651577949524, "learning_rate": 1.2464941448604887e-05, "loss": 0.2578, "step": 2874, "teacher_loss": 0.253707617521286 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 1.1280405521392822, "learning_rate": 1.2469278588983663e-05, "loss": 0.3383, "step": 2875, "teacher_loss": 0.25050485134124756 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.286027193069458, "learning_rate": 1.247361572936244e-05, "loss": 0.1733, "step": 2876, "teacher_loss": 0.1608046293258667 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2591002583503723, "learning_rate": 1.2477952869741218e-05, "loss": 0.2106, "step": 2877, "teacher_loss": 0.2051936835050583 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.49066299200057983, "learning_rate": 1.2482290010119996e-05, "loss": 0.2394, "step": 2878, "teacher_loss": 0.21149258315563202 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.8377897143363953, "learning_rate": 1.2486627150498772e-05, "loss": 0.2933, "step": 2879, "teacher_loss": 0.2328384518623352 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2672974765300751, "learning_rate": 1.2490964290877548e-05, "loss": 0.2285, "step": 2880, "teacher_loss": 0.2241635024547577 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.4621201753616333, "learning_rate": 1.2495301431256325e-05, "loss": 0.41, "step": 2881, "teacher_loss": 0.40423041582107544 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.4419918656349182, "learning_rate": 1.2499638571635103e-05, "loss": 0.2599, "step": 2882, "teacher_loss": 0.23968642950057983 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.36202648282051086, "learning_rate": 1.250397571201388e-05, "loss": 0.3001, "step": 2883, "teacher_loss": 0.2932130694389343 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.33822235465049744, "learning_rate": 1.2508312852392655e-05, "loss": 0.2168, "step": 2884, "teacher_loss": 0.20332825183868408 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.8938894271850586, "learning_rate": 1.2512649992771432e-05, "loss": 0.3272, "step": 2885, "teacher_loss": 0.2641940116882324 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2768236994743347, "learning_rate": 1.251698713315021e-05, "loss": 0.2129, "step": 2886, "teacher_loss": 0.20578469336032867 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.22182798385620117, "learning_rate": 1.2521324273528987e-05, "loss": 0.1873, "step": 2887, "teacher_loss": 0.18349069356918335 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.34089893102645874, "learning_rate": 1.2525661413907763e-05, "loss": 0.1754, "step": 2888, "teacher_loss": 0.15703445672988892 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.6644532680511475, "learning_rate": 1.2529998554286541e-05, "loss": 0.2993, "step": 2889, "teacher_loss": 0.25872868299484253 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.44714200496673584, "learning_rate": 1.2534335694665317e-05, "loss": 0.2213, "step": 2890, "teacher_loss": 0.1962614804506302 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2098473459482193, "learning_rate": 1.2538672835044095e-05, "loss": 0.1655, "step": 2891, "teacher_loss": 0.1605478674173355 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.34414029121398926, "learning_rate": 1.2543009975422872e-05, "loss": 0.2457, "step": 2892, "teacher_loss": 0.23479852080345154 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5456206798553467, "learning_rate": 1.2547347115801648e-05, "loss": 0.2487, "step": 2893, "teacher_loss": 0.21566279232501984 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.15823210775852203, "learning_rate": 1.2551684256180426e-05, "loss": 0.1617, "step": 2894, "teacher_loss": 0.16212186217308044 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2455466091632843, "learning_rate": 1.2556021396559202e-05, "loss": 0.1566, "step": 2895, "teacher_loss": 0.14672674238681793 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.23622816801071167, "learning_rate": 1.256035853693798e-05, "loss": 0.1727, "step": 2896, "teacher_loss": 0.1656712293624878 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.24362681806087494, "learning_rate": 1.2564695677316755e-05, "loss": 0.2157, "step": 2897, "teacher_loss": 0.2125878930091858 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.502642810344696, "learning_rate": 1.2569032817695533e-05, "loss": 0.2353, "step": 2898, "teacher_loss": 0.20561864972114563 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.28127628564834595, "learning_rate": 1.257336995807431e-05, "loss": 0.2254, "step": 2899, "teacher_loss": 0.21923725306987762 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.43687888979911804, "learning_rate": 1.2577707098453088e-05, "loss": 0.2391, "step": 2900, "teacher_loss": 0.21712376177310944 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.2428971230983734, "learning_rate": 1.2582044238831864e-05, "loss": 0.1937, "step": 2901, "teacher_loss": 0.1882447749376297 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.5596681833267212, "learning_rate": 1.258638137921064e-05, "loss": 0.2584, "step": 2902, "teacher_loss": 0.22494874894618988 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.31732532382011414, "learning_rate": 1.2590718519589418e-05, "loss": 0.2766, "step": 2903, "teacher_loss": 0.2720375657081604 }, { "compression_loss": 0.0, "epoch": 0.52, "label_loss": 0.9225265979766846, "learning_rate": 1.2595055659968195e-05, "loss": 0.2666, "step": 2904, "teacher_loss": 0.19372084736824036 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.22705870866775513, "learning_rate": 1.2599392800346973e-05, "loss": 0.1652, "step": 2905, "teacher_loss": 0.15828801691532135 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.5391129851341248, "learning_rate": 1.2603729940725747e-05, "loss": 0.2154, "step": 2906, "teacher_loss": 0.1794031858444214 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.9196866750717163, "learning_rate": 1.2608067081104525e-05, "loss": 1.0142, "step": 2907, "teacher_loss": 1.0247061252593994 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3779296875, "learning_rate": 1.2612404221483302e-05, "loss": 0.2771, "step": 2908, "teacher_loss": 0.2658865451812744 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.8352428078651428, "learning_rate": 1.261674136186208e-05, "loss": 0.3728, "step": 2909, "teacher_loss": 0.3214607834815979 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.4498641788959503, "learning_rate": 1.2621078502240856e-05, "loss": 0.1908, "step": 2910, "teacher_loss": 0.1619841307401657 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.13946379721164703, "learning_rate": 1.2625415642619633e-05, "loss": 0.2458, "step": 2911, "teacher_loss": 0.25761860609054565 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.2651360332965851, "learning_rate": 1.262975278299841e-05, "loss": 0.1792, "step": 2912, "teacher_loss": 0.16965562105178833 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3686763048171997, "learning_rate": 1.2634089923377187e-05, "loss": 0.2712, "step": 2913, "teacher_loss": 0.2603206932544708 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3693809509277344, "learning_rate": 1.2638427063755965e-05, "loss": 0.254, "step": 2914, "teacher_loss": 0.24115371704101562 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.487461119890213, "learning_rate": 1.264276420413474e-05, "loss": 0.2517, "step": 2915, "teacher_loss": 0.22555246949195862 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.16619402170181274, "learning_rate": 1.2647101344513518e-05, "loss": 0.2858, "step": 2916, "teacher_loss": 0.299077570438385 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.26363813877105713, "learning_rate": 1.2651438484892294e-05, "loss": 0.2137, "step": 2917, "teacher_loss": 0.20814919471740723 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.7735315561294556, "learning_rate": 1.2655775625271072e-05, "loss": 0.4215, "step": 2918, "teacher_loss": 0.38242459297180176 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.68896484375, "learning_rate": 1.2660112765649848e-05, "loss": 0.2859, "step": 2919, "teacher_loss": 0.24111396074295044 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.31177210807800293, "learning_rate": 1.2664449906028625e-05, "loss": 0.3807, "step": 2920, "teacher_loss": 0.38830476999282837 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.957051157951355, "learning_rate": 1.2668787046407403e-05, "loss": 0.3113, "step": 2921, "teacher_loss": 0.23959200084209442 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.7861664891242981, "learning_rate": 1.267312418678618e-05, "loss": 0.3176, "step": 2922, "teacher_loss": 0.26552748680114746 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.37029603123664856, "learning_rate": 1.2677461327164956e-05, "loss": 0.2775, "step": 2923, "teacher_loss": 0.26715022325515747 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.7816681265830994, "learning_rate": 1.2681798467543732e-05, "loss": 0.2699, "step": 2924, "teacher_loss": 0.21298792958259583 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.4529823660850525, "learning_rate": 1.268613560792251e-05, "loss": 0.2383, "step": 2925, "teacher_loss": 0.2144116908311844 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3037337064743042, "learning_rate": 1.2690472748301288e-05, "loss": 0.1906, "step": 2926, "teacher_loss": 0.17799633741378784 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.5503349304199219, "learning_rate": 1.2694809888680065e-05, "loss": 0.3369, "step": 2927, "teacher_loss": 0.3131994903087616 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.45318156480789185, "learning_rate": 1.269914702905884e-05, "loss": 0.2991, "step": 2928, "teacher_loss": 0.2819896340370178 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.6324386596679688, "learning_rate": 1.2703484169437617e-05, "loss": 0.2648, "step": 2929, "teacher_loss": 0.22397229075431824 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.4378213882446289, "learning_rate": 1.2707821309816395e-05, "loss": 0.265, "step": 2930, "teacher_loss": 0.24574480950832367 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.421183317899704, "learning_rate": 1.2712158450195172e-05, "loss": 0.2235, "step": 2931, "teacher_loss": 0.20153206586837769 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3889850974082947, "learning_rate": 1.271649559057395e-05, "loss": 0.2386, "step": 2932, "teacher_loss": 0.22189223766326904 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.19437116384506226, "learning_rate": 1.2720832730952724e-05, "loss": 0.2291, "step": 2933, "teacher_loss": 0.23290346562862396 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.24955680966377258, "learning_rate": 1.2725169871331502e-05, "loss": 0.1752, "step": 2934, "teacher_loss": 0.16697286069393158 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.677643895149231, "learning_rate": 1.272950701171028e-05, "loss": 0.252, "step": 2935, "teacher_loss": 0.2047576904296875 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.4003026485443115, "learning_rate": 1.2733844152089057e-05, "loss": 0.3736, "step": 2936, "teacher_loss": 0.3706578016281128 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3286217153072357, "learning_rate": 1.2738181292467833e-05, "loss": 0.2492, "step": 2937, "teacher_loss": 0.24041131138801575 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3038771450519562, "learning_rate": 1.274251843284661e-05, "loss": 0.2376, "step": 2938, "teacher_loss": 0.2302335798740387 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.20332199335098267, "learning_rate": 1.2746855573225387e-05, "loss": 0.2685, "step": 2939, "teacher_loss": 0.27578121423721313 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.8955676555633545, "learning_rate": 1.2751192713604164e-05, "loss": 0.2943, "step": 2940, "teacher_loss": 0.22750616073608398 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.4472431540489197, "learning_rate": 1.275552985398294e-05, "loss": 0.2416, "step": 2941, "teacher_loss": 0.21879026293754578 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.23451286554336548, "learning_rate": 1.2759866994361718e-05, "loss": 0.2458, "step": 2942, "teacher_loss": 0.24701380729675293 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.17961151897907257, "learning_rate": 1.2764204134740495e-05, "loss": 0.1674, "step": 2943, "teacher_loss": 0.16604915261268616 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.2604311406612396, "learning_rate": 1.2768541275119271e-05, "loss": 0.1591, "step": 2944, "teacher_loss": 0.14781039953231812 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.5312840342521667, "learning_rate": 1.2772878415498049e-05, "loss": 0.2639, "step": 2945, "teacher_loss": 0.23418551683425903 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.3973923921585083, "learning_rate": 1.2777215555876825e-05, "loss": 0.1599, "step": 2946, "teacher_loss": 0.13354821503162384 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.19833387434482574, "learning_rate": 1.2781552696255602e-05, "loss": 0.1875, "step": 2947, "teacher_loss": 0.18631184101104736 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.5400410294532776, "learning_rate": 1.278588983663438e-05, "loss": 0.2465, "step": 2948, "teacher_loss": 0.21387764811515808 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.7205885648727417, "learning_rate": 1.2790226977013158e-05, "loss": 0.2289, "step": 2949, "teacher_loss": 0.17426863312721252 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.42183375358581543, "learning_rate": 1.2794564117391932e-05, "loss": 0.2627, "step": 2950, "teacher_loss": 0.24499329924583435 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.2620997130870819, "learning_rate": 1.279890125777071e-05, "loss": 0.2044, "step": 2951, "teacher_loss": 0.1979558765888214 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.36855006217956543, "learning_rate": 1.2803238398149487e-05, "loss": 0.2096, "step": 2952, "teacher_loss": 0.19195443391799927 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.21986077725887299, "learning_rate": 1.2807575538528265e-05, "loss": 0.2023, "step": 2953, "teacher_loss": 0.20036692917346954 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.45694780349731445, "learning_rate": 1.2811912678907042e-05, "loss": 0.254, "step": 2954, "teacher_loss": 0.23141643404960632 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.49373775720596313, "learning_rate": 1.2816249819285817e-05, "loss": 0.2591, "step": 2955, "teacher_loss": 0.23300310969352722 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.47111624479293823, "learning_rate": 1.2820586959664594e-05, "loss": 0.2297, "step": 2956, "teacher_loss": 0.20282186567783356 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.8163066506385803, "learning_rate": 1.2824924100043372e-05, "loss": 0.373, "step": 2957, "teacher_loss": 0.3237210512161255 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.7496196031570435, "learning_rate": 1.282926124042215e-05, "loss": 0.2933, "step": 2958, "teacher_loss": 0.2425549030303955 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.45043647289276123, "learning_rate": 1.2833598380800925e-05, "loss": 0.282, "step": 2959, "teacher_loss": 0.26325374841690063 }, { "compression_loss": 0.0, "epoch": 0.53, "label_loss": 0.695798397064209, "learning_rate": 1.2837935521179703e-05, "loss": 0.2972, "step": 2960, "teacher_loss": 0.25294405221939087 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6354823708534241, "learning_rate": 1.2842272661558479e-05, "loss": 0.2207, "step": 2961, "teacher_loss": 0.17458616197109222 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.26955729722976685, "learning_rate": 1.2846609801937257e-05, "loss": 0.2109, "step": 2962, "teacher_loss": 0.20439890027046204 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.39085984230041504, "learning_rate": 1.2850946942316033e-05, "loss": 0.2908, "step": 2963, "teacher_loss": 0.27965545654296875 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3037847876548767, "learning_rate": 1.285528408269481e-05, "loss": 0.1976, "step": 2964, "teacher_loss": 0.18583178520202637 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5035691261291504, "learning_rate": 1.2859621223073588e-05, "loss": 0.2172, "step": 2965, "teacher_loss": 0.18540005385875702 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.1723177433013916, "learning_rate": 1.2863958363452364e-05, "loss": 0.1522, "step": 2966, "teacher_loss": 0.149988055229187 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3799302875995636, "learning_rate": 1.2868295503831141e-05, "loss": 0.2547, "step": 2967, "teacher_loss": 0.24080045521259308 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.1720074862241745, "learning_rate": 1.2872632644209917e-05, "loss": 0.1863, "step": 2968, "teacher_loss": 0.18787409365177155 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6693521738052368, "learning_rate": 1.2876969784588695e-05, "loss": 0.2875, "step": 2969, "teacher_loss": 0.24511204659938812 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.08292323350906372, "learning_rate": 1.2881306924967473e-05, "loss": 0.1411, "step": 2970, "teacher_loss": 0.14757077395915985 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.7695428133010864, "learning_rate": 1.288564406534625e-05, "loss": 0.5112, "step": 2971, "teacher_loss": 0.48253825306892395 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.419270783662796, "learning_rate": 1.2889981205725024e-05, "loss": 0.2384, "step": 2972, "teacher_loss": 0.21833764016628265 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5678366422653198, "learning_rate": 1.2894318346103802e-05, "loss": 0.3143, "step": 2973, "teacher_loss": 0.2861189842224121 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6132630705833435, "learning_rate": 1.289865548648258e-05, "loss": 0.5098, "step": 2974, "teacher_loss": 0.4983125329017639 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.4667607247829437, "learning_rate": 1.2902992626861357e-05, "loss": 0.2189, "step": 2975, "teacher_loss": 0.19137075543403625 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.29011255502700806, "learning_rate": 1.2907329767240135e-05, "loss": 0.2022, "step": 2976, "teacher_loss": 0.19248229265213013 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5009244680404663, "learning_rate": 1.2911666907618909e-05, "loss": 0.2319, "step": 2977, "teacher_loss": 0.202016219496727 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.8742498159408569, "learning_rate": 1.2916004047997687e-05, "loss": 0.3398, "step": 2978, "teacher_loss": 0.28036707639694214 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.429987370967865, "learning_rate": 1.2920341188376464e-05, "loss": 0.2193, "step": 2979, "teacher_loss": 0.19584423303604126 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.4443659782409668, "learning_rate": 1.2924678328755242e-05, "loss": 0.2459, "step": 2980, "teacher_loss": 0.22390232980251312 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5653756856918335, "learning_rate": 1.2929015469134018e-05, "loss": 0.255, "step": 2981, "teacher_loss": 0.22055259346961975 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.9079474210739136, "learning_rate": 1.2933352609512794e-05, "loss": 0.2699, "step": 2982, "teacher_loss": 0.1990591436624527 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.9636232852935791, "learning_rate": 1.2937689749891571e-05, "loss": 0.3, "step": 2983, "teacher_loss": 0.2263181209564209 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.9298241138458252, "learning_rate": 1.2942026890270349e-05, "loss": 0.4061, "step": 2984, "teacher_loss": 0.3479520082473755 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.795227587223053, "learning_rate": 1.2946364030649125e-05, "loss": 0.2828, "step": 2985, "teacher_loss": 0.22584865987300873 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.7138127088546753, "learning_rate": 1.2950701171027903e-05, "loss": 0.3002, "step": 2986, "teacher_loss": 0.2542181611061096 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.32672247290611267, "learning_rate": 1.295503831140668e-05, "loss": 0.182, "step": 2987, "teacher_loss": 0.16587606072425842 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.32905805110931396, "learning_rate": 1.2959375451785456e-05, "loss": 0.2375, "step": 2988, "teacher_loss": 0.22734344005584717 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.20300397276878357, "learning_rate": 1.2963712592164234e-05, "loss": 0.1999, "step": 2989, "teacher_loss": 0.19954471290111542 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.31182849407196045, "learning_rate": 1.296804973254301e-05, "loss": 0.276, "step": 2990, "teacher_loss": 0.27205315232276917 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3289722800254822, "learning_rate": 1.2972386872921787e-05, "loss": 0.2052, "step": 2991, "teacher_loss": 0.19146430492401123 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.13437870144844055, "learning_rate": 1.2976724013300565e-05, "loss": 0.1561, "step": 2992, "teacher_loss": 0.15848934650421143 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3751770555973053, "learning_rate": 1.2981061153679341e-05, "loss": 0.291, "step": 2993, "teacher_loss": 0.2816421389579773 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.43278443813323975, "learning_rate": 1.2985398294058117e-05, "loss": 0.316, "step": 2994, "teacher_loss": 0.3030541241168976 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5744333267211914, "learning_rate": 1.2989735434436894e-05, "loss": 0.2895, "step": 2995, "teacher_loss": 0.2577863335609436 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.43087315559387207, "learning_rate": 1.2994072574815672e-05, "loss": 0.2658, "step": 2996, "teacher_loss": 0.24747687578201294 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.26595252752304077, "learning_rate": 1.299840971519445e-05, "loss": 0.1704, "step": 2997, "teacher_loss": 0.1597684770822525 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.7176780700683594, "learning_rate": 1.3002746855573227e-05, "loss": 0.3263, "step": 2998, "teacher_loss": 0.2828543782234192 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.4191933274269104, "learning_rate": 1.3007083995952002e-05, "loss": 0.2579, "step": 2999, "teacher_loss": 0.24002447724342346 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6982427835464478, "learning_rate": 1.3011421136330779e-05, "loss": 0.2968, "step": 3000, "teacher_loss": 0.25222811102867126 }, { "epoch": 0.54, "eval_exact_match": 79.94323557237465, "eval_f1": 87.31618470192804, "step": 3000 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.2842622399330139, "learning_rate": 1.3015758276709557e-05, "loss": 0.2971, "step": 3001, "teacher_loss": 0.2985485792160034 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.30096569657325745, "learning_rate": 1.3020095417088334e-05, "loss": 0.1854, "step": 3002, "teacher_loss": 0.17260706424713135 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.2157330960035324, "learning_rate": 1.302443255746711e-05, "loss": 0.1485, "step": 3003, "teacher_loss": 0.1410660594701767 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.7136178612709045, "learning_rate": 1.3028769697845886e-05, "loss": 0.2387, "step": 3004, "teacher_loss": 0.18590174615383148 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6043393611907959, "learning_rate": 1.3033106838224664e-05, "loss": 0.3809, "step": 3005, "teacher_loss": 0.35605260729789734 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3761269152164459, "learning_rate": 1.3037443978603441e-05, "loss": 0.205, "step": 3006, "teacher_loss": 0.1859891414642334 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.6790250539779663, "learning_rate": 1.3041781118982217e-05, "loss": 0.3205, "step": 3007, "teacher_loss": 0.28062334656715393 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.25974640250205994, "learning_rate": 1.3046118259360995e-05, "loss": 0.2005, "step": 3008, "teacher_loss": 0.19389094412326813 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.4829614460468292, "learning_rate": 1.3050455399739773e-05, "loss": 0.2746, "step": 3009, "teacher_loss": 0.25148075819015503 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5688278675079346, "learning_rate": 1.3054792540118549e-05, "loss": 0.2741, "step": 3010, "teacher_loss": 0.2413042187690735 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.4807244539260864, "learning_rate": 1.3059129680497326e-05, "loss": 0.3191, "step": 3011, "teacher_loss": 0.3011924624443054 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.5908994078636169, "learning_rate": 1.3063466820876102e-05, "loss": 0.2956, "step": 3012, "teacher_loss": 0.2627811133861542 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.3035929799079895, "learning_rate": 1.306780396125488e-05, "loss": 0.1878, "step": 3013, "teacher_loss": 0.17493954300880432 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.21108703315258026, "learning_rate": 1.3072141101633657e-05, "loss": 0.2615, "step": 3014, "teacher_loss": 0.2671399414539337 }, { "compression_loss": 0.0, "epoch": 0.54, "label_loss": 0.10633372515439987, "learning_rate": 1.3076478242012433e-05, "loss": 0.169, "step": 3015, "teacher_loss": 0.17599095404148102 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4860968589782715, "learning_rate": 1.308081538239121e-05, "loss": 0.2296, "step": 3016, "teacher_loss": 0.20113936066627502 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3239452838897705, "learning_rate": 1.3085152522769987e-05, "loss": 0.2393, "step": 3017, "teacher_loss": 0.22985979914665222 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.7383592128753662, "learning_rate": 1.3089489663148764e-05, "loss": 0.258, "step": 3018, "teacher_loss": 0.20466801524162292 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3263548016548157, "learning_rate": 1.3093826803527542e-05, "loss": 0.2129, "step": 3019, "teacher_loss": 0.20030061900615692 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3775064945220947, "learning_rate": 1.309816394390632e-05, "loss": 0.1967, "step": 3020, "teacher_loss": 0.17665760219097137 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5024935007095337, "learning_rate": 1.3102501084285094e-05, "loss": 0.2949, "step": 3021, "teacher_loss": 0.27180016040802 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3055901825428009, "learning_rate": 1.3106838224663872e-05, "loss": 0.1727, "step": 3022, "teacher_loss": 0.15798087418079376 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3656060993671417, "learning_rate": 1.311117536504265e-05, "loss": 0.2292, "step": 3023, "teacher_loss": 0.2140159159898758 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3799038529396057, "learning_rate": 1.3115512505421427e-05, "loss": 0.2787, "step": 3024, "teacher_loss": 0.2675032615661621 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.21871453523635864, "learning_rate": 1.3119849645800203e-05, "loss": 0.2737, "step": 3025, "teacher_loss": 0.2798258066177368 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.30443406105041504, "learning_rate": 1.3124186786178979e-05, "loss": 0.1568, "step": 3026, "teacher_loss": 0.14043620228767395 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.2812073826789856, "learning_rate": 1.3128523926557756e-05, "loss": 0.1861, "step": 3027, "teacher_loss": 0.17554882168769836 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.23737600445747375, "learning_rate": 1.3132861066936534e-05, "loss": 0.1734, "step": 3028, "teacher_loss": 0.16628527641296387 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.37612712383270264, "learning_rate": 1.313719820731531e-05, "loss": 0.1981, "step": 3029, "teacher_loss": 0.1783452332019806 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5463454723358154, "learning_rate": 1.3141535347694087e-05, "loss": 0.317, "step": 3030, "teacher_loss": 0.29153114557266235 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.2918059229850769, "learning_rate": 1.3145872488072863e-05, "loss": 0.1992, "step": 3031, "teacher_loss": 0.18888552486896515 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.21595440804958344, "learning_rate": 1.3150209628451641e-05, "loss": 0.1933, "step": 3032, "teacher_loss": 0.19076672196388245 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.6261675357818604, "learning_rate": 1.3154546768830419e-05, "loss": 0.2623, "step": 3033, "teacher_loss": 0.22182686626911163 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5985780954360962, "learning_rate": 1.3158883909209195e-05, "loss": 0.2902, "step": 3034, "teacher_loss": 0.2559766173362732 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3944920003414154, "learning_rate": 1.3163221049587972e-05, "loss": 0.2814, "step": 3035, "teacher_loss": 0.2687896490097046 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.6342904567718506, "learning_rate": 1.316755818996675e-05, "loss": 0.3017, "step": 3036, "teacher_loss": 0.2647266089916229 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.341489315032959, "learning_rate": 1.3171895330345526e-05, "loss": 0.2288, "step": 3037, "teacher_loss": 0.21629779040813446 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.37064623832702637, "learning_rate": 1.3176232470724302e-05, "loss": 0.3243, "step": 3038, "teacher_loss": 0.31920325756073 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.24229909479618073, "learning_rate": 1.318056961110308e-05, "loss": 0.1651, "step": 3039, "teacher_loss": 0.15657153725624084 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.20493543148040771, "learning_rate": 1.3184906751481857e-05, "loss": 0.2168, "step": 3040, "teacher_loss": 0.2180669754743576 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4140383005142212, "learning_rate": 1.3189243891860635e-05, "loss": 0.2481, "step": 3041, "teacher_loss": 0.22965312004089355 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.6068315505981445, "learning_rate": 1.319358103223941e-05, "loss": 0.2632, "step": 3042, "teacher_loss": 0.22502192854881287 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4939553141593933, "learning_rate": 1.3197918172618186e-05, "loss": 0.2376, "step": 3043, "teacher_loss": 0.20913271605968475 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4232443571090698, "learning_rate": 1.3202255312996964e-05, "loss": 0.2619, "step": 3044, "teacher_loss": 0.24401313066482544 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4562731981277466, "learning_rate": 1.3206592453375742e-05, "loss": 0.2871, "step": 3045, "teacher_loss": 0.2682906985282898 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.2555849552154541, "learning_rate": 1.321092959375452e-05, "loss": 0.2005, "step": 3046, "teacher_loss": 0.1944049447774887 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.26248323917388916, "learning_rate": 1.3215266734133295e-05, "loss": 0.2798, "step": 3047, "teacher_loss": 0.28168004751205444 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4334351420402527, "learning_rate": 1.3219603874512071e-05, "loss": 0.2543, "step": 3048, "teacher_loss": 0.23443761467933655 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3107101023197174, "learning_rate": 1.3223941014890849e-05, "loss": 0.1804, "step": 3049, "teacher_loss": 0.16587281227111816 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.27195990085601807, "learning_rate": 1.3228278155269626e-05, "loss": 0.2028, "step": 3050, "teacher_loss": 0.1951400339603424 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.14790582656860352, "learning_rate": 1.3232615295648402e-05, "loss": 0.1787, "step": 3051, "teacher_loss": 0.1821078360080719 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.6512551307678223, "learning_rate": 1.323695243602718e-05, "loss": 0.418, "step": 3052, "teacher_loss": 0.39210885763168335 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.26082876324653625, "learning_rate": 1.3241289576405956e-05, "loss": 0.2952, "step": 3053, "teacher_loss": 0.29897040128707886 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.6199743747711182, "learning_rate": 1.3245626716784733e-05, "loss": 0.2705, "step": 3054, "teacher_loss": 0.2316872775554657 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3557511568069458, "learning_rate": 1.3249963857163511e-05, "loss": 0.2101, "step": 3055, "teacher_loss": 0.19395655393600464 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.20252348482608795, "learning_rate": 1.3254300997542287e-05, "loss": 0.1899, "step": 3056, "teacher_loss": 0.18853795528411865 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.3717423975467682, "learning_rate": 1.3258638137921065e-05, "loss": 0.2233, "step": 3057, "teacher_loss": 0.2068050503730774 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.30125415325164795, "learning_rate": 1.3262975278299842e-05, "loss": 0.252, "step": 3058, "teacher_loss": 0.24657300114631653 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.2846919894218445, "learning_rate": 1.3267312418678618e-05, "loss": 0.236, "step": 3059, "teacher_loss": 0.23061969876289368 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.27283358573913574, "learning_rate": 1.3271649559057394e-05, "loss": 0.2082, "step": 3060, "teacher_loss": 0.20102459192276 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.27168700098991394, "learning_rate": 1.3275986699436172e-05, "loss": 0.2267, "step": 3061, "teacher_loss": 0.22175222635269165 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.13136842846870422, "learning_rate": 1.328032383981495e-05, "loss": 0.1728, "step": 3062, "teacher_loss": 0.17739540338516235 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.4940868616104126, "learning_rate": 1.3284660980193727e-05, "loss": 0.2778, "step": 3063, "teacher_loss": 0.25375664234161377 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.1723693311214447, "learning_rate": 1.3288998120572503e-05, "loss": 0.1601, "step": 3064, "teacher_loss": 0.15872237086296082 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.2709307074546814, "learning_rate": 1.3293335260951279e-05, "loss": 0.1732, "step": 3065, "teacher_loss": 0.16239234805107117 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5543541312217712, "learning_rate": 1.3297672401330056e-05, "loss": 0.2946, "step": 3066, "teacher_loss": 0.26571282744407654 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5398831367492676, "learning_rate": 1.3302009541708834e-05, "loss": 0.2352, "step": 3067, "teacher_loss": 0.20139722526073456 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5789247751235962, "learning_rate": 1.3306346682087612e-05, "loss": 0.2728, "step": 3068, "teacher_loss": 0.23879292607307434 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.5951073169708252, "learning_rate": 1.3310683822466388e-05, "loss": 0.2471, "step": 3069, "teacher_loss": 0.20844855904579163 }, { "compression_loss": 0.0, "epoch": 0.55, "label_loss": 0.487831711769104, "learning_rate": 1.3315020962845164e-05, "loss": 0.2619, "step": 3070, "teacher_loss": 0.23681041598320007 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.7076802849769592, "learning_rate": 1.3319358103223941e-05, "loss": 0.2653, "step": 3071, "teacher_loss": 0.2161283940076828 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.7985213994979858, "learning_rate": 1.3323695243602719e-05, "loss": 0.3906, "step": 3072, "teacher_loss": 0.3452828824520111 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.4687846302986145, "learning_rate": 1.3328032383981495e-05, "loss": 0.2227, "step": 3073, "teacher_loss": 0.19536671042442322 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.35332876443862915, "learning_rate": 1.3332369524360272e-05, "loss": 0.1849, "step": 3074, "teacher_loss": 0.1662396490573883 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.4023820161819458, "learning_rate": 1.3336706664739048e-05, "loss": 0.2046, "step": 3075, "teacher_loss": 0.18261641263961792 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.27179956436157227, "learning_rate": 1.3341043805117826e-05, "loss": 0.2106, "step": 3076, "teacher_loss": 0.2037510722875595 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.48693007230758667, "learning_rate": 1.3345380945496604e-05, "loss": 0.2661, "step": 3077, "teacher_loss": 0.2415974885225296 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.6925801038742065, "learning_rate": 1.334971808587538e-05, "loss": 0.2513, "step": 3078, "teacher_loss": 0.2022903561592102 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.14765048027038574, "learning_rate": 1.3354055226254157e-05, "loss": 0.1714, "step": 3079, "teacher_loss": 0.17400771379470825 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.681926965713501, "learning_rate": 1.3358392366632933e-05, "loss": 0.2991, "step": 3080, "teacher_loss": 0.2566143274307251 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2558896541595459, "learning_rate": 1.336272950701171e-05, "loss": 0.2099, "step": 3081, "teacher_loss": 0.20477989315986633 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2703326940536499, "learning_rate": 1.3367066647390487e-05, "loss": 0.1562, "step": 3082, "teacher_loss": 0.14357003569602966 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.36100858449935913, "learning_rate": 1.3371403787769264e-05, "loss": 0.2037, "step": 3083, "teacher_loss": 0.1861998438835144 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.4562470614910126, "learning_rate": 1.3375740928148042e-05, "loss": 0.2177, "step": 3084, "teacher_loss": 0.19119438529014587 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.34820556640625, "learning_rate": 1.338007806852682e-05, "loss": 0.1766, "step": 3085, "teacher_loss": 0.1574985235929489 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2374558448791504, "learning_rate": 1.3384415208905595e-05, "loss": 0.163, "step": 3086, "teacher_loss": 0.15469534695148468 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2202252447605133, "learning_rate": 1.3388752349284371e-05, "loss": 0.2416, "step": 3087, "teacher_loss": 0.2439364194869995 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.49908357858657837, "learning_rate": 1.3393089489663149e-05, "loss": 0.2725, "step": 3088, "teacher_loss": 0.24732965230941772 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.8662938475608826, "learning_rate": 1.3397426630041927e-05, "loss": 0.38, "step": 3089, "teacher_loss": 0.3260199725627899 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.14701521396636963, "learning_rate": 1.3401763770420704e-05, "loss": 0.1682, "step": 3090, "teacher_loss": 0.170506089925766 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2729068100452423, "learning_rate": 1.3406100910799478e-05, "loss": 0.1817, "step": 3091, "teacher_loss": 0.171578049659729 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.7264319658279419, "learning_rate": 1.3410438051178256e-05, "loss": 0.3385, "step": 3092, "teacher_loss": 0.2953737676143646 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.44986459612846375, "learning_rate": 1.3414775191557034e-05, "loss": 0.228, "step": 3093, "teacher_loss": 0.2033209204673767 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.8176664113998413, "learning_rate": 1.3419112331935811e-05, "loss": 0.2287, "step": 3094, "teacher_loss": 0.16329774260520935 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.36958047747612, "learning_rate": 1.3423449472314589e-05, "loss": 0.1774, "step": 3095, "teacher_loss": 0.15599533915519714 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.8095451593399048, "learning_rate": 1.3427786612693365e-05, "loss": 0.3054, "step": 3096, "teacher_loss": 0.24937686324119568 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.31955987215042114, "learning_rate": 1.343212375307214e-05, "loss": 0.1962, "step": 3097, "teacher_loss": 0.18253986537456512 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.3793798089027405, "learning_rate": 1.3436460893450918e-05, "loss": 0.2852, "step": 3098, "teacher_loss": 0.2747170031070709 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2132205069065094, "learning_rate": 1.3440798033829696e-05, "loss": 0.1987, "step": 3099, "teacher_loss": 0.19707725942134857 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.7296171188354492, "learning_rate": 1.3445135174208472e-05, "loss": 0.2495, "step": 3100, "teacher_loss": 0.196100652217865 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.633063793182373, "learning_rate": 1.344947231458725e-05, "loss": 0.2454, "step": 3101, "teacher_loss": 0.20230242609977722 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.25027191638946533, "learning_rate": 1.3453809454966025e-05, "loss": 0.3152, "step": 3102, "teacher_loss": 0.32245850563049316 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.29171448945999146, "learning_rate": 1.3458146595344803e-05, "loss": 0.2663, "step": 3103, "teacher_loss": 0.26343053579330444 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2954946756362915, "learning_rate": 1.3462483735723579e-05, "loss": 0.2435, "step": 3104, "teacher_loss": 0.2377127707004547 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.5202397108078003, "learning_rate": 1.3466820876102357e-05, "loss": 0.2237, "step": 3105, "teacher_loss": 0.19072537124156952 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.7288155555725098, "learning_rate": 1.3471158016481134e-05, "loss": 0.2913, "step": 3106, "teacher_loss": 0.24267855286598206 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.3162734806537628, "learning_rate": 1.3475495156859912e-05, "loss": 0.2385, "step": 3107, "teacher_loss": 0.22986571490764618 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.8069810271263123, "learning_rate": 1.3479832297238688e-05, "loss": 0.2565, "step": 3108, "teacher_loss": 0.19535022974014282 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.41321685910224915, "learning_rate": 1.3484169437617464e-05, "loss": 0.1877, "step": 3109, "teacher_loss": 0.16265472769737244 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.26473525166511536, "learning_rate": 1.3488506577996241e-05, "loss": 0.1824, "step": 3110, "teacher_loss": 0.1732155680656433 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.3609674572944641, "learning_rate": 1.3492843718375019e-05, "loss": 0.2285, "step": 3111, "teacher_loss": 0.21373626589775085 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.26164525747299194, "learning_rate": 1.3497180858753797e-05, "loss": 0.1918, "step": 3112, "teacher_loss": 0.184035524725914 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.3896588087081909, "learning_rate": 1.350151799913257e-05, "loss": 0.2141, "step": 3113, "teacher_loss": 0.19460904598236084 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.5366600751876831, "learning_rate": 1.3505855139511348e-05, "loss": 0.2662, "step": 3114, "teacher_loss": 0.2361549735069275 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.26788294315338135, "learning_rate": 1.3510192279890126e-05, "loss": 0.19, "step": 3115, "teacher_loss": 0.18137764930725098 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.07358293980360031, "learning_rate": 1.3514529420268904e-05, "loss": 0.1957, "step": 3116, "teacher_loss": 0.2093045711517334 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.5157208442687988, "learning_rate": 1.3518866560647681e-05, "loss": 0.2848, "step": 3117, "teacher_loss": 0.25917455554008484 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.21517476439476013, "learning_rate": 1.3523203701026457e-05, "loss": 0.3121, "step": 3118, "teacher_loss": 0.3228638470172882 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.16703195869922638, "learning_rate": 1.3527540841405233e-05, "loss": 0.2099, "step": 3119, "teacher_loss": 0.21471600234508514 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2685527801513672, "learning_rate": 1.353187798178401e-05, "loss": 0.2036, "step": 3120, "teacher_loss": 0.1963837742805481 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.2674502432346344, "learning_rate": 1.3536215122162788e-05, "loss": 0.2638, "step": 3121, "teacher_loss": 0.26343005895614624 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.45980197191238403, "learning_rate": 1.3540552262541564e-05, "loss": 0.3683, "step": 3122, "teacher_loss": 0.35817086696624756 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 1.1843032836914062, "learning_rate": 1.3544889402920342e-05, "loss": 0.3411, "step": 3123, "teacher_loss": 0.24737712740898132 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.3766411542892456, "learning_rate": 1.3549226543299118e-05, "loss": 0.2733, "step": 3124, "teacher_loss": 0.2618699073791504 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.874194860458374, "learning_rate": 1.3553563683677896e-05, "loss": 0.2646, "step": 3125, "teacher_loss": 0.1969054490327835 }, { "compression_loss": 0.0, "epoch": 0.56, "label_loss": 0.36310744285583496, "learning_rate": 1.3557900824056671e-05, "loss": 0.2188, "step": 3126, "teacher_loss": 0.20271845161914825 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.4812440872192383, "learning_rate": 1.3562237964435449e-05, "loss": 0.1968, "step": 3127, "teacher_loss": 0.1652371883392334 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.20630908012390137, "learning_rate": 1.3566575104814227e-05, "loss": 0.2092, "step": 3128, "teacher_loss": 0.20946836471557617 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.38478463888168335, "learning_rate": 1.3570912245193003e-05, "loss": 0.2139, "step": 3129, "teacher_loss": 0.19489642977714539 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3425915539264679, "learning_rate": 1.357524938557178e-05, "loss": 0.2832, "step": 3130, "teacher_loss": 0.27660250663757324 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.9745980501174927, "learning_rate": 1.3579586525950556e-05, "loss": 0.2874, "step": 3131, "teacher_loss": 0.21106813848018646 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5787369608879089, "learning_rate": 1.3583923666329334e-05, "loss": 0.2073, "step": 3132, "teacher_loss": 0.16603747010231018 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.6792896389961243, "learning_rate": 1.3588260806708111e-05, "loss": 0.2626, "step": 3133, "teacher_loss": 0.21633067727088928 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3057715892791748, "learning_rate": 1.3592597947086889e-05, "loss": 0.1739, "step": 3134, "teacher_loss": 0.15922175347805023 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.49490582942962646, "learning_rate": 1.3596935087465663e-05, "loss": 0.3221, "step": 3135, "teacher_loss": 0.3028768301010132 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.30658990144729614, "learning_rate": 1.3601272227844441e-05, "loss": 0.2477, "step": 3136, "teacher_loss": 0.24117395281791687 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.24417918920516968, "learning_rate": 1.3605609368223218e-05, "loss": 0.2684, "step": 3137, "teacher_loss": 0.27106496691703796 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.6911215782165527, "learning_rate": 1.3609946508601996e-05, "loss": 0.2589, "step": 3138, "teacher_loss": 0.2108924835920334 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2780971825122833, "learning_rate": 1.3614283648980774e-05, "loss": 0.2289, "step": 3139, "teacher_loss": 0.22347579896450043 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.35225605964660645, "learning_rate": 1.3618620789359548e-05, "loss": 0.3281, "step": 3140, "teacher_loss": 0.3254607617855072 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.529262900352478, "learning_rate": 1.3622957929738326e-05, "loss": 0.2421, "step": 3141, "teacher_loss": 0.21015891432762146 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.303768128156662, "learning_rate": 1.3627295070117103e-05, "loss": 0.173, "step": 3142, "teacher_loss": 0.15846604108810425 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.21878615021705627, "learning_rate": 1.363163221049588e-05, "loss": 0.1914, "step": 3143, "teacher_loss": 0.18833668529987335 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2296508252620697, "learning_rate": 1.3635969350874657e-05, "loss": 0.1651, "step": 3144, "teacher_loss": 0.15793374180793762 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.48295897245407104, "learning_rate": 1.3640306491253434e-05, "loss": 0.2587, "step": 3145, "teacher_loss": 0.23380841314792633 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.8142374753952026, "learning_rate": 1.364464363163221e-05, "loss": 0.3286, "step": 3146, "teacher_loss": 0.27459973096847534 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.45030349493026733, "learning_rate": 1.3648980772010988e-05, "loss": 0.2732, "step": 3147, "teacher_loss": 0.2535628080368042 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5232150554656982, "learning_rate": 1.3653317912389764e-05, "loss": 0.2784, "step": 3148, "teacher_loss": 0.2511853575706482 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.31167078018188477, "learning_rate": 1.3657655052768541e-05, "loss": 0.2377, "step": 3149, "teacher_loss": 0.22949160635471344 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.15156973898410797, "learning_rate": 1.3661992193147319e-05, "loss": 0.2326, "step": 3150, "teacher_loss": 0.24163369834423065 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3795466423034668, "learning_rate": 1.3666329333526095e-05, "loss": 0.2285, "step": 3151, "teacher_loss": 0.2116793692111969 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.32396993041038513, "learning_rate": 1.3670666473904873e-05, "loss": 0.2179, "step": 3152, "teacher_loss": 0.20613354444503784 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.600192666053772, "learning_rate": 1.3675003614283649e-05, "loss": 0.2238, "step": 3153, "teacher_loss": 0.1819387674331665 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.597110390663147, "learning_rate": 1.3679340754662426e-05, "loss": 0.3018, "step": 3154, "teacher_loss": 0.2690383791923523 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5059059262275696, "learning_rate": 1.3683677895041204e-05, "loss": 0.2716, "step": 3155, "teacher_loss": 0.24560335278511047 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 1.1677502393722534, "learning_rate": 1.3688015035419981e-05, "loss": 0.4041, "step": 3156, "teacher_loss": 0.3192659914493561 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.518955647945404, "learning_rate": 1.3692352175798756e-05, "loss": 0.2569, "step": 3157, "teacher_loss": 0.2277565896511078 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 1.4068934917449951, "learning_rate": 1.3696689316177533e-05, "loss": 0.2939, "step": 3158, "teacher_loss": 0.17018303275108337 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2963440418243408, "learning_rate": 1.3701026456556311e-05, "loss": 0.2047, "step": 3159, "teacher_loss": 0.19454213976860046 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2932714819908142, "learning_rate": 1.3705363596935089e-05, "loss": 0.172, "step": 3160, "teacher_loss": 0.15849372744560242 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.4205343723297119, "learning_rate": 1.3709700737313866e-05, "loss": 0.243, "step": 3161, "teacher_loss": 0.22322650253772736 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.42705249786376953, "learning_rate": 1.371403787769264e-05, "loss": 0.3046, "step": 3162, "teacher_loss": 0.29094648361206055 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5534102916717529, "learning_rate": 1.3718375018071418e-05, "loss": 0.2744, "step": 3163, "teacher_loss": 0.2434028685092926 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.24196718633174896, "learning_rate": 1.3722712158450196e-05, "loss": 0.1735, "step": 3164, "teacher_loss": 0.16590993106365204 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.1688467413187027, "learning_rate": 1.3727049298828973e-05, "loss": 0.1352, "step": 3165, "teacher_loss": 0.1314394772052765 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.7563605904579163, "learning_rate": 1.373138643920775e-05, "loss": 0.472, "step": 3166, "teacher_loss": 0.4404444396495819 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.436075896024704, "learning_rate": 1.3735723579586527e-05, "loss": 0.2833, "step": 3167, "teacher_loss": 0.26631230115890503 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.4914531111717224, "learning_rate": 1.3740060719965303e-05, "loss": 0.2927, "step": 3168, "teacher_loss": 0.2706148624420166 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.44022417068481445, "learning_rate": 1.374439786034408e-05, "loss": 0.3839, "step": 3169, "teacher_loss": 0.37767693400382996 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.29825326800346375, "learning_rate": 1.3748735000722856e-05, "loss": 0.2196, "step": 3170, "teacher_loss": 0.21085938811302185 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5327640175819397, "learning_rate": 1.3753072141101634e-05, "loss": 0.2661, "step": 3171, "teacher_loss": 0.23651134967803955 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3903202414512634, "learning_rate": 1.3757409281480412e-05, "loss": 0.203, "step": 3172, "teacher_loss": 0.18213404715061188 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.32969826459884644, "learning_rate": 1.3761746421859187e-05, "loss": 0.3237, "step": 3173, "teacher_loss": 0.323083758354187 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3849000632762909, "learning_rate": 1.3766083562237965e-05, "loss": 0.2525, "step": 3174, "teacher_loss": 0.23778298497200012 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3668939769268036, "learning_rate": 1.3770420702616741e-05, "loss": 0.2593, "step": 3175, "teacher_loss": 0.24739089608192444 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.3109775185585022, "learning_rate": 1.3774757842995519e-05, "loss": 0.3024, "step": 3176, "teacher_loss": 0.3014335632324219 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2276211380958557, "learning_rate": 1.3779094983374296e-05, "loss": 0.205, "step": 3177, "teacher_loss": 0.20243845880031586 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.2896766662597656, "learning_rate": 1.3783432123753072e-05, "loss": 0.2031, "step": 3178, "teacher_loss": 0.19349798560142517 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.26275524497032166, "learning_rate": 1.3787769264131848e-05, "loss": 0.2082, "step": 3179, "teacher_loss": 0.20211520791053772 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5010784268379211, "learning_rate": 1.3792106404510626e-05, "loss": 0.225, "step": 3180, "teacher_loss": 0.19430626928806305 }, { "compression_loss": 0.0, "epoch": 0.57, "label_loss": 0.5018423795700073, "learning_rate": 1.3796443544889403e-05, "loss": 0.2318, "step": 3181, "teacher_loss": 0.2017618864774704 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.43978336453437805, "learning_rate": 1.3800780685268181e-05, "loss": 0.2633, "step": 3182, "teacher_loss": 0.24363601207733154 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5357323288917542, "learning_rate": 1.3805117825646959e-05, "loss": 0.205, "step": 3183, "teacher_loss": 0.16820865869522095 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.4715900421142578, "learning_rate": 1.3809454966025733e-05, "loss": 0.3507, "step": 3184, "teacher_loss": 0.3372383713722229 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.6615829467773438, "learning_rate": 1.381379210640451e-05, "loss": 0.2678, "step": 3185, "teacher_loss": 0.2240346372127533 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.8390197157859802, "learning_rate": 1.3818129246783288e-05, "loss": 0.335, "step": 3186, "teacher_loss": 0.2789629101753235 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5573806762695312, "learning_rate": 1.3822466387162066e-05, "loss": 0.2952, "step": 3187, "teacher_loss": 0.2661147117614746 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5123276710510254, "learning_rate": 1.3826803527540842e-05, "loss": 0.2683, "step": 3188, "teacher_loss": 0.2411530762910843 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.9935404062271118, "learning_rate": 1.3831140667919618e-05, "loss": 0.3157, "step": 3189, "teacher_loss": 0.24041594564914703 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.31871193647384644, "learning_rate": 1.3835477808298395e-05, "loss": 0.2388, "step": 3190, "teacher_loss": 0.22993820905685425 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.15627586841583252, "learning_rate": 1.3839814948677173e-05, "loss": 0.1539, "step": 3191, "teacher_loss": 0.15366145968437195 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.41014066338539124, "learning_rate": 1.3844152089055949e-05, "loss": 0.1942, "step": 3192, "teacher_loss": 0.17022094130516052 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.7192935347557068, "learning_rate": 1.3848489229434726e-05, "loss": 0.3478, "step": 3193, "teacher_loss": 0.30654144287109375 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.741270124912262, "learning_rate": 1.3852826369813504e-05, "loss": 0.3203, "step": 3194, "teacher_loss": 0.273525208234787 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5931705832481384, "learning_rate": 1.385716351019228e-05, "loss": 0.3113, "step": 3195, "teacher_loss": 0.2799602448940277 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.275505930185318, "learning_rate": 1.3861500650571058e-05, "loss": 0.1604, "step": 3196, "teacher_loss": 0.14755554497241974 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.2299749255180359, "learning_rate": 1.3865837790949833e-05, "loss": 0.1712, "step": 3197, "teacher_loss": 0.16462913155555725 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.3422144651412964, "learning_rate": 1.3870174931328611e-05, "loss": 0.1901, "step": 3198, "teacher_loss": 0.17318254709243774 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.47733044624328613, "learning_rate": 1.3874512071707389e-05, "loss": 0.3827, "step": 3199, "teacher_loss": 0.37215638160705566 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.37766045331954956, "learning_rate": 1.3878849212086165e-05, "loss": 0.2265, "step": 3200, "teacher_loss": 0.20968547463417053 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.2449851632118225, "learning_rate": 1.388318635246494e-05, "loss": 0.1866, "step": 3201, "teacher_loss": 0.1801028549671173 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.20897750556468964, "learning_rate": 1.3887523492843718e-05, "loss": 0.2275, "step": 3202, "teacher_loss": 0.22951540350914001 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.574865460395813, "learning_rate": 1.3891860633222496e-05, "loss": 0.2897, "step": 3203, "teacher_loss": 0.25800028443336487 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5864170789718628, "learning_rate": 1.3896197773601273e-05, "loss": 0.1943, "step": 3204, "teacher_loss": 0.15075168013572693 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5868630409240723, "learning_rate": 1.3900534913980051e-05, "loss": 0.3765, "step": 3205, "teacher_loss": 0.35307732224464417 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.32759028673171997, "learning_rate": 1.3904872054358825e-05, "loss": 0.2724, "step": 3206, "teacher_loss": 0.2662566304206848 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.46525177359580994, "learning_rate": 1.3909209194737603e-05, "loss": 0.2294, "step": 3207, "teacher_loss": 0.20324186980724335 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.30805495381355286, "learning_rate": 1.391354633511638e-05, "loss": 0.1859, "step": 3208, "teacher_loss": 0.17229008674621582 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5878300666809082, "learning_rate": 1.3917883475495158e-05, "loss": 0.5055, "step": 3209, "teacher_loss": 0.4963799715042114 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.4832676351070404, "learning_rate": 1.3922220615873934e-05, "loss": 0.3259, "step": 3210, "teacher_loss": 0.30845046043395996 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.1943540722131729, "learning_rate": 1.392655775625271e-05, "loss": 0.1747, "step": 3211, "teacher_loss": 0.17246964573860168 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.40365171432495117, "learning_rate": 1.3930894896631488e-05, "loss": 0.2419, "step": 3212, "teacher_loss": 0.2238958477973938 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.2506798505783081, "learning_rate": 1.3935232037010265e-05, "loss": 0.2503, "step": 3213, "teacher_loss": 0.250217080116272 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.20932012796401978, "learning_rate": 1.3939569177389041e-05, "loss": 0.2078, "step": 3214, "teacher_loss": 0.20760831236839294 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.6060541272163391, "learning_rate": 1.3943906317767819e-05, "loss": 0.3163, "step": 3215, "teacher_loss": 0.28407227993011475 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5119735598564148, "learning_rate": 1.3948243458146596e-05, "loss": 0.2351, "step": 3216, "teacher_loss": 0.20438314974308014 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.25984299182891846, "learning_rate": 1.3952580598525372e-05, "loss": 0.2145, "step": 3217, "teacher_loss": 0.2094123363494873 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.3918399512767792, "learning_rate": 1.395691773890415e-05, "loss": 0.2248, "step": 3218, "teacher_loss": 0.20627890527248383 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.8759027719497681, "learning_rate": 1.3961254879282926e-05, "loss": 0.4759, "step": 3219, "teacher_loss": 0.43150097131729126 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.6954171657562256, "learning_rate": 1.3965592019661704e-05, "loss": 0.3354, "step": 3220, "teacher_loss": 0.2953682839870453 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.436975359916687, "learning_rate": 1.3969929160040481e-05, "loss": 0.2817, "step": 3221, "teacher_loss": 0.2644992470741272 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.41084232926368713, "learning_rate": 1.3974266300419257e-05, "loss": 0.1883, "step": 3222, "teacher_loss": 0.16359253227710724 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.33008110523223877, "learning_rate": 1.3978603440798033e-05, "loss": 0.1844, "step": 3223, "teacher_loss": 0.16815996170043945 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.45917463302612305, "learning_rate": 1.398294058117681e-05, "loss": 0.3418, "step": 3224, "teacher_loss": 0.32876139879226685 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.37146615982055664, "learning_rate": 1.3987277721555588e-05, "loss": 0.247, "step": 3225, "teacher_loss": 0.2331475019454956 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5741246342658997, "learning_rate": 1.3991614861934366e-05, "loss": 0.3961, "step": 3226, "teacher_loss": 0.37629234790802 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.46848800778388977, "learning_rate": 1.3995952002313142e-05, "loss": 0.2355, "step": 3227, "teacher_loss": 0.20961040258407593 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 1.062383770942688, "learning_rate": 1.4000289142691918e-05, "loss": 0.477, "step": 3228, "teacher_loss": 0.4119214415550232 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.6475208401679993, "learning_rate": 1.4004626283070695e-05, "loss": 0.2255, "step": 3229, "teacher_loss": 0.17866329848766327 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.23250412940979004, "learning_rate": 1.4008963423449473e-05, "loss": 0.239, "step": 3230, "teacher_loss": 0.23974217474460602 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.48672613501548767, "learning_rate": 1.401330056382825e-05, "loss": 0.2313, "step": 3231, "teacher_loss": 0.2028699815273285 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5655722618103027, "learning_rate": 1.4017637704207027e-05, "loss": 0.2356, "step": 3232, "teacher_loss": 0.19898763298988342 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.5639442205429077, "learning_rate": 1.4021974844585802e-05, "loss": 0.3068, "step": 3233, "teacher_loss": 0.27822068333625793 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.571675717830658, "learning_rate": 1.402631198496458e-05, "loss": 0.2412, "step": 3234, "teacher_loss": 0.2044321596622467 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.4403180480003357, "learning_rate": 1.4030649125343358e-05, "loss": 0.2156, "step": 3235, "teacher_loss": 0.19065716862678528 }, { "compression_loss": 0.0, "epoch": 0.58, "label_loss": 0.504184365272522, "learning_rate": 1.4034986265722135e-05, "loss": 0.2927, "step": 3236, "teacher_loss": 0.2692336440086365 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5931829810142517, "learning_rate": 1.4039323406100911e-05, "loss": 0.3365, "step": 3237, "teacher_loss": 0.30800577998161316 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.16033734381198883, "learning_rate": 1.4043660546479687e-05, "loss": 0.173, "step": 3238, "teacher_loss": 0.17442336678504944 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.39114317297935486, "learning_rate": 1.4047997686858465e-05, "loss": 0.2597, "step": 3239, "teacher_loss": 0.24505002796649933 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.6499992609024048, "learning_rate": 1.4052334827237242e-05, "loss": 0.2077, "step": 3240, "teacher_loss": 0.1585073173046112 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4508711099624634, "learning_rate": 1.4056671967616018e-05, "loss": 0.2465, "step": 3241, "teacher_loss": 0.22374692559242249 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.39568930864334106, "learning_rate": 1.4061009107994796e-05, "loss": 0.2125, "step": 3242, "teacher_loss": 0.19212275743484497 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.673302173614502, "learning_rate": 1.4065346248373574e-05, "loss": 0.4002, "step": 3243, "teacher_loss": 0.3698629140853882 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.33611422777175903, "learning_rate": 1.406968338875235e-05, "loss": 0.3088, "step": 3244, "teacher_loss": 0.30579808354377747 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.24481135606765747, "learning_rate": 1.4074020529131125e-05, "loss": 0.1895, "step": 3245, "teacher_loss": 0.18340739607810974 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.34882795810699463, "learning_rate": 1.4078357669509903e-05, "loss": 0.2426, "step": 3246, "teacher_loss": 0.2307722270488739 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5321573615074158, "learning_rate": 1.408269480988868e-05, "loss": 0.3509, "step": 3247, "teacher_loss": 0.3308143615722656 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4466513395309448, "learning_rate": 1.4087031950267458e-05, "loss": 0.2673, "step": 3248, "teacher_loss": 0.24736738204956055 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.60186767578125, "learning_rate": 1.4091369090646234e-05, "loss": 0.348, "step": 3249, "teacher_loss": 0.3197728991508484 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.32665306329727173, "learning_rate": 1.409570623102501e-05, "loss": 0.1722, "step": 3250, "teacher_loss": 0.15509089827537537 }, { "epoch": 0.59, "eval_exact_match": 79.36613055818354, "eval_f1": 86.82463777405437, "step": 3250 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.648729681968689, "learning_rate": 1.4100043371403788e-05, "loss": 0.246, "step": 3251, "teacher_loss": 0.20126372575759888 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.87117600440979, "learning_rate": 1.4104380511782565e-05, "loss": 0.2952, "step": 3252, "teacher_loss": 0.23118405044078827 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.7060363292694092, "learning_rate": 1.4108717652161343e-05, "loss": 0.232, "step": 3253, "teacher_loss": 0.1793704628944397 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 1.0135180950164795, "learning_rate": 1.4113054792540119e-05, "loss": 0.542, "step": 3254, "teacher_loss": 0.48965102434158325 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.23564046621322632, "learning_rate": 1.4117391932918895e-05, "loss": 0.2493, "step": 3255, "teacher_loss": 0.2507632374763489 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4256027340888977, "learning_rate": 1.4121729073297673e-05, "loss": 0.1767, "step": 3256, "teacher_loss": 0.1490747183561325 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.43884116411209106, "learning_rate": 1.412606621367645e-05, "loss": 0.1902, "step": 3257, "teacher_loss": 0.16257108747959137 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.2848835289478302, "learning_rate": 1.4130403354055228e-05, "loss": 0.183, "step": 3258, "teacher_loss": 0.17172209918498993 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.15417338907718658, "learning_rate": 1.4134740494434004e-05, "loss": 0.2071, "step": 3259, "teacher_loss": 0.21302761137485504 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.37323999404907227, "learning_rate": 1.413907763481278e-05, "loss": 0.1956, "step": 3260, "teacher_loss": 0.1758425235748291 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.33266329765319824, "learning_rate": 1.4143414775191557e-05, "loss": 0.2197, "step": 3261, "teacher_loss": 0.2071089744567871 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4028015732765198, "learning_rate": 1.4147751915570335e-05, "loss": 0.3019, "step": 3262, "teacher_loss": 0.29073506593704224 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5626373887062073, "learning_rate": 1.415208905594911e-05, "loss": 0.2577, "step": 3263, "teacher_loss": 0.22385695576667786 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.11185181140899658, "learning_rate": 1.4156426196327888e-05, "loss": 0.185, "step": 3264, "teacher_loss": 0.1931128352880478 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.7400377988815308, "learning_rate": 1.4160763336706666e-05, "loss": 0.2806, "step": 3265, "teacher_loss": 0.22954051196575165 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.3646808862686157, "learning_rate": 1.4165100477085442e-05, "loss": 0.1755, "step": 3266, "teacher_loss": 0.15452909469604492 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.29807019233703613, "learning_rate": 1.4169437617464218e-05, "loss": 0.2813, "step": 3267, "teacher_loss": 0.27948975563049316 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4631107449531555, "learning_rate": 1.4173774757842996e-05, "loss": 0.292, "step": 3268, "teacher_loss": 0.27296876907348633 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5172280669212341, "learning_rate": 1.4178111898221773e-05, "loss": 0.3388, "step": 3269, "teacher_loss": 0.3189891576766968 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.2955642342567444, "learning_rate": 1.418244903860055e-05, "loss": 0.1977, "step": 3270, "teacher_loss": 0.18683956563472748 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.37230587005615234, "learning_rate": 1.4186786178979327e-05, "loss": 0.2962, "step": 3271, "teacher_loss": 0.28777503967285156 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5284925103187561, "learning_rate": 1.4191123319358103e-05, "loss": 0.3393, "step": 3272, "teacher_loss": 0.31830745935440063 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4318163990974426, "learning_rate": 1.419546045973688e-05, "loss": 0.4024, "step": 3273, "teacher_loss": 0.3991623520851135 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.323825865983963, "learning_rate": 1.4199797600115658e-05, "loss": 0.2059, "step": 3274, "teacher_loss": 0.19276204705238342 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5438075065612793, "learning_rate": 1.4204134740494435e-05, "loss": 0.3048, "step": 3275, "teacher_loss": 0.2782895565032959 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.3739818334579468, "learning_rate": 1.4208471880873211e-05, "loss": 0.3196, "step": 3276, "teacher_loss": 0.3135823607444763 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.25560757517814636, "learning_rate": 1.4212809021251987e-05, "loss": 0.1945, "step": 3277, "teacher_loss": 0.18769824504852295 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.4864589273929596, "learning_rate": 1.4217146161630765e-05, "loss": 0.2565, "step": 3278, "teacher_loss": 0.23090685904026031 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.6192125678062439, "learning_rate": 1.4221483302009543e-05, "loss": 0.3342, "step": 3279, "teacher_loss": 0.30247747898101807 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.3190414607524872, "learning_rate": 1.422582044238832e-05, "loss": 0.23, "step": 3280, "teacher_loss": 0.22011640667915344 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.7085514068603516, "learning_rate": 1.4230157582767096e-05, "loss": 0.3197, "step": 3281, "teacher_loss": 0.2765168249607086 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.18702015280723572, "learning_rate": 1.4234494723145872e-05, "loss": 0.2312, "step": 3282, "teacher_loss": 0.2360638678073883 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.1558387726545334, "learning_rate": 1.423883186352465e-05, "loss": 0.2178, "step": 3283, "teacher_loss": 0.2246885895729065 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.8111177682876587, "learning_rate": 1.4243169003903427e-05, "loss": 0.2903, "step": 3284, "teacher_loss": 0.23241904377937317 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.5067479610443115, "learning_rate": 1.4247506144282203e-05, "loss": 0.2373, "step": 3285, "teacher_loss": 0.20737068355083466 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.27260512113571167, "learning_rate": 1.4251843284660981e-05, "loss": 0.1753, "step": 3286, "teacher_loss": 0.16451531648635864 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.3851807117462158, "learning_rate": 1.4256180425039757e-05, "loss": 0.2029, "step": 3287, "teacher_loss": 0.18259108066558838 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.32090073823928833, "learning_rate": 1.4260517565418534e-05, "loss": 0.2403, "step": 3288, "teacher_loss": 0.23139940202236176 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.6241574287414551, "learning_rate": 1.426485470579731e-05, "loss": 0.3382, "step": 3289, "teacher_loss": 0.30637410283088684 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.28775396943092346, "learning_rate": 1.4269191846176088e-05, "loss": 0.2106, "step": 3290, "teacher_loss": 0.20206031203269958 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.217560276389122, "learning_rate": 1.4273528986554866e-05, "loss": 0.1902, "step": 3291, "teacher_loss": 0.1871793419122696 }, { "compression_loss": 0.0, "epoch": 0.59, "label_loss": 0.20497804880142212, "learning_rate": 1.4277866126933643e-05, "loss": 0.1692, "step": 3292, "teacher_loss": 0.16525980830192566 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.39691948890686035, "learning_rate": 1.4282203267312419e-05, "loss": 0.2641, "step": 3293, "teacher_loss": 0.24930711090564728 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.21685676276683807, "learning_rate": 1.4286540407691195e-05, "loss": 0.2561, "step": 3294, "teacher_loss": 0.2604849934577942 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.3912780284881592, "learning_rate": 1.4290877548069973e-05, "loss": 0.2783, "step": 3295, "teacher_loss": 0.26579728722572327 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.22853434085845947, "learning_rate": 1.429521468844875e-05, "loss": 0.2228, "step": 3296, "teacher_loss": 0.22212429344654083 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.7538334131240845, "learning_rate": 1.4299551828827528e-05, "loss": 0.2655, "step": 3297, "teacher_loss": 0.21122616529464722 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.4660327434539795, "learning_rate": 1.4303888969206302e-05, "loss": 0.2171, "step": 3298, "teacher_loss": 0.1894235610961914 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.5630836486816406, "learning_rate": 1.430822610958508e-05, "loss": 0.2606, "step": 3299, "teacher_loss": 0.22695714235305786 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.2052677571773529, "learning_rate": 1.4312563249963857e-05, "loss": 0.1629, "step": 3300, "teacher_loss": 0.15818661451339722 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.5099402666091919, "learning_rate": 1.4316900390342635e-05, "loss": 0.2393, "step": 3301, "teacher_loss": 0.2092798948287964 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.10229835659265518, "learning_rate": 1.4321237530721413e-05, "loss": 0.1556, "step": 3302, "teacher_loss": 0.16149216890335083 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.47938284277915955, "learning_rate": 1.4325574671100189e-05, "loss": 0.2057, "step": 3303, "teacher_loss": 0.17533773183822632 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.5525467991828918, "learning_rate": 1.4329911811478964e-05, "loss": 0.2504, "step": 3304, "teacher_loss": 0.21685676276683807 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.7509241104125977, "learning_rate": 1.4334248951857742e-05, "loss": 0.2359, "step": 3305, "teacher_loss": 0.1787256896495819 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.37272554636001587, "learning_rate": 1.433858609223652e-05, "loss": 0.2192, "step": 3306, "teacher_loss": 0.20218569040298462 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6773841381072998, "learning_rate": 1.4342923232615296e-05, "loss": 0.2566, "step": 3307, "teacher_loss": 0.2098880410194397 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.8599147200584412, "learning_rate": 1.4347260372994073e-05, "loss": 0.3662, "step": 3308, "teacher_loss": 0.3113848865032196 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6239175200462341, "learning_rate": 1.435159751337285e-05, "loss": 0.2336, "step": 3309, "teacher_loss": 0.1902831494808197 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.41819119453430176, "learning_rate": 1.4355934653751627e-05, "loss": 0.3033, "step": 3310, "teacher_loss": 0.29053324460983276 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.33756381273269653, "learning_rate": 1.4360271794130403e-05, "loss": 0.235, "step": 3311, "teacher_loss": 0.22365188598632812 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.4412263333797455, "learning_rate": 1.436460893450918e-05, "loss": 0.536, "step": 3312, "teacher_loss": 0.546475350856781 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.8449076414108276, "learning_rate": 1.4368946074887958e-05, "loss": 0.3005, "step": 3313, "teacher_loss": 0.2399749904870987 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.4076206088066101, "learning_rate": 1.4373283215266736e-05, "loss": 0.2032, "step": 3314, "teacher_loss": 0.18047389388084412 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.2986910939216614, "learning_rate": 1.4377620355645512e-05, "loss": 0.2505, "step": 3315, "teacher_loss": 0.2451736330986023 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.22177907824516296, "learning_rate": 1.4381957496024287e-05, "loss": 0.189, "step": 3316, "teacher_loss": 0.18540169298648834 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.7372938394546509, "learning_rate": 1.4386294636403065e-05, "loss": 0.347, "step": 3317, "teacher_loss": 0.30368396639823914 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.21904560923576355, "learning_rate": 1.4390631776781843e-05, "loss": 0.2347, "step": 3318, "teacher_loss": 0.2364935725927353 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.27498117089271545, "learning_rate": 1.439496891716062e-05, "loss": 0.1977, "step": 3319, "teacher_loss": 0.18913918733596802 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.35954350233078003, "learning_rate": 1.4399306057539395e-05, "loss": 0.2243, "step": 3320, "teacher_loss": 0.20928901433944702 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.3509919047355652, "learning_rate": 1.4403643197918172e-05, "loss": 0.283, "step": 3321, "teacher_loss": 0.27545228600502014 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.2068735957145691, "learning_rate": 1.440798033829695e-05, "loss": 0.175, "step": 3322, "teacher_loss": 0.17141315340995789 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.3089473843574524, "learning_rate": 1.4412317478675727e-05, "loss": 0.2518, "step": 3323, "teacher_loss": 0.24548670649528503 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.4513481557369232, "learning_rate": 1.4416654619054505e-05, "loss": 0.2358, "step": 3324, "teacher_loss": 0.21183869242668152 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6127954721450806, "learning_rate": 1.4420991759433281e-05, "loss": 0.2664, "step": 3325, "teacher_loss": 0.22791558504104614 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6712700724601746, "learning_rate": 1.4425328899812057e-05, "loss": 0.2892, "step": 3326, "teacher_loss": 0.24672988057136536 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.36389631032943726, "learning_rate": 1.4429666040190835e-05, "loss": 0.2207, "step": 3327, "teacher_loss": 0.20480097830295563 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.2541850805282593, "learning_rate": 1.4434003180569612e-05, "loss": 0.2058, "step": 3328, "teacher_loss": 0.20037469267845154 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.38025254011154175, "learning_rate": 1.4438340320948388e-05, "loss": 0.2198, "step": 3329, "teacher_loss": 0.2020048201084137 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.46609577536582947, "learning_rate": 1.4442677461327166e-05, "loss": 0.2782, "step": 3330, "teacher_loss": 0.25734543800354004 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.22417768836021423, "learning_rate": 1.4447014601705942e-05, "loss": 0.1802, "step": 3331, "teacher_loss": 0.17527559399604797 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.22912713885307312, "learning_rate": 1.445135174208472e-05, "loss": 0.2432, "step": 3332, "teacher_loss": 0.24478529393672943 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6016044616699219, "learning_rate": 1.4455688882463495e-05, "loss": 0.2983, "step": 3333, "teacher_loss": 0.26455914974212646 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.21600395441055298, "learning_rate": 1.4460026022842273e-05, "loss": 0.2025, "step": 3334, "teacher_loss": 0.2010117471218109 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.4427329897880554, "learning_rate": 1.446436316322105e-05, "loss": 0.2734, "step": 3335, "teacher_loss": 0.25455036759376526 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.38411688804626465, "learning_rate": 1.4468700303599826e-05, "loss": 0.3439, "step": 3336, "teacher_loss": 0.33941006660461426 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.8976858258247375, "learning_rate": 1.4473037443978604e-05, "loss": 0.3332, "step": 3337, "teacher_loss": 0.27052175998687744 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.35446709394454956, "learning_rate": 1.447737458435738e-05, "loss": 0.1938, "step": 3338, "teacher_loss": 0.17591030895709991 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.8310773372650146, "learning_rate": 1.4481711724736158e-05, "loss": 0.3417, "step": 3339, "teacher_loss": 0.2873011827468872 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6898626089096069, "learning_rate": 1.4486048865114935e-05, "loss": 0.2246, "step": 3340, "teacher_loss": 0.17290669679641724 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.5938445329666138, "learning_rate": 1.4490386005493713e-05, "loss": 0.3124, "step": 3341, "teacher_loss": 0.28109943866729736 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.3888205885887146, "learning_rate": 1.4494723145872487e-05, "loss": 0.2463, "step": 3342, "teacher_loss": 0.23047898709774017 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.20008406043052673, "learning_rate": 1.4499060286251265e-05, "loss": 0.1841, "step": 3343, "teacher_loss": 0.18233180046081543 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.3519536852836609, "learning_rate": 1.4503397426630042e-05, "loss": 0.2286, "step": 3344, "teacher_loss": 0.2149081826210022 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.6598502397537231, "learning_rate": 1.450773456700882e-05, "loss": 0.2824, "step": 3345, "teacher_loss": 0.24050915241241455 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.33505779504776, "learning_rate": 1.4512071707387598e-05, "loss": 0.2648, "step": 3346, "teacher_loss": 0.25701916217803955 }, { "compression_loss": 0.0, "epoch": 0.6, "label_loss": 0.5194143652915955, "learning_rate": 1.4516408847766372e-05, "loss": 0.2231, "step": 3347, "teacher_loss": 0.19022509455680847 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.42489635944366455, "learning_rate": 1.452074598814515e-05, "loss": 0.2206, "step": 3348, "teacher_loss": 0.1978747844696045 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4172751009464264, "learning_rate": 1.4525083128523927e-05, "loss": 0.188, "step": 3349, "teacher_loss": 0.16256208717823029 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.49236616492271423, "learning_rate": 1.4529420268902705e-05, "loss": 0.2505, "step": 3350, "teacher_loss": 0.22359010577201843 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5252891778945923, "learning_rate": 1.453375740928148e-05, "loss": 0.2546, "step": 3351, "teacher_loss": 0.2245016098022461 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.31457340717315674, "learning_rate": 1.4538094549660258e-05, "loss": 0.2998, "step": 3352, "teacher_loss": 0.2981276214122772 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.25483405590057373, "learning_rate": 1.4542431690039034e-05, "loss": 0.2666, "step": 3353, "teacher_loss": 0.2678525745868683 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.1261131912469864, "learning_rate": 1.4546768830417812e-05, "loss": 0.2029, "step": 3354, "teacher_loss": 0.21145838499069214 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.6247426271438599, "learning_rate": 1.4551105970796588e-05, "loss": 0.2099, "step": 3355, "teacher_loss": 0.16385877132415771 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.31666791439056396, "learning_rate": 1.4555443111175365e-05, "loss": 0.2929, "step": 3356, "teacher_loss": 0.2903040945529938 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4036228060722351, "learning_rate": 1.4559780251554143e-05, "loss": 0.2718, "step": 3357, "teacher_loss": 0.2571929693222046 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.40825363993644714, "learning_rate": 1.4564117391932919e-05, "loss": 0.2489, "step": 3358, "teacher_loss": 0.2312489151954651 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.33915138244628906, "learning_rate": 1.4568454532311696e-05, "loss": 0.2147, "step": 3359, "teacher_loss": 0.20091068744659424 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.3459758758544922, "learning_rate": 1.4572791672690472e-05, "loss": 0.1796, "step": 3360, "teacher_loss": 0.16113825142383575 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4025750160217285, "learning_rate": 1.457712881306925e-05, "loss": 0.1916, "step": 3361, "teacher_loss": 0.16817405819892883 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4482371509075165, "learning_rate": 1.4581465953448028e-05, "loss": 0.3028, "step": 3362, "teacher_loss": 0.2865876853466034 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.423972487449646, "learning_rate": 1.4585803093826805e-05, "loss": 0.294, "step": 3363, "teacher_loss": 0.2795642912387848 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.30741673707962036, "learning_rate": 1.459014023420558e-05, "loss": 0.2836, "step": 3364, "teacher_loss": 0.28096073865890503 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.42777758836746216, "learning_rate": 1.4594477374584357e-05, "loss": 0.2139, "step": 3365, "teacher_loss": 0.19011515378952026 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.1712648570537567, "learning_rate": 1.4598814514963135e-05, "loss": 0.2169, "step": 3366, "teacher_loss": 0.22198635339736938 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.30421602725982666, "learning_rate": 1.4603151655341912e-05, "loss": 0.2344, "step": 3367, "teacher_loss": 0.2266312688589096 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.40833213925361633, "learning_rate": 1.460748879572069e-05, "loss": 0.2707, "step": 3368, "teacher_loss": 0.2553955614566803 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4344463348388672, "learning_rate": 1.4611825936099464e-05, "loss": 0.3083, "step": 3369, "teacher_loss": 0.29432785511016846 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.46722233295440674, "learning_rate": 1.4616163076478242e-05, "loss": 0.2445, "step": 3370, "teacher_loss": 0.2197730988264084 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.31049641966819763, "learning_rate": 1.462050021685702e-05, "loss": 0.1851, "step": 3371, "teacher_loss": 0.17112025618553162 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4445427656173706, "learning_rate": 1.4624837357235797e-05, "loss": 0.3429, "step": 3372, "teacher_loss": 0.33159640431404114 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.3296544551849365, "learning_rate": 1.4629174497614573e-05, "loss": 0.2311, "step": 3373, "teacher_loss": 0.22016921639442444 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.7067635655403137, "learning_rate": 1.463351163799335e-05, "loss": 0.3513, "step": 3374, "teacher_loss": 0.31182870268821716 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.2632507085800171, "learning_rate": 1.4637848778372127e-05, "loss": 0.1555, "step": 3375, "teacher_loss": 0.14351242780685425 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.49469077587127686, "learning_rate": 1.4642185918750904e-05, "loss": 0.2288, "step": 3376, "teacher_loss": 0.19925202429294586 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.2655493915081024, "learning_rate": 1.464652305912968e-05, "loss": 0.2478, "step": 3377, "teacher_loss": 0.24585343897342682 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.12248153239488602, "learning_rate": 1.4650860199508458e-05, "loss": 0.1809, "step": 3378, "teacher_loss": 0.1874275654554367 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.262132465839386, "learning_rate": 1.4655197339887235e-05, "loss": 0.2307, "step": 3379, "teacher_loss": 0.22716563940048218 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.2127070426940918, "learning_rate": 1.4659534480266011e-05, "loss": 0.1859, "step": 3380, "teacher_loss": 0.18286865949630737 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5640524625778198, "learning_rate": 1.4663871620644789e-05, "loss": 0.2994, "step": 3381, "teacher_loss": 0.27000921964645386 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.31749674677848816, "learning_rate": 1.4668208761023565e-05, "loss": 0.2185, "step": 3382, "teacher_loss": 0.2074557989835739 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.2831347584724426, "learning_rate": 1.4672545901402342e-05, "loss": 0.2387, "step": 3383, "teacher_loss": 0.23373189568519592 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.41861122846603394, "learning_rate": 1.467688304178112e-05, "loss": 0.2573, "step": 3384, "teacher_loss": 0.2393331527709961 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.40169963240623474, "learning_rate": 1.4681220182159896e-05, "loss": 0.1617, "step": 3385, "teacher_loss": 0.1349981129169464 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5008865594863892, "learning_rate": 1.4685557322538672e-05, "loss": 0.2453, "step": 3386, "teacher_loss": 0.21694722771644592 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.23056113719940186, "learning_rate": 1.468989446291745e-05, "loss": 0.2116, "step": 3387, "teacher_loss": 0.20954419672489166 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5144234895706177, "learning_rate": 1.4694231603296227e-05, "loss": 0.2964, "step": 3388, "teacher_loss": 0.27213501930236816 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.563983678817749, "learning_rate": 1.4698568743675005e-05, "loss": 0.2342, "step": 3389, "teacher_loss": 0.19753378629684448 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.17529204487800598, "learning_rate": 1.4702905884053782e-05, "loss": 0.2057, "step": 3390, "teacher_loss": 0.2090734988451004 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.9061448574066162, "learning_rate": 1.4707243024432557e-05, "loss": 0.4089, "step": 3391, "teacher_loss": 0.3536835312843323 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5815277099609375, "learning_rate": 1.4711580164811334e-05, "loss": 0.2109, "step": 3392, "teacher_loss": 0.16971027851104736 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4202399253845215, "learning_rate": 1.4715917305190112e-05, "loss": 0.2154, "step": 3393, "teacher_loss": 0.1926787793636322 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.2878839373588562, "learning_rate": 1.472025444556889e-05, "loss": 0.2824, "step": 3394, "teacher_loss": 0.28177952766418457 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.31564658880233765, "learning_rate": 1.4724591585947665e-05, "loss": 0.1564, "step": 3395, "teacher_loss": 0.13865971565246582 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.24211916327476501, "learning_rate": 1.4728928726326441e-05, "loss": 0.1774, "step": 3396, "teacher_loss": 0.17018786072731018 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.09590311348438263, "learning_rate": 1.4733265866705219e-05, "loss": 0.2186, "step": 3397, "teacher_loss": 0.23228822648525238 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.22575128078460693, "learning_rate": 1.4737603007083997e-05, "loss": 0.177, "step": 3398, "teacher_loss": 0.17160624265670776 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.7020028829574585, "learning_rate": 1.4741940147462774e-05, "loss": 0.2932, "step": 3399, "teacher_loss": 0.24777421355247498 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.20376479625701904, "learning_rate": 1.474627728784155e-05, "loss": 0.2622, "step": 3400, "teacher_loss": 0.2686937749385834 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.4309338331222534, "learning_rate": 1.4750614428220328e-05, "loss": 0.2503, "step": 3401, "teacher_loss": 0.2301778793334961 }, { "compression_loss": 0.0, "epoch": 0.61, "label_loss": 0.5975043177604675, "learning_rate": 1.4754951568599104e-05, "loss": 0.3061, "step": 3402, "teacher_loss": 0.27368927001953125 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3087449073791504, "learning_rate": 1.4759288708977881e-05, "loss": 0.1939, "step": 3403, "teacher_loss": 0.18109653890132904 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.749091386795044, "learning_rate": 1.4763625849356657e-05, "loss": 0.462, "step": 3404, "teacher_loss": 0.4301269054412842 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3045800030231476, "learning_rate": 1.4767962989735435e-05, "loss": 0.3074, "step": 3405, "teacher_loss": 0.3077341616153717 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.27656614780426025, "learning_rate": 1.4772300130114212e-05, "loss": 0.2216, "step": 3406, "teacher_loss": 0.2154550701379776 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.34204572439193726, "learning_rate": 1.4776637270492988e-05, "loss": 0.233, "step": 3407, "teacher_loss": 0.2208937406539917 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.6076553463935852, "learning_rate": 1.4780974410871764e-05, "loss": 0.2613, "step": 3408, "teacher_loss": 0.22278070449829102 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3603961169719696, "learning_rate": 1.4785311551250542e-05, "loss": 0.4114, "step": 3409, "teacher_loss": 0.41710513830184937 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.41302379965782166, "learning_rate": 1.478964869162932e-05, "loss": 0.3728, "step": 3410, "teacher_loss": 0.3682812452316284 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.24380508065223694, "learning_rate": 1.4793985832008097e-05, "loss": 0.2926, "step": 3411, "teacher_loss": 0.29801759123802185 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.28050437569618225, "learning_rate": 1.4798322972386875e-05, "loss": 0.1697, "step": 3412, "teacher_loss": 0.1573762148618698 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.277423232793808, "learning_rate": 1.4802660112765649e-05, "loss": 0.2684, "step": 3413, "teacher_loss": 0.2673839330673218 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 1.0526862144470215, "learning_rate": 1.4806997253144427e-05, "loss": 0.2522, "step": 3414, "teacher_loss": 0.1632212996482849 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3796747624874115, "learning_rate": 1.4811334393523204e-05, "loss": 0.2379, "step": 3415, "teacher_loss": 0.2221188098192215 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.34162837266921997, "learning_rate": 1.4815671533901982e-05, "loss": 0.3372, "step": 3416, "teacher_loss": 0.33666083216667175 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.29953110218048096, "learning_rate": 1.4820008674280758e-05, "loss": 0.1704, "step": 3417, "teacher_loss": 0.15602856874465942 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.7754138708114624, "learning_rate": 1.4824345814659534e-05, "loss": 0.2568, "step": 3418, "teacher_loss": 0.1991792619228363 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2654818296432495, "learning_rate": 1.4828682955038311e-05, "loss": 0.2571, "step": 3419, "teacher_loss": 0.25615718960762024 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.18821729719638824, "learning_rate": 1.4833020095417089e-05, "loss": 0.1265, "step": 3420, "teacher_loss": 0.11963748186826706 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2233489602804184, "learning_rate": 1.4837357235795867e-05, "loss": 0.1507, "step": 3421, "teacher_loss": 0.14262288808822632 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4572219252586365, "learning_rate": 1.4841694376174643e-05, "loss": 0.2487, "step": 3422, "teacher_loss": 0.22548678517341614 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4110633134841919, "learning_rate": 1.484603151655342e-05, "loss": 0.2503, "step": 3423, "teacher_loss": 0.23239555954933167 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.36263561248779297, "learning_rate": 1.4850368656932196e-05, "loss": 0.2048, "step": 3424, "teacher_loss": 0.18724486231803894 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.6267473697662354, "learning_rate": 1.4854705797310974e-05, "loss": 0.3407, "step": 3425, "teacher_loss": 0.30892235040664673 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4371982216835022, "learning_rate": 1.485904293768975e-05, "loss": 0.3395, "step": 3426, "teacher_loss": 0.32864153385162354 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.5955886840820312, "learning_rate": 1.4863380078068527e-05, "loss": 0.31, "step": 3427, "teacher_loss": 0.2782576382160187 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4124058485031128, "learning_rate": 1.4867717218447305e-05, "loss": 0.2636, "step": 3428, "teacher_loss": 0.24701912701129913 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.37787556648254395, "learning_rate": 1.4872054358826081e-05, "loss": 0.2658, "step": 3429, "teacher_loss": 0.2533859312534332 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3903588056564331, "learning_rate": 1.4876391499204857e-05, "loss": 0.2042, "step": 3430, "teacher_loss": 0.18352849781513214 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.33652323484420776, "learning_rate": 1.4880728639583634e-05, "loss": 0.1875, "step": 3431, "teacher_loss": 0.17089374363422394 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3109883666038513, "learning_rate": 1.4885065779962412e-05, "loss": 0.1796, "step": 3432, "teacher_loss": 0.1650347113609314 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.37500643730163574, "learning_rate": 1.488940292034119e-05, "loss": 0.2515, "step": 3433, "teacher_loss": 0.2377958595752716 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2629029154777527, "learning_rate": 1.4893740060719966e-05, "loss": 0.3387, "step": 3434, "teacher_loss": 0.3470941483974457 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.7807692289352417, "learning_rate": 1.4898077201098742e-05, "loss": 0.3638, "step": 3435, "teacher_loss": 0.31749826669692993 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4221172332763672, "learning_rate": 1.4902414341477519e-05, "loss": 0.2316, "step": 3436, "teacher_loss": 0.21043507754802704 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.5393366813659668, "learning_rate": 1.4906751481856297e-05, "loss": 0.3467, "step": 3437, "teacher_loss": 0.32529664039611816 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.18557564914226532, "learning_rate": 1.4911088622235074e-05, "loss": 0.2341, "step": 3438, "teacher_loss": 0.23945313692092896 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.5244229435920715, "learning_rate": 1.491542576261385e-05, "loss": 0.272, "step": 3439, "teacher_loss": 0.24395358562469482 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3325469493865967, "learning_rate": 1.4919762902992626e-05, "loss": 0.2178, "step": 3440, "teacher_loss": 0.20505845546722412 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.5698983073234558, "learning_rate": 1.4924100043371404e-05, "loss": 0.1931, "step": 3441, "teacher_loss": 0.15125951170921326 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.24199610948562622, "learning_rate": 1.4928437183750181e-05, "loss": 0.2098, "step": 3442, "teacher_loss": 0.20624101161956787 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4070262312889099, "learning_rate": 1.4932774324128959e-05, "loss": 0.2283, "step": 3443, "teacher_loss": 0.208482027053833 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.4251914918422699, "learning_rate": 1.4937111464507735e-05, "loss": 0.35, "step": 3444, "teacher_loss": 0.34165945649147034 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.20547810196876526, "learning_rate": 1.4941448604886511e-05, "loss": 0.1587, "step": 3445, "teacher_loss": 0.15346668660640717 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.37087687849998474, "learning_rate": 1.4945785745265289e-05, "loss": 0.2152, "step": 3446, "teacher_loss": 0.19793125987052917 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.479753315448761, "learning_rate": 1.4950122885644066e-05, "loss": 0.1859, "step": 3447, "teacher_loss": 0.15327255427837372 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2640385031700134, "learning_rate": 1.4954460026022842e-05, "loss": 0.1954, "step": 3448, "teacher_loss": 0.1877971738576889 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3383401334285736, "learning_rate": 1.495879716640162e-05, "loss": 0.2292, "step": 3449, "teacher_loss": 0.21702560782432556 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.5732656121253967, "learning_rate": 1.4963134306780397e-05, "loss": 0.2456, "step": 3450, "teacher_loss": 0.20914244651794434 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.25484299659729004, "learning_rate": 1.4967471447159173e-05, "loss": 0.2286, "step": 3451, "teacher_loss": 0.22570136189460754 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2426004260778427, "learning_rate": 1.497180858753795e-05, "loss": 0.1862, "step": 3452, "teacher_loss": 0.17995625734329224 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.29108572006225586, "learning_rate": 1.4976145727916727e-05, "loss": 0.197, "step": 3453, "teacher_loss": 0.18654736876487732 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.14845633506774902, "learning_rate": 1.4980482868295504e-05, "loss": 0.1964, "step": 3454, "teacher_loss": 0.20168940722942352 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2927010953426361, "learning_rate": 1.4984820008674282e-05, "loss": 0.2123, "step": 3455, "teacher_loss": 0.20332428812980652 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.3085951805114746, "learning_rate": 1.4989157149053058e-05, "loss": 0.2331, "step": 3456, "teacher_loss": 0.22469574213027954 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2436988353729248, "learning_rate": 1.4993494289431834e-05, "loss": 0.2186, "step": 3457, "teacher_loss": 0.2158626914024353 }, { "compression_loss": 0.0, "epoch": 0.62, "label_loss": 0.2911866307258606, "learning_rate": 1.4997831429810612e-05, "loss": 0.2163, "step": 3458, "teacher_loss": 0.2079509049654007 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.623927116394043, "learning_rate": 1.5002168570189387e-05, "loss": 0.2495, "step": 3459, "teacher_loss": 0.20790499448776245 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.44187474250793457, "learning_rate": 1.5006505710568165e-05, "loss": 0.2109, "step": 3460, "teacher_loss": 0.1851940155029297 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3549068868160248, "learning_rate": 1.5010842850946943e-05, "loss": 0.2108, "step": 3461, "teacher_loss": 0.19476114213466644 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3677303194999695, "learning_rate": 1.5015179991325719e-05, "loss": 0.2465, "step": 3462, "teacher_loss": 0.23304778337478638 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.6731287240982056, "learning_rate": 1.5019517131704496e-05, "loss": 0.2178, "step": 3463, "teacher_loss": 0.1672612726688385 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3033241033554077, "learning_rate": 1.5023854272083274e-05, "loss": 0.3211, "step": 3464, "teacher_loss": 0.3230874538421631 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.47479310631752014, "learning_rate": 1.5028191412462052e-05, "loss": 0.2223, "step": 3465, "teacher_loss": 0.19427253305912018 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3262695074081421, "learning_rate": 1.503252855284083e-05, "loss": 0.2066, "step": 3466, "teacher_loss": 0.19325439631938934 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.42400845885276794, "learning_rate": 1.5036865693219605e-05, "loss": 0.2337, "step": 3467, "teacher_loss": 0.21253237128257751 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.44303131103515625, "learning_rate": 1.504120283359838e-05, "loss": 0.2158, "step": 3468, "teacher_loss": 0.19058318436145782 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.6803537607192993, "learning_rate": 1.5045539973977157e-05, "loss": 0.3079, "step": 3469, "teacher_loss": 0.2665376663208008 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4041784107685089, "learning_rate": 1.5049877114355935e-05, "loss": 0.259, "step": 3470, "teacher_loss": 0.2428523600101471 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5852808952331543, "learning_rate": 1.5054214254734712e-05, "loss": 0.1976, "step": 3471, "teacher_loss": 0.15454696118831635 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4442068934440613, "learning_rate": 1.505855139511349e-05, "loss": 0.246, "step": 3472, "teacher_loss": 0.223946675658226 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.31871461868286133, "learning_rate": 1.5062888535492266e-05, "loss": 0.2145, "step": 3473, "teacher_loss": 0.20292532444000244 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.27461183071136475, "learning_rate": 1.5067225675871043e-05, "loss": 0.1957, "step": 3474, "teacher_loss": 0.18689903616905212 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3858081102371216, "learning_rate": 1.5071562816249821e-05, "loss": 0.2397, "step": 3475, "teacher_loss": 0.2234216332435608 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.301738977432251, "learning_rate": 1.5075899956628599e-05, "loss": 0.4482, "step": 3476, "teacher_loss": 0.46448424458503723 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3485831618309021, "learning_rate": 1.5080237097007373e-05, "loss": 0.3492, "step": 3477, "teacher_loss": 0.3492443859577179 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.42254817485809326, "learning_rate": 1.5084574237386149e-05, "loss": 0.2039, "step": 3478, "teacher_loss": 0.1795610785484314 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3052677810192108, "learning_rate": 1.5088911377764926e-05, "loss": 0.2172, "step": 3479, "teacher_loss": 0.20743393898010254 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5425153970718384, "learning_rate": 1.5093248518143704e-05, "loss": 0.2933, "step": 3480, "teacher_loss": 0.2655656933784485 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.8153358697891235, "learning_rate": 1.5097585658522482e-05, "loss": 0.8623, "step": 3481, "teacher_loss": 0.8675612807273865 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3451419174671173, "learning_rate": 1.510192279890126e-05, "loss": 0.2072, "step": 3482, "teacher_loss": 0.1919066458940506 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4334903955459595, "learning_rate": 1.5106259939280035e-05, "loss": 0.3073, "step": 3483, "teacher_loss": 0.29331082105636597 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5016162395477295, "learning_rate": 1.5110597079658813e-05, "loss": 0.2573, "step": 3484, "teacher_loss": 0.23015466332435608 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3000132739543915, "learning_rate": 1.5114934220037587e-05, "loss": 0.2689, "step": 3485, "teacher_loss": 0.2654500901699066 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3497113585472107, "learning_rate": 1.5119271360416365e-05, "loss": 0.2219, "step": 3486, "teacher_loss": 0.2076825350522995 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5079174041748047, "learning_rate": 1.5123608500795142e-05, "loss": 0.1968, "step": 3487, "teacher_loss": 0.1622769832611084 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.26312461495399475, "learning_rate": 1.512794564117392e-05, "loss": 0.2623, "step": 3488, "teacher_loss": 0.2622022032737732 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.32187777757644653, "learning_rate": 1.5132282781552696e-05, "loss": 0.3099, "step": 3489, "teacher_loss": 0.3085310161113739 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3103920817375183, "learning_rate": 1.5136619921931473e-05, "loss": 0.3342, "step": 3490, "teacher_loss": 0.33689871430397034 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3706433176994324, "learning_rate": 1.5140957062310251e-05, "loss": 0.2537, "step": 3491, "teacher_loss": 0.24070224165916443 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.2998778223991394, "learning_rate": 1.5145294202689029e-05, "loss": 0.328, "step": 3492, "teacher_loss": 0.33110368251800537 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3468497693538666, "learning_rate": 1.5149631343067806e-05, "loss": 0.2343, "step": 3493, "teacher_loss": 0.2218460738658905 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.6053321361541748, "learning_rate": 1.5153968483446579e-05, "loss": 0.3311, "step": 3494, "teacher_loss": 0.3006088137626648 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.8557692170143127, "learning_rate": 1.5158305623825356e-05, "loss": 0.282, "step": 3495, "teacher_loss": 0.2182486355304718 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5796129107475281, "learning_rate": 1.5162642764204134e-05, "loss": 0.3189, "step": 3496, "teacher_loss": 0.2899051904678345 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.3411816954612732, "learning_rate": 1.5166979904582912e-05, "loss": 0.2684, "step": 3497, "teacher_loss": 0.2603408694267273 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4847777485847473, "learning_rate": 1.517131704496169e-05, "loss": 0.2241, "step": 3498, "teacher_loss": 0.19509901106357574 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4022645354270935, "learning_rate": 1.5175654185340467e-05, "loss": 0.2164, "step": 3499, "teacher_loss": 0.19578978419303894 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.15240633487701416, "learning_rate": 1.5179991325719243e-05, "loss": 0.1809, "step": 3500, "teacher_loss": 0.18403062224388123 }, { "epoch": 0.63, "eval_exact_match": 79.72563859981078, "eval_f1": 87.12878671331147, "step": 3500 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.21813340485095978, "learning_rate": 1.518432846609802e-05, "loss": 0.1765, "step": 3501, "teacher_loss": 0.1718859225511551 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.4939407706260681, "learning_rate": 1.5188665606476798e-05, "loss": 0.2498, "step": 3502, "teacher_loss": 0.22269971668720245 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.2664722204208374, "learning_rate": 1.5193002746855572e-05, "loss": 0.1908, "step": 3503, "teacher_loss": 0.1824444830417633 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.707490086555481, "learning_rate": 1.519733988723435e-05, "loss": 0.3369, "step": 3504, "teacher_loss": 0.29577672481536865 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.43466269969940186, "learning_rate": 1.5201677027613126e-05, "loss": 0.2525, "step": 3505, "teacher_loss": 0.23222452402114868 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.30984991788864136, "learning_rate": 1.5206014167991904e-05, "loss": 0.1998, "step": 3506, "teacher_loss": 0.18760037422180176 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.5077179670333862, "learning_rate": 1.5210351308370681e-05, "loss": 0.4354, "step": 3507, "teacher_loss": 0.42732805013656616 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.8567879796028137, "learning_rate": 1.5214688448749459e-05, "loss": 0.7833, "step": 3508, "teacher_loss": 0.7751364707946777 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.647532045841217, "learning_rate": 1.5219025589128236e-05, "loss": 0.3165, "step": 3509, "teacher_loss": 0.2797354459762573 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.8472211360931396, "learning_rate": 1.5223362729507014e-05, "loss": 0.3033, "step": 3510, "teacher_loss": 0.2428738921880722 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.1734766960144043, "learning_rate": 1.522769986988579e-05, "loss": 0.1694, "step": 3511, "teacher_loss": 0.16892538964748383 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.2166706919670105, "learning_rate": 1.5232037010264564e-05, "loss": 0.1902, "step": 3512, "teacher_loss": 0.18724998831748962 }, { "compression_loss": 0.0, "epoch": 0.63, "label_loss": 0.259573370218277, "learning_rate": 1.5236374150643342e-05, "loss": 0.1992, "step": 3513, "teacher_loss": 0.19252794981002808 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.740859866142273, "learning_rate": 1.524071129102212e-05, "loss": 0.3266, "step": 3514, "teacher_loss": 0.2805306315422058 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.42880597710609436, "learning_rate": 1.5245048431400897e-05, "loss": 0.2021, "step": 3515, "teacher_loss": 0.17694349586963654 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.6684502363204956, "learning_rate": 1.5249385571779673e-05, "loss": 0.3493, "step": 3516, "teacher_loss": 0.3138298988342285 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.22788703441619873, "learning_rate": 1.525372271215845e-05, "loss": 0.2297, "step": 3517, "teacher_loss": 0.2299208641052246 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.256854772567749, "learning_rate": 1.5258059852537228e-05, "loss": 0.1976, "step": 3518, "teacher_loss": 0.19104236364364624 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3419114351272583, "learning_rate": 1.5262396992916006e-05, "loss": 0.2456, "step": 3519, "teacher_loss": 0.2348945289850235 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.8600484132766724, "learning_rate": 1.5266734133294783e-05, "loss": 0.346, "step": 3520, "teacher_loss": 0.28883543610572815 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.15551477670669556, "learning_rate": 1.5271071273673558e-05, "loss": 0.2121, "step": 3521, "teacher_loss": 0.21837365627288818 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4352140724658966, "learning_rate": 1.5275408414052335e-05, "loss": 0.2487, "step": 3522, "teacher_loss": 0.22798162698745728 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3538445234298706, "learning_rate": 1.5279745554431113e-05, "loss": 0.2407, "step": 3523, "teacher_loss": 0.22812360525131226 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.24155201017856598, "learning_rate": 1.5284082694809887e-05, "loss": 0.2013, "step": 3524, "teacher_loss": 0.19684141874313354 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.17609374225139618, "learning_rate": 1.5288419835188665e-05, "loss": 0.1865, "step": 3525, "teacher_loss": 0.18765880167484283 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.45157262682914734, "learning_rate": 1.5292756975567442e-05, "loss": 0.3323, "step": 3526, "teacher_loss": 0.3190820813179016 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.45473435521125793, "learning_rate": 1.529709411594622e-05, "loss": 0.2471, "step": 3527, "teacher_loss": 0.22407421469688416 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.44554850459098816, "learning_rate": 1.5301431256324998e-05, "loss": 0.2381, "step": 3528, "teacher_loss": 0.2150544375181198 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.6162223219871521, "learning_rate": 1.5305768396703772e-05, "loss": 0.2158, "step": 3529, "teacher_loss": 0.17126025259494781 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3820152282714844, "learning_rate": 1.531010553708255e-05, "loss": 0.228, "step": 3530, "teacher_loss": 0.21084049344062805 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.16359582543373108, "learning_rate": 1.5314442677461327e-05, "loss": 0.122, "step": 3531, "teacher_loss": 0.1173916757106781 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4035291075706482, "learning_rate": 1.5318779817840105e-05, "loss": 0.3627, "step": 3532, "teacher_loss": 0.35814833641052246 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.5482479929924011, "learning_rate": 1.5323116958218882e-05, "loss": 0.2288, "step": 3533, "teacher_loss": 0.1932622641324997 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.1804192066192627, "learning_rate": 1.532745409859766e-05, "loss": 0.1708, "step": 3534, "teacher_loss": 0.1697012484073639 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.848974347114563, "learning_rate": 1.5331791238976434e-05, "loss": 0.3246, "step": 3535, "teacher_loss": 0.26633256673812866 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.6149992942810059, "learning_rate": 1.5336128379355212e-05, "loss": 0.2398, "step": 3536, "teacher_loss": 0.19810134172439575 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.29140013456344604, "learning_rate": 1.534046551973399e-05, "loss": 0.1723, "step": 3537, "teacher_loss": 0.15904486179351807 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 1.1961380243301392, "learning_rate": 1.5344802660112764e-05, "loss": 0.3743, "step": 3538, "teacher_loss": 0.28298419713974 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.5583049058914185, "learning_rate": 1.534913980049154e-05, "loss": 0.2936, "step": 3539, "teacher_loss": 0.26417407393455505 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.35531988739967346, "learning_rate": 1.535347694087032e-05, "loss": 0.2256, "step": 3540, "teacher_loss": 0.21116071939468384 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.29444342851638794, "learning_rate": 1.5357814081249097e-05, "loss": 0.238, "step": 3541, "teacher_loss": 0.23168343305587769 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.45763248205184937, "learning_rate": 1.5362151221627874e-05, "loss": 0.1905, "step": 3542, "teacher_loss": 0.16076692938804626 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4373299479484558, "learning_rate": 1.5366488362006652e-05, "loss": 0.2672, "step": 3543, "teacher_loss": 0.24832850694656372 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.36778074502944946, "learning_rate": 1.537082550238543e-05, "loss": 0.2311, "step": 3544, "teacher_loss": 0.21589943766593933 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3620985150337219, "learning_rate": 1.5375162642764207e-05, "loss": 0.2549, "step": 3545, "teacher_loss": 0.24298998713493347 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3954559862613678, "learning_rate": 1.537949978314298e-05, "loss": 0.2238, "step": 3546, "teacher_loss": 0.2047792226076126 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.23530299961566925, "learning_rate": 1.5383836923521756e-05, "loss": 0.2332, "step": 3547, "teacher_loss": 0.23301705718040466 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.38806092739105225, "learning_rate": 1.5388174063900533e-05, "loss": 0.2375, "step": 3548, "teacher_loss": 0.22077591717243195 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.2556641697883606, "learning_rate": 1.539251120427931e-05, "loss": 0.1874, "step": 3549, "teacher_loss": 0.17984139919281006 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.25484445691108704, "learning_rate": 1.539684834465809e-05, "loss": 0.1691, "step": 3550, "teacher_loss": 0.15962810814380646 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.8743991851806641, "learning_rate": 1.5401185485036866e-05, "loss": 0.5032, "step": 3551, "teacher_loss": 0.46190541982650757 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4293147325515747, "learning_rate": 1.5405522625415644e-05, "loss": 0.2516, "step": 3552, "teacher_loss": 0.23189595341682434 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.5538841485977173, "learning_rate": 1.540985976579442e-05, "loss": 0.2314, "step": 3553, "teacher_loss": 0.19551338255405426 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.26218414306640625, "learning_rate": 1.54141969061732e-05, "loss": 0.2315, "step": 3554, "teacher_loss": 0.22808539867401123 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3066752254962921, "learning_rate": 1.5418534046551977e-05, "loss": 0.2218, "step": 3555, "teacher_loss": 0.21231423318386078 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.2074851095676422, "learning_rate": 1.542287118693075e-05, "loss": 0.197, "step": 3556, "teacher_loss": 0.1958634853363037 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4917834997177124, "learning_rate": 1.5427208327309525e-05, "loss": 0.4156, "step": 3557, "teacher_loss": 0.4071061611175537 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.33499565720558167, "learning_rate": 1.5431545467688303e-05, "loss": 0.1899, "step": 3558, "teacher_loss": 0.1737859547138214 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.2396567016839981, "learning_rate": 1.543588260806708e-05, "loss": 0.234, "step": 3559, "teacher_loss": 0.23334139585494995 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.32577937841415405, "learning_rate": 1.5440219748445858e-05, "loss": 0.3202, "step": 3560, "teacher_loss": 0.3196263909339905 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.5833309292793274, "learning_rate": 1.5444556888824635e-05, "loss": 0.2664, "step": 3561, "teacher_loss": 0.23121987283229828 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.21850012242794037, "learning_rate": 1.5448894029203413e-05, "loss": 0.2564, "step": 3562, "teacher_loss": 0.26059362292289734 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.9676458239555359, "learning_rate": 1.545323116958219e-05, "loss": 0.2854, "step": 3563, "teacher_loss": 0.20960737764835358 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3769055902957916, "learning_rate": 1.545756830996097e-05, "loss": 0.2098, "step": 3564, "teacher_loss": 0.19120697677135468 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.3868241608142853, "learning_rate": 1.5461905450339743e-05, "loss": 0.2496, "step": 3565, "teacher_loss": 0.23430654406547546 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.6553928852081299, "learning_rate": 1.546624259071852e-05, "loss": 0.2624, "step": 3566, "teacher_loss": 0.21871358156204224 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.4719890356063843, "learning_rate": 1.5470579731097298e-05, "loss": 0.2315, "step": 3567, "teacher_loss": 0.20473061501979828 }, { "compression_loss": 0.0, "epoch": 0.64, "label_loss": 0.5522358417510986, "learning_rate": 1.5474916871476072e-05, "loss": 0.4031, "step": 3568, "teacher_loss": 0.3865233063697815 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.20686452090740204, "learning_rate": 1.547925401185485e-05, "loss": 0.2233, "step": 3569, "teacher_loss": 0.2251274734735489 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6643775701522827, "learning_rate": 1.5483591152233627e-05, "loss": 0.3061, "step": 3570, "teacher_loss": 0.2662498354911804 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.46865981817245483, "learning_rate": 1.5487928292612405e-05, "loss": 0.2499, "step": 3571, "teacher_loss": 0.2255779206752777 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5450855493545532, "learning_rate": 1.5492265432991183e-05, "loss": 0.3315, "step": 3572, "teacher_loss": 0.30775558948516846 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.33823856711387634, "learning_rate": 1.549660257336996e-05, "loss": 0.2693, "step": 3573, "teacher_loss": 0.2616141736507416 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.7502836585044861, "learning_rate": 1.5500939713748734e-05, "loss": 0.3328, "step": 3574, "teacher_loss": 0.2864474952220917 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.4689588248729706, "learning_rate": 1.5505276854127512e-05, "loss": 0.2321, "step": 3575, "teacher_loss": 0.20576024055480957 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6034872531890869, "learning_rate": 1.550961399450629e-05, "loss": 0.2155, "step": 3576, "teacher_loss": 0.17239147424697876 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5122894048690796, "learning_rate": 1.5513951134885067e-05, "loss": 0.2647, "step": 3577, "teacher_loss": 0.2371799349784851 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5640295743942261, "learning_rate": 1.5518288275263845e-05, "loss": 0.2973, "step": 3578, "teacher_loss": 0.267711877822876 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.30804014205932617, "learning_rate": 1.552262541564262e-05, "loss": 0.2113, "step": 3579, "teacher_loss": 0.20055457949638367 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6013041734695435, "learning_rate": 1.5526962556021397e-05, "loss": 0.2765, "step": 3580, "teacher_loss": 0.2404562383890152 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.3161754012107849, "learning_rate": 1.5531299696400174e-05, "loss": 0.227, "step": 3581, "teacher_loss": 0.21703986823558807 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.3295535445213318, "learning_rate": 1.553563683677895e-05, "loss": 0.2378, "step": 3582, "teacher_loss": 0.2276374101638794 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 1.0853112936019897, "learning_rate": 1.5539973977157726e-05, "loss": 0.3025, "step": 3583, "teacher_loss": 0.21556207537651062 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6198538541793823, "learning_rate": 1.5544311117536504e-05, "loss": 0.2453, "step": 3584, "teacher_loss": 0.20370802283287048 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.24094390869140625, "learning_rate": 1.554864825791528e-05, "loss": 0.206, "step": 3585, "teacher_loss": 0.2021379917860031 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.2509448528289795, "learning_rate": 1.555298539829406e-05, "loss": 0.155, "step": 3586, "teacher_loss": 0.1442955583333969 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.46481892466545105, "learning_rate": 1.5557322538672837e-05, "loss": 0.2167, "step": 3587, "teacher_loss": 0.18916043639183044 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5572981238365173, "learning_rate": 1.5561659679051614e-05, "loss": 0.21, "step": 3588, "teacher_loss": 0.17144931852817535 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.8200153708457947, "learning_rate": 1.5565996819430392e-05, "loss": 0.2644, "step": 3589, "teacher_loss": 0.20262448489665985 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.8211898803710938, "learning_rate": 1.5570333959809166e-05, "loss": 0.2491, "step": 3590, "teacher_loss": 0.18556594848632812 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.3262423872947693, "learning_rate": 1.557467110018794e-05, "loss": 0.2848, "step": 3591, "teacher_loss": 0.2802233099937439 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.674504280090332, "learning_rate": 1.5579008240566718e-05, "loss": 0.2646, "step": 3592, "teacher_loss": 0.2190292775630951 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.484086275100708, "learning_rate": 1.5583345380945496e-05, "loss": 0.2361, "step": 3593, "teacher_loss": 0.20854628086090088 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.7004497647285461, "learning_rate": 1.5587682521324273e-05, "loss": 0.2778, "step": 3594, "teacher_loss": 0.23078963160514832 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6153299808502197, "learning_rate": 1.559201966170305e-05, "loss": 0.3487, "step": 3595, "teacher_loss": 0.3191266357898712 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.3894866108894348, "learning_rate": 1.559635680208183e-05, "loss": 0.2691, "step": 3596, "teacher_loss": 0.2556788921356201 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5176110863685608, "learning_rate": 1.5600693942460606e-05, "loss": 0.3439, "step": 3597, "teacher_loss": 0.324614018201828 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6580513119697571, "learning_rate": 1.5605031082839384e-05, "loss": 0.3072, "step": 3598, "teacher_loss": 0.2681823968887329 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.3736642003059387, "learning_rate": 1.560936822321816e-05, "loss": 0.2576, "step": 3599, "teacher_loss": 0.2447434663772583 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.47863340377807617, "learning_rate": 1.5613705363596936e-05, "loss": 0.2055, "step": 3600, "teacher_loss": 0.17515403032302856 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.7348597645759583, "learning_rate": 1.561804250397571e-05, "loss": 0.292, "step": 3601, "teacher_loss": 0.24282091856002808 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.767880916595459, "learning_rate": 1.5622379644354488e-05, "loss": 0.3102, "step": 3602, "teacher_loss": 0.2593521475791931 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.4968601167201996, "learning_rate": 1.5626716784733265e-05, "loss": 0.2183, "step": 3603, "teacher_loss": 0.18730475008487701 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.2474328577518463, "learning_rate": 1.5631053925112043e-05, "loss": 0.2305, "step": 3604, "teacher_loss": 0.22860580682754517 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.2972242534160614, "learning_rate": 1.563539106549082e-05, "loss": 0.2602, "step": 3605, "teacher_loss": 0.25604140758514404 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.354631245136261, "learning_rate": 1.5639728205869598e-05, "loss": 0.277, "step": 3606, "teacher_loss": 0.2684188485145569 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.4142345190048218, "learning_rate": 1.5644065346248376e-05, "loss": 0.2955, "step": 3607, "teacher_loss": 0.28236067295074463 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6902436017990112, "learning_rate": 1.5648402486627153e-05, "loss": 0.3126, "step": 3608, "teacher_loss": 0.2706823945045471 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.42645570635795593, "learning_rate": 1.5652739627005927e-05, "loss": 0.2462, "step": 3609, "teacher_loss": 0.22611650824546814 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.4952872693538666, "learning_rate": 1.5657076767384705e-05, "loss": 0.2736, "step": 3610, "teacher_loss": 0.24896839261054993 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.7140209674835205, "learning_rate": 1.5661413907763483e-05, "loss": 0.3263, "step": 3611, "teacher_loss": 0.28323644399642944 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5983753204345703, "learning_rate": 1.5665751048142257e-05, "loss": 0.2586, "step": 3612, "teacher_loss": 0.22081002593040466 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6172915697097778, "learning_rate": 1.5670088188521035e-05, "loss": 0.2629, "step": 3613, "teacher_loss": 0.2235327959060669 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.49808406829833984, "learning_rate": 1.5674425328899812e-05, "loss": 0.3195, "step": 3614, "teacher_loss": 0.29966092109680176 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5055669546127319, "learning_rate": 1.567876246927859e-05, "loss": 0.2478, "step": 3615, "teacher_loss": 0.2191312611103058 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.45741939544677734, "learning_rate": 1.5683099609657367e-05, "loss": 0.3231, "step": 3616, "teacher_loss": 0.30820775032043457 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.5473480224609375, "learning_rate": 1.5687436750036145e-05, "loss": 0.3724, "step": 3617, "teacher_loss": 0.3529682755470276 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.2577945590019226, "learning_rate": 1.569177389041492e-05, "loss": 0.2059, "step": 3618, "teacher_loss": 0.2001163363456726 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.519547700881958, "learning_rate": 1.5696111030793697e-05, "loss": 0.3138, "step": 3619, "teacher_loss": 0.2909475862979889 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.6143239736557007, "learning_rate": 1.5700448171172475e-05, "loss": 0.3349, "step": 3620, "teacher_loss": 0.30390843749046326 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.7262749671936035, "learning_rate": 1.5704785311551252e-05, "loss": 0.2743, "step": 3621, "teacher_loss": 0.22408419847488403 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.491428017616272, "learning_rate": 1.5709122451930026e-05, "loss": 0.2906, "step": 3622, "teacher_loss": 0.2682454586029053 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.1648576259613037, "learning_rate": 1.5713459592308804e-05, "loss": 0.1881, "step": 3623, "teacher_loss": 0.1906564086675644 }, { "compression_loss": 0.0, "epoch": 0.65, "label_loss": 0.45461952686309814, "learning_rate": 1.571779673268758e-05, "loss": 0.2144, "step": 3624, "teacher_loss": 0.18766288459300995 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5948424339294434, "learning_rate": 1.572213387306636e-05, "loss": 0.2691, "step": 3625, "teacher_loss": 0.23289819061756134 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5437354445457458, "learning_rate": 1.5726471013445133e-05, "loss": 0.2959, "step": 3626, "teacher_loss": 0.268393337726593 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.4256640076637268, "learning_rate": 1.573080815382391e-05, "loss": 0.2178, "step": 3627, "teacher_loss": 0.19470307230949402 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.6151233315467834, "learning_rate": 1.573514529420269e-05, "loss": 0.2822, "step": 3628, "teacher_loss": 0.24517272412776947 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5243526697158813, "learning_rate": 1.5739482434581466e-05, "loss": 0.269, "step": 3629, "teacher_loss": 0.24066051840782166 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.32326650619506836, "learning_rate": 1.5743819574960244e-05, "loss": 0.1649, "step": 3630, "teacher_loss": 0.14732235670089722 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.48410725593566895, "learning_rate": 1.574815671533902e-05, "loss": 0.3168, "step": 3631, "teacher_loss": 0.2982001006603241 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3797553777694702, "learning_rate": 1.57524938557178e-05, "loss": 0.1818, "step": 3632, "teacher_loss": 0.15977589786052704 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.42237675189971924, "learning_rate": 1.5756830996096573e-05, "loss": 0.3324, "step": 3633, "teacher_loss": 0.32245340943336487 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.1967628002166748, "learning_rate": 1.576116813647535e-05, "loss": 0.2138, "step": 3634, "teacher_loss": 0.21568498015403748 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5049176216125488, "learning_rate": 1.5765505276854125e-05, "loss": 0.2319, "step": 3635, "teacher_loss": 0.20160309970378876 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.2041967511177063, "learning_rate": 1.5769842417232903e-05, "loss": 0.1761, "step": 3636, "teacher_loss": 0.17294825613498688 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5666379928588867, "learning_rate": 1.577417955761168e-05, "loss": 0.2611, "step": 3637, "teacher_loss": 0.2271299809217453 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.8996134996414185, "learning_rate": 1.5778516697990458e-05, "loss": 0.3724, "step": 3638, "teacher_loss": 0.31387490034103394 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 1.3876512050628662, "learning_rate": 1.5782853838369236e-05, "loss": 0.4111, "step": 3639, "teacher_loss": 0.3026413917541504 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.637281596660614, "learning_rate": 1.5787190978748013e-05, "loss": 0.2142, "step": 3640, "teacher_loss": 0.16713929176330566 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.46090635657310486, "learning_rate": 1.579152811912679e-05, "loss": 0.2423, "step": 3641, "teacher_loss": 0.21799179911613464 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.35091036558151245, "learning_rate": 1.579586525950557e-05, "loss": 0.267, "step": 3642, "teacher_loss": 0.2576674818992615 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.2262699007987976, "learning_rate": 1.5800202399884346e-05, "loss": 0.2466, "step": 3643, "teacher_loss": 0.2488035261631012 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.346238374710083, "learning_rate": 1.5804539540263117e-05, "loss": 0.2416, "step": 3644, "teacher_loss": 0.22998301684856415 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.36392971873283386, "learning_rate": 1.5808876680641895e-05, "loss": 0.2901, "step": 3645, "teacher_loss": 0.281871497631073 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.514195442199707, "learning_rate": 1.5813213821020672e-05, "loss": 0.2338, "step": 3646, "teacher_loss": 0.20267260074615479 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3243282437324524, "learning_rate": 1.581755096139945e-05, "loss": 0.2267, "step": 3647, "teacher_loss": 0.21585705876350403 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5454914569854736, "learning_rate": 1.5821888101778228e-05, "loss": 0.3048, "step": 3648, "teacher_loss": 0.27808240056037903 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.4907207489013672, "learning_rate": 1.5826225242157005e-05, "loss": 0.1717, "step": 3649, "teacher_loss": 0.13626837730407715 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.49814796447753906, "learning_rate": 1.5830562382535783e-05, "loss": 0.2262, "step": 3650, "teacher_loss": 0.195997953414917 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3613089323043823, "learning_rate": 1.583489952291456e-05, "loss": 0.1965, "step": 3651, "teacher_loss": 0.17814019322395325 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3954395651817322, "learning_rate": 1.5839236663293338e-05, "loss": 0.2655, "step": 3652, "teacher_loss": 0.251077800989151 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5293263792991638, "learning_rate": 1.5843573803672112e-05, "loss": 0.2742, "step": 3653, "teacher_loss": 0.24580731987953186 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.39331650733947754, "learning_rate": 1.584791094405089e-05, "loss": 0.2385, "step": 3654, "teacher_loss": 0.22134628891944885 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.28252631425857544, "learning_rate": 1.5852248084429664e-05, "loss": 0.2135, "step": 3655, "teacher_loss": 0.20582029223442078 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 1.1032443046569824, "learning_rate": 1.5856585224808442e-05, "loss": 0.3304, "step": 3656, "teacher_loss": 0.24451056122779846 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.19120267033576965, "learning_rate": 1.586092236518722e-05, "loss": 0.1815, "step": 3657, "teacher_loss": 0.1804046928882599 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3292529582977295, "learning_rate": 1.5865259505565997e-05, "loss": 0.3004, "step": 3658, "teacher_loss": 0.2971589267253876 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5031317472457886, "learning_rate": 1.5869596645944775e-05, "loss": 0.3029, "step": 3659, "teacher_loss": 0.2806471884250641 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3349095582962036, "learning_rate": 1.5873933786323552e-05, "loss": 0.1937, "step": 3660, "teacher_loss": 0.17799094319343567 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.9139611721038818, "learning_rate": 1.587827092670233e-05, "loss": 0.3291, "step": 3661, "teacher_loss": 0.2641031742095947 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.6306778788566589, "learning_rate": 1.5882608067081104e-05, "loss": 0.4089, "step": 3662, "teacher_loss": 0.3842778503894806 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.29057514667510986, "learning_rate": 1.5886945207459882e-05, "loss": 0.2828, "step": 3663, "teacher_loss": 0.28189635276794434 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.40674033761024475, "learning_rate": 1.589128234783866e-05, "loss": 0.2813, "step": 3664, "teacher_loss": 0.2673141360282898 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.4503912031650543, "learning_rate": 1.5895619488217437e-05, "loss": 0.2392, "step": 3665, "teacher_loss": 0.21576957404613495 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.7979863882064819, "learning_rate": 1.589995662859621e-05, "loss": 0.2841, "step": 3666, "teacher_loss": 0.2270311415195465 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3613271415233612, "learning_rate": 1.590429376897499e-05, "loss": 0.2886, "step": 3667, "teacher_loss": 0.28050917387008667 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.276175856590271, "learning_rate": 1.5908630909353767e-05, "loss": 0.2573, "step": 3668, "teacher_loss": 0.2552304267883301 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.17194274067878723, "learning_rate": 1.5912968049732544e-05, "loss": 0.1979, "step": 3669, "teacher_loss": 0.20079147815704346 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.43235665559768677, "learning_rate": 1.591730519011132e-05, "loss": 0.1966, "step": 3670, "teacher_loss": 0.1704035997390747 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.5696755647659302, "learning_rate": 1.5921642330490096e-05, "loss": 0.2245, "step": 3671, "teacher_loss": 0.18612641096115112 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3324525058269501, "learning_rate": 1.5925979470868874e-05, "loss": 0.2732, "step": 3672, "teacher_loss": 0.2666308283805847 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.24366751313209534, "learning_rate": 1.593031661124765e-05, "loss": 0.2588, "step": 3673, "teacher_loss": 0.2605050504207611 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.3631001114845276, "learning_rate": 1.593465375162643e-05, "loss": 0.2284, "step": 3674, "teacher_loss": 0.2134379893541336 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.31601378321647644, "learning_rate": 1.5938990892005206e-05, "loss": 0.2063, "step": 3675, "teacher_loss": 0.19411563873291016 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.24740441143512726, "learning_rate": 1.5943328032383984e-05, "loss": 0.2328, "step": 3676, "teacher_loss": 0.2312251478433609 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.496989905834198, "learning_rate": 1.594766517276276e-05, "loss": 0.3291, "step": 3677, "teacher_loss": 0.31045952439308167 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.22733494639396667, "learning_rate": 1.5952002313141536e-05, "loss": 0.2471, "step": 3678, "teacher_loss": 0.24927252531051636 }, { "compression_loss": 0.0, "epoch": 0.66, "label_loss": 0.23540058732032776, "learning_rate": 1.595633945352031e-05, "loss": 0.2549, "step": 3679, "teacher_loss": 0.2570544481277466 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.40414029359817505, "learning_rate": 1.5960676593899088e-05, "loss": 0.2912, "step": 3680, "teacher_loss": 0.27861422300338745 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5732398629188538, "learning_rate": 1.5965013734277865e-05, "loss": 0.349, "step": 3681, "teacher_loss": 0.3240451514720917 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.29784345626831055, "learning_rate": 1.5969350874656643e-05, "loss": 0.2169, "step": 3682, "teacher_loss": 0.20796120166778564 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3988625407218933, "learning_rate": 1.597368801503542e-05, "loss": 0.2253, "step": 3683, "teacher_loss": 0.2060111165046692 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3626754879951477, "learning_rate": 1.5978025155414198e-05, "loss": 0.1892, "step": 3684, "teacher_loss": 0.16995102167129517 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.17476633191108704, "learning_rate": 1.5982362295792976e-05, "loss": 0.1658, "step": 3685, "teacher_loss": 0.16474947333335876 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 1.1083035469055176, "learning_rate": 1.5986699436171754e-05, "loss": 0.339, "step": 3686, "teacher_loss": 0.2535526752471924 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5120720267295837, "learning_rate": 1.599103657655053e-05, "loss": 0.2415, "step": 3687, "teacher_loss": 0.21138286590576172 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.31147801876068115, "learning_rate": 1.5995373716929302e-05, "loss": 0.2128, "step": 3688, "teacher_loss": 0.20180727541446686 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.2520235478878021, "learning_rate": 1.599971085730808e-05, "loss": 0.237, "step": 3689, "teacher_loss": 0.2353190779685974 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.6757015585899353, "learning_rate": 1.6004047997686857e-05, "loss": 0.2932, "step": 3690, "teacher_loss": 0.2507269084453583 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.32675647735595703, "learning_rate": 1.6008385138065635e-05, "loss": 0.2994, "step": 3691, "teacher_loss": 0.29632702469825745 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.49801209568977356, "learning_rate": 1.6012722278444413e-05, "loss": 0.303, "step": 3692, "teacher_loss": 0.28133079409599304 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.18499904870986938, "learning_rate": 1.601705941882319e-05, "loss": 0.2106, "step": 3693, "teacher_loss": 0.21348942816257477 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.43742239475250244, "learning_rate": 1.6021396559201968e-05, "loss": 0.2656, "step": 3694, "teacher_loss": 0.24650748074054718 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.47437185049057007, "learning_rate": 1.6025733699580745e-05, "loss": 0.2556, "step": 3695, "teacher_loss": 0.23133578896522522 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.28144145011901855, "learning_rate": 1.6030070839959523e-05, "loss": 0.2216, "step": 3696, "teacher_loss": 0.21491554379463196 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.698313295841217, "learning_rate": 1.6034407980338297e-05, "loss": 0.2697, "step": 3697, "teacher_loss": 0.22211724519729614 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.23900443315505981, "learning_rate": 1.6038745120717075e-05, "loss": 0.2505, "step": 3698, "teacher_loss": 0.2517843246459961 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.4733828008174896, "learning_rate": 1.604308226109585e-05, "loss": 0.2223, "step": 3699, "teacher_loss": 0.19442662596702576 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.6261241436004639, "learning_rate": 1.6047419401474627e-05, "loss": 0.3006, "step": 3700, "teacher_loss": 0.26447755098342896 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.7924472093582153, "learning_rate": 1.6051756541853404e-05, "loss": 0.352, "step": 3701, "teacher_loss": 0.3030155301094055 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.31122374534606934, "learning_rate": 1.6056093682232182e-05, "loss": 0.204, "step": 3702, "teacher_loss": 0.19211140275001526 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.428591251373291, "learning_rate": 1.606043082261096e-05, "loss": 0.2274, "step": 3703, "teacher_loss": 0.2050548642873764 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.4345467686653137, "learning_rate": 1.6064767962989737e-05, "loss": 0.1722, "step": 3704, "teacher_loss": 0.143045574426651 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.37999799847602844, "learning_rate": 1.6069105103368515e-05, "loss": 0.229, "step": 3705, "teacher_loss": 0.21225020289421082 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.27530303597450256, "learning_rate": 1.607344224374729e-05, "loss": 0.1973, "step": 3706, "teacher_loss": 0.18861901760101318 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.08717583864927292, "learning_rate": 1.6077779384126067e-05, "loss": 0.2106, "step": 3707, "teacher_loss": 0.2242676317691803 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.27346599102020264, "learning_rate": 1.6082116524504844e-05, "loss": 0.2096, "step": 3708, "teacher_loss": 0.20248793065547943 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.29856640100479126, "learning_rate": 1.6086453664883622e-05, "loss": 0.2314, "step": 3709, "teacher_loss": 0.2239629030227661 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.23751837015151978, "learning_rate": 1.6090790805262396e-05, "loss": 0.179, "step": 3710, "teacher_loss": 0.17245493829250336 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.24449267983436584, "learning_rate": 1.6095127945641174e-05, "loss": 0.1353, "step": 3711, "teacher_loss": 0.12312253564596176 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.22178930044174194, "learning_rate": 1.609946508601995e-05, "loss": 0.2283, "step": 3712, "teacher_loss": 0.22905665636062622 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 1.222865104675293, "learning_rate": 1.610380222639873e-05, "loss": 0.8242, "step": 3713, "teacher_loss": 0.7799502611160278 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.44414788484573364, "learning_rate": 1.6108139366777507e-05, "loss": 0.2449, "step": 3714, "teacher_loss": 0.22273987531661987 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.4587915539741516, "learning_rate": 1.611247650715628e-05, "loss": 0.2597, "step": 3715, "teacher_loss": 0.23758479952812195 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.21698248386383057, "learning_rate": 1.611681364753506e-05, "loss": 0.1828, "step": 3716, "teacher_loss": 0.1790435016155243 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.4231051206588745, "learning_rate": 1.6121150787913836e-05, "loss": 0.2469, "step": 3717, "teacher_loss": 0.2272929549217224 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.6544105410575867, "learning_rate": 1.6125487928292614e-05, "loss": 0.2112, "step": 3718, "teacher_loss": 0.1619209498167038 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5293720364570618, "learning_rate": 1.612982506867139e-05, "loss": 0.2618, "step": 3719, "teacher_loss": 0.23203915357589722 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3657592535018921, "learning_rate": 1.6134162209050166e-05, "loss": 0.2251, "step": 3720, "teacher_loss": 0.20950651168823242 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5583572387695312, "learning_rate": 1.6138499349428943e-05, "loss": 0.4385, "step": 3721, "teacher_loss": 0.42514514923095703 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.38378602266311646, "learning_rate": 1.614283648980772e-05, "loss": 0.2203, "step": 3722, "teacher_loss": 0.2021283209323883 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.8183550834655762, "learning_rate": 1.6147173630186495e-05, "loss": 0.4684, "step": 3723, "teacher_loss": 0.42947232723236084 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3264024257659912, "learning_rate": 1.6151510770565273e-05, "loss": 0.2316, "step": 3724, "teacher_loss": 0.22109563648700714 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.6472335457801819, "learning_rate": 1.615584791094405e-05, "loss": 0.3666, "step": 3725, "teacher_loss": 0.33537721633911133 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.27490532398223877, "learning_rate": 1.6160185051322828e-05, "loss": 0.2683, "step": 3726, "teacher_loss": 0.26755839586257935 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.8450971245765686, "learning_rate": 1.6164522191701606e-05, "loss": 0.2502, "step": 3727, "teacher_loss": 0.18413874506950378 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5311121940612793, "learning_rate": 1.6168859332080383e-05, "loss": 0.3263, "step": 3728, "teacher_loss": 0.30357107520103455 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3031958341598511, "learning_rate": 1.617319647245916e-05, "loss": 0.2363, "step": 3729, "teacher_loss": 0.2288408875465393 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.3416759669780731, "learning_rate": 1.617753361283794e-05, "loss": 0.1788, "step": 3730, "teacher_loss": 0.16065427660942078 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.4747071862220764, "learning_rate": 1.6181870753216713e-05, "loss": 0.2281, "step": 3731, "teacher_loss": 0.2007313072681427 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.5879024863243103, "learning_rate": 1.6186207893595487e-05, "loss": 0.2583, "step": 3732, "teacher_loss": 0.22163701057434082 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.34162527322769165, "learning_rate": 1.6190545033974265e-05, "loss": 0.2115, "step": 3733, "teacher_loss": 0.19700568914413452 }, { "compression_loss": 0.0, "epoch": 0.67, "label_loss": 0.24916669726371765, "learning_rate": 1.6194882174353042e-05, "loss": 0.3292, "step": 3734, "teacher_loss": 0.3380778431892395 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5221686363220215, "learning_rate": 1.619921931473182e-05, "loss": 0.2508, "step": 3735, "teacher_loss": 0.22060123085975647 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2968100309371948, "learning_rate": 1.6203556455110597e-05, "loss": 0.2629, "step": 3736, "teacher_loss": 0.2591715455055237 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.17533570528030396, "learning_rate": 1.6207893595489375e-05, "loss": 0.2154, "step": 3737, "teacher_loss": 0.21987247467041016 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5514644384384155, "learning_rate": 1.6212230735868153e-05, "loss": 0.2708, "step": 3738, "teacher_loss": 0.23958361148834229 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2269068956375122, "learning_rate": 1.621656787624693e-05, "loss": 0.2207, "step": 3739, "teacher_loss": 0.22006107866764069 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2766556441783905, "learning_rate": 1.6220905016625708e-05, "loss": 0.2143, "step": 3740, "teacher_loss": 0.20733410120010376 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3741043210029602, "learning_rate": 1.6225242157004482e-05, "loss": 0.2323, "step": 3741, "teacher_loss": 0.21649107336997986 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5447278618812561, "learning_rate": 1.6229579297383256e-05, "loss": 0.2957, "step": 3742, "teacher_loss": 0.26798418164253235 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.8393168449401855, "learning_rate": 1.6233916437762034e-05, "loss": 0.273, "step": 3743, "teacher_loss": 0.21010853350162506 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.4853748679161072, "learning_rate": 1.623825357814081e-05, "loss": 0.2402, "step": 3744, "teacher_loss": 0.21292205154895782 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5697771310806274, "learning_rate": 1.624259071851959e-05, "loss": 0.2511, "step": 3745, "teacher_loss": 0.21569518744945526 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.34384316205978394, "learning_rate": 1.6246927858898367e-05, "loss": 0.2566, "step": 3746, "teacher_loss": 0.24689540266990662 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3202267587184906, "learning_rate": 1.6251264999277144e-05, "loss": 0.2682, "step": 3747, "teacher_loss": 0.2624393701553345 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6127591729164124, "learning_rate": 1.6255602139655922e-05, "loss": 0.274, "step": 3748, "teacher_loss": 0.23634150624275208 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5328792929649353, "learning_rate": 1.62599392800347e-05, "loss": 0.2923, "step": 3749, "teacher_loss": 0.2655397057533264 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2766993045806885, "learning_rate": 1.6264276420413474e-05, "loss": 0.2154, "step": 3750, "teacher_loss": 0.20853738486766815 }, { "epoch": 0.68, "eval_exact_match": 79.80132450331126, "eval_f1": 87.25872130468825, "step": 3750 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5422698259353638, "learning_rate": 1.626861356079225e-05, "loss": 0.3071, "step": 3751, "teacher_loss": 0.28099894523620605 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.18106874823570251, "learning_rate": 1.627295070117103e-05, "loss": 0.1843, "step": 3752, "teacher_loss": 0.184663325548172 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.4139200448989868, "learning_rate": 1.6277287841549803e-05, "loss": 0.2942, "step": 3753, "teacher_loss": 0.2808450758457184 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3906204104423523, "learning_rate": 1.628162498192858e-05, "loss": 0.207, "step": 3754, "teacher_loss": 0.18658968806266785 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5382830500602722, "learning_rate": 1.628596212230736e-05, "loss": 0.2549, "step": 3755, "teacher_loss": 0.22344517707824707 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.7961368560791016, "learning_rate": 1.6290299262686136e-05, "loss": 0.2384, "step": 3756, "teacher_loss": 0.17644289135932922 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2446788251399994, "learning_rate": 1.6294636403064914e-05, "loss": 0.1743, "step": 3757, "teacher_loss": 0.16648009419441223 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2495914101600647, "learning_rate": 1.629897354344369e-05, "loss": 0.3013, "step": 3758, "teacher_loss": 0.30702510476112366 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3456244468688965, "learning_rate": 1.6303310683822466e-05, "loss": 0.215, "step": 3759, "teacher_loss": 0.2005200833082199 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.7934640645980835, "learning_rate": 1.6307647824201243e-05, "loss": 0.265, "step": 3760, "teacher_loss": 0.2062867283821106 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6742236614227295, "learning_rate": 1.631198496458002e-05, "loss": 0.3627, "step": 3761, "teacher_loss": 0.3280717134475708 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.8747836351394653, "learning_rate": 1.63163221049588e-05, "loss": 0.3633, "step": 3762, "teacher_loss": 0.30644142627716064 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2645156681537628, "learning_rate": 1.6320659245337576e-05, "loss": 0.2345, "step": 3763, "teacher_loss": 0.23113486170768738 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 1.065383791923523, "learning_rate": 1.632499638571635e-05, "loss": 0.4306, "step": 3764, "teacher_loss": 0.3600945472717285 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.49690085649490356, "learning_rate": 1.6329333526095128e-05, "loss": 0.2564, "step": 3765, "teacher_loss": 0.2296697348356247 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.42714372277259827, "learning_rate": 1.6333670666473906e-05, "loss": 0.4209, "step": 3766, "teacher_loss": 0.4202342629432678 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.8831572532653809, "learning_rate": 1.633800780685268e-05, "loss": 0.2951, "step": 3767, "teacher_loss": 0.22979632019996643 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6498720049858093, "learning_rate": 1.6342344947231458e-05, "loss": 0.3911, "step": 3768, "teacher_loss": 0.36231428384780884 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.7952154874801636, "learning_rate": 1.6346682087610235e-05, "loss": 0.2841, "step": 3769, "teacher_loss": 0.22733429074287415 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5044256448745728, "learning_rate": 1.6351019227989013e-05, "loss": 0.2529, "step": 3770, "teacher_loss": 0.22497303783893585 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6302840709686279, "learning_rate": 1.635535636836779e-05, "loss": 0.3205, "step": 3771, "teacher_loss": 0.2860368490219116 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.46884235739707947, "learning_rate": 1.6359693508746568e-05, "loss": 0.2669, "step": 3772, "teacher_loss": 0.24443745613098145 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.2000223994255066, "learning_rate": 1.6364030649125346e-05, "loss": 0.1582, "step": 3773, "teacher_loss": 0.15359479188919067 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6544150114059448, "learning_rate": 1.6368367789504123e-05, "loss": 0.3086, "step": 3774, "teacher_loss": 0.2701876759529114 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.4003007113933563, "learning_rate": 1.6372704929882898e-05, "loss": 0.2441, "step": 3775, "teacher_loss": 0.22669926285743713 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6334882974624634, "learning_rate": 1.6377042070261672e-05, "loss": 0.2944, "step": 3776, "teacher_loss": 0.2566993832588196 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.49278002977371216, "learning_rate": 1.638137921064045e-05, "loss": 0.2544, "step": 3777, "teacher_loss": 0.2279680073261261 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.24746474623680115, "learning_rate": 1.6385716351019227e-05, "loss": 0.2567, "step": 3778, "teacher_loss": 0.257731556892395 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.43695637583732605, "learning_rate": 1.6390053491398005e-05, "loss": 0.2379, "step": 3779, "teacher_loss": 0.2158331274986267 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3419543206691742, "learning_rate": 1.6394390631776782e-05, "loss": 0.2232, "step": 3780, "teacher_loss": 0.21001383662223816 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.47850456833839417, "learning_rate": 1.639872777215556e-05, "loss": 0.3036, "step": 3781, "teacher_loss": 0.2841114401817322 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5205943584442139, "learning_rate": 1.6403064912534338e-05, "loss": 0.4213, "step": 3782, "teacher_loss": 0.4102362096309662 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.3042946457862854, "learning_rate": 1.6407402052913115e-05, "loss": 0.2381, "step": 3783, "teacher_loss": 0.2307826578617096 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.14177286624908447, "learning_rate": 1.6411739193291893e-05, "loss": 0.2254, "step": 3784, "teacher_loss": 0.23466037213802338 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.27628758549690247, "learning_rate": 1.6416076333670667e-05, "loss": 0.235, "step": 3785, "teacher_loss": 0.23043525218963623 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.5149080753326416, "learning_rate": 1.642041347404944e-05, "loss": 0.2615, "step": 3786, "teacher_loss": 0.2333323061466217 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.4056503176689148, "learning_rate": 1.642475061442822e-05, "loss": 0.2658, "step": 3787, "teacher_loss": 0.250255823135376 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.37076690793037415, "learning_rate": 1.6429087754806996e-05, "loss": 0.2991, "step": 3788, "teacher_loss": 0.29112833738327026 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.6789429187774658, "learning_rate": 1.6433424895185774e-05, "loss": 0.2476, "step": 3789, "teacher_loss": 0.1996225267648697 }, { "compression_loss": 0.0, "epoch": 0.68, "label_loss": 0.35509172081947327, "learning_rate": 1.6437762035564552e-05, "loss": 0.2053, "step": 3790, "teacher_loss": 0.1886591911315918 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.29993876814842224, "learning_rate": 1.644209917594333e-05, "loss": 0.2615, "step": 3791, "teacher_loss": 0.2572597861289978 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.27062827348709106, "learning_rate": 1.6446436316322107e-05, "loss": 0.2046, "step": 3792, "teacher_loss": 0.19727930426597595 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.4060091972351074, "learning_rate": 1.6450773456700885e-05, "loss": 0.2204, "step": 3793, "teacher_loss": 0.19974222779273987 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3982829451560974, "learning_rate": 1.645511059707966e-05, "loss": 0.2433, "step": 3794, "teacher_loss": 0.22608813643455505 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5104777812957764, "learning_rate": 1.6459447737458436e-05, "loss": 0.2382, "step": 3795, "teacher_loss": 0.2079637348651886 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.19846825301647186, "learning_rate": 1.6463784877837214e-05, "loss": 0.2074, "step": 3796, "teacher_loss": 0.20840394496917725 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.32102543115615845, "learning_rate": 1.6468122018215988e-05, "loss": 0.226, "step": 3797, "teacher_loss": 0.21544566750526428 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.2557727098464966, "learning_rate": 1.6472459158594766e-05, "loss": 0.218, "step": 3798, "teacher_loss": 0.21380352973937988 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.33325809240341187, "learning_rate": 1.6476796298973544e-05, "loss": 0.2319, "step": 3799, "teacher_loss": 0.2206037938594818 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.7315468788146973, "learning_rate": 1.648113343935232e-05, "loss": 0.3161, "step": 3800, "teacher_loss": 0.2699926495552063 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.4144538938999176, "learning_rate": 1.64854705797311e-05, "loss": 0.212, "step": 3801, "teacher_loss": 0.1895418018102646 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.2488737404346466, "learning_rate": 1.6489807720109876e-05, "loss": 0.1737, "step": 3802, "teacher_loss": 0.16532915830612183 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3568645119667053, "learning_rate": 1.649414486048865e-05, "loss": 0.1975, "step": 3803, "teacher_loss": 0.1798364520072937 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5763231515884399, "learning_rate": 1.6498482000867428e-05, "loss": 0.2668, "step": 3804, "teacher_loss": 0.23243044316768646 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.7315685153007507, "learning_rate": 1.6502819141246206e-05, "loss": 0.2342, "step": 3805, "teacher_loss": 0.1789829134941101 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.40255996584892273, "learning_rate": 1.6507156281624983e-05, "loss": 0.2568, "step": 3806, "teacher_loss": 0.24063995480537415 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.49115222692489624, "learning_rate": 1.651149342200376e-05, "loss": 0.2241, "step": 3807, "teacher_loss": 0.1944597363471985 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.24985545873641968, "learning_rate": 1.6515830562382535e-05, "loss": 0.2956, "step": 3808, "teacher_loss": 0.30067986249923706 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.17164045572280884, "learning_rate": 1.6520167702761313e-05, "loss": 0.1657, "step": 3809, "teacher_loss": 0.16505427658557892 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5039825439453125, "learning_rate": 1.652450484314009e-05, "loss": 0.2691, "step": 3810, "teacher_loss": 0.2430124431848526 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.34272128343582153, "learning_rate": 1.6528841983518865e-05, "loss": 0.2189, "step": 3811, "teacher_loss": 0.20516183972358704 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.36600738763809204, "learning_rate": 1.6533179123897642e-05, "loss": 0.2262, "step": 3812, "teacher_loss": 0.21065667271614075 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5470266342163086, "learning_rate": 1.653751626427642e-05, "loss": 0.3718, "step": 3813, "teacher_loss": 0.3523100018501282 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.27601104974746704, "learning_rate": 1.6541853404655198e-05, "loss": 0.1888, "step": 3814, "teacher_loss": 0.17915800213813782 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.37931209802627563, "learning_rate": 1.6546190545033975e-05, "loss": 0.2098, "step": 3815, "teacher_loss": 0.19093045592308044 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3809993267059326, "learning_rate": 1.6550527685412753e-05, "loss": 0.2825, "step": 3816, "teacher_loss": 0.27150198817253113 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.4369770288467407, "learning_rate": 1.655486482579153e-05, "loss": 0.2191, "step": 3817, "teacher_loss": 0.19484980404376984 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.39471060037612915, "learning_rate": 1.6559201966170305e-05, "loss": 0.2919, "step": 3818, "teacher_loss": 0.2804563045501709 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5885171294212341, "learning_rate": 1.6563539106549082e-05, "loss": 0.2662, "step": 3819, "teacher_loss": 0.2304316759109497 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3859420120716095, "learning_rate": 1.6567876246927857e-05, "loss": 0.2486, "step": 3820, "teacher_loss": 0.2333768606185913 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.8798807859420776, "learning_rate": 1.6572213387306634e-05, "loss": 0.2929, "step": 3821, "teacher_loss": 0.22762683033943176 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.2645268738269806, "learning_rate": 1.6576550527685412e-05, "loss": 0.14, "step": 3822, "teacher_loss": 0.1261518895626068 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.2211216688156128, "learning_rate": 1.658088766806419e-05, "loss": 0.2434, "step": 3823, "teacher_loss": 0.24587324261665344 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.20005196332931519, "learning_rate": 1.6585224808442967e-05, "loss": 0.215, "step": 3824, "teacher_loss": 0.2166530042886734 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.33525094389915466, "learning_rate": 1.6589561948821745e-05, "loss": 0.1988, "step": 3825, "teacher_loss": 0.18364958465099335 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.6016664505004883, "learning_rate": 1.6593899089200522e-05, "loss": 0.2154, "step": 3826, "teacher_loss": 0.17246496677398682 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.6605978608131409, "learning_rate": 1.65982362295793e-05, "loss": 0.3174, "step": 3827, "teacher_loss": 0.2792982757091522 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.8585014343261719, "learning_rate": 1.6602573369958078e-05, "loss": 0.333, "step": 3828, "teacher_loss": 0.27466362714767456 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3495725989341736, "learning_rate": 1.660691051033685e-05, "loss": 0.1991, "step": 3829, "teacher_loss": 0.1823853999376297 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 1.454666018486023, "learning_rate": 1.6611247650715626e-05, "loss": 0.377, "step": 3830, "teacher_loss": 0.2572720944881439 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5205580592155457, "learning_rate": 1.6615584791094404e-05, "loss": 0.3385, "step": 3831, "teacher_loss": 0.31829530000686646 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.279083788394928, "learning_rate": 1.661992193147318e-05, "loss": 0.2292, "step": 3832, "teacher_loss": 0.22361242771148682 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5379554629325867, "learning_rate": 1.662425907185196e-05, "loss": 0.3581, "step": 3833, "teacher_loss": 0.33814409375190735 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.4368254244327545, "learning_rate": 1.6628596212230737e-05, "loss": 0.2082, "step": 3834, "teacher_loss": 0.1828201562166214 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.39830639958381653, "learning_rate": 1.6632933352609514e-05, "loss": 0.2783, "step": 3835, "teacher_loss": 0.2650046944618225 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.24708837270736694, "learning_rate": 1.6637270492988292e-05, "loss": 0.2415, "step": 3836, "teacher_loss": 0.24087324738502502 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3038734197616577, "learning_rate": 1.664160763336707e-05, "loss": 0.2703, "step": 3837, "teacher_loss": 0.26660460233688354 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.3258134126663208, "learning_rate": 1.6645944773745844e-05, "loss": 0.3155, "step": 3838, "teacher_loss": 0.31432080268859863 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.25600236654281616, "learning_rate": 1.665028191412462e-05, "loss": 0.3679, "step": 3839, "teacher_loss": 0.3803739547729492 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.31062108278274536, "learning_rate": 1.6654619054503396e-05, "loss": 0.2346, "step": 3840, "teacher_loss": 0.22618205845355988 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.4096793532371521, "learning_rate": 1.6658956194882173e-05, "loss": 0.2591, "step": 3841, "teacher_loss": 0.24235542118549347 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.39860469102859497, "learning_rate": 1.666329333526095e-05, "loss": 0.2758, "step": 3842, "teacher_loss": 0.2621324062347412 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.9226051568984985, "learning_rate": 1.666763047563973e-05, "loss": 0.3211, "step": 3843, "teacher_loss": 0.2542799115180969 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.5054365992546082, "learning_rate": 1.6671967616018506e-05, "loss": 0.209, "step": 3844, "teacher_loss": 0.17602741718292236 }, { "compression_loss": 0.0, "epoch": 0.69, "label_loss": 0.29463323950767517, "learning_rate": 1.6676304756397284e-05, "loss": 0.2589, "step": 3845, "teacher_loss": 0.25491857528686523 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5439555644989014, "learning_rate": 1.668064189677606e-05, "loss": 0.2678, "step": 3846, "teacher_loss": 0.2370859682559967 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.44368356466293335, "learning_rate": 1.6684979037154836e-05, "loss": 0.2363, "step": 3847, "teacher_loss": 0.2132757157087326 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5373855829238892, "learning_rate": 1.6689316177533613e-05, "loss": 0.2403, "step": 3848, "teacher_loss": 0.2072800248861313 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.1720067858695984, "learning_rate": 1.669365331791239e-05, "loss": 0.1374, "step": 3849, "teacher_loss": 0.1335633397102356 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.36526960134506226, "learning_rate": 1.669799045829117e-05, "loss": 0.2658, "step": 3850, "teacher_loss": 0.25472697615623474 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3932795226573944, "learning_rate": 1.6702327598669943e-05, "loss": 0.2397, "step": 3851, "teacher_loss": 0.22262638807296753 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5964947938919067, "learning_rate": 1.670666473904872e-05, "loss": 0.3338, "step": 3852, "teacher_loss": 0.30466634035110474 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3706209063529968, "learning_rate": 1.6711001879427498e-05, "loss": 0.2018, "step": 3853, "teacher_loss": 0.18306578695774078 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5303115844726562, "learning_rate": 1.6715339019806275e-05, "loss": 0.2795, "step": 3854, "teacher_loss": 0.25157755613327026 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5843782424926758, "learning_rate": 1.6719676160185053e-05, "loss": 0.2644, "step": 3855, "teacher_loss": 0.22886034846305847 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3444828987121582, "learning_rate": 1.6724013300563827e-05, "loss": 0.3409, "step": 3856, "teacher_loss": 0.34050512313842773 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.27396342158317566, "learning_rate": 1.6728350440942605e-05, "loss": 0.1724, "step": 3857, "teacher_loss": 0.16109535098075867 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6663780212402344, "learning_rate": 1.6732687581321383e-05, "loss": 0.4939, "step": 3858, "teacher_loss": 0.47471052408218384 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.48984646797180176, "learning_rate": 1.673702472170016e-05, "loss": 0.229, "step": 3859, "teacher_loss": 0.20005908608436584 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6088159680366516, "learning_rate": 1.6741361862078938e-05, "loss": 0.2461, "step": 3860, "teacher_loss": 0.2058192491531372 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.18091699481010437, "learning_rate": 1.6745699002457715e-05, "loss": 0.1785, "step": 3861, "teacher_loss": 0.17825454473495483 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.16881585121154785, "learning_rate": 1.675003614283649e-05, "loss": 0.1912, "step": 3862, "teacher_loss": 0.1937391310930252 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.25088709592819214, "learning_rate": 1.6754373283215267e-05, "loss": 0.2879, "step": 3863, "teacher_loss": 0.29205167293548584 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.41045036911964417, "learning_rate": 1.675871042359404e-05, "loss": 0.2649, "step": 3864, "teacher_loss": 0.24874994158744812 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.12417268753051758, "learning_rate": 1.676304756397282e-05, "loss": 0.1968, "step": 3865, "teacher_loss": 0.20484042167663574 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.2121150940656662, "learning_rate": 1.6767384704351597e-05, "loss": 0.2203, "step": 3866, "teacher_loss": 0.22119294106960297 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.9646523594856262, "learning_rate": 1.6771721844730374e-05, "loss": 0.2763, "step": 3867, "teacher_loss": 0.1998457908630371 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3803498148918152, "learning_rate": 1.6776058985109152e-05, "loss": 0.2534, "step": 3868, "teacher_loss": 0.23930513858795166 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.7884403467178345, "learning_rate": 1.678039612548793e-05, "loss": 0.3291, "step": 3869, "teacher_loss": 0.27804964780807495 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3104196786880493, "learning_rate": 1.6784733265866707e-05, "loss": 0.2356, "step": 3870, "teacher_loss": 0.2272539734840393 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.1442958265542984, "learning_rate": 1.6789070406245485e-05, "loss": 0.2038, "step": 3871, "teacher_loss": 0.21043211221694946 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5211657285690308, "learning_rate": 1.6793407546624263e-05, "loss": 0.2457, "step": 3872, "teacher_loss": 0.21509389579296112 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.413181871175766, "learning_rate": 1.6797744687003033e-05, "loss": 0.3, "step": 3873, "teacher_loss": 0.28739339113235474 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.69828200340271, "learning_rate": 1.680208182738181e-05, "loss": 0.3273, "step": 3874, "teacher_loss": 0.286125510931015 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3417855203151703, "learning_rate": 1.680641896776059e-05, "loss": 0.2462, "step": 3875, "teacher_loss": 0.23559951782226562 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.19873076677322388, "learning_rate": 1.6810756108139366e-05, "loss": 0.1843, "step": 3876, "teacher_loss": 0.18274015188217163 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.44638437032699585, "learning_rate": 1.6815093248518144e-05, "loss": 0.1962, "step": 3877, "teacher_loss": 0.1683947741985321 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.25737982988357544, "learning_rate": 1.681943038889692e-05, "loss": 0.2638, "step": 3878, "teacher_loss": 0.26449745893478394 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.4711773991584778, "learning_rate": 1.68237675292757e-05, "loss": 0.2916, "step": 3879, "teacher_loss": 0.2716251313686371 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3741251230239868, "learning_rate": 1.6828104669654477e-05, "loss": 0.2083, "step": 3880, "teacher_loss": 0.18984462320804596 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.41397538781166077, "learning_rate": 1.6832441810033254e-05, "loss": 0.2108, "step": 3881, "teacher_loss": 0.1881740689277649 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3928160071372986, "learning_rate": 1.683677895041203e-05, "loss": 0.232, "step": 3882, "teacher_loss": 0.21408069133758545 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6389734148979187, "learning_rate": 1.6841116090790806e-05, "loss": 0.2732, "step": 3883, "teacher_loss": 0.2325562685728073 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.7795952558517456, "learning_rate": 1.684545323116958e-05, "loss": 0.3268, "step": 3884, "teacher_loss": 0.27649807929992676 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6031370162963867, "learning_rate": 1.6849790371548358e-05, "loss": 0.294, "step": 3885, "teacher_loss": 0.259596586227417 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5631281137466431, "learning_rate": 1.6854127511927136e-05, "loss": 0.2587, "step": 3886, "teacher_loss": 0.22484168410301208 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6453170776367188, "learning_rate": 1.6858464652305913e-05, "loss": 0.2936, "step": 3887, "teacher_loss": 0.25451862812042236 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.17686045169830322, "learning_rate": 1.686280179268469e-05, "loss": 0.2089, "step": 3888, "teacher_loss": 0.21243956685066223 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5445213913917542, "learning_rate": 1.686713893306347e-05, "loss": 0.3213, "step": 3889, "teacher_loss": 0.29651808738708496 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.6258115768432617, "learning_rate": 1.6871476073442246e-05, "loss": 0.2448, "step": 3890, "teacher_loss": 0.2024383544921875 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.18060241639614105, "learning_rate": 1.687581321382102e-05, "loss": 0.2, "step": 3891, "teacher_loss": 0.20210134983062744 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5958251953125, "learning_rate": 1.6880150354199798e-05, "loss": 0.24, "step": 3892, "teacher_loss": 0.20042569935321808 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5163853168487549, "learning_rate": 1.6884487494578576e-05, "loss": 0.2714, "step": 3893, "teacher_loss": 0.2442055642604828 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.8214160203933716, "learning_rate": 1.6888824634957353e-05, "loss": 0.3049, "step": 3894, "teacher_loss": 0.24752146005630493 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.3378959894180298, "learning_rate": 1.6893161775336127e-05, "loss": 0.1907, "step": 3895, "teacher_loss": 0.17437592148780823 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5107637047767639, "learning_rate": 1.6897498915714905e-05, "loss": 0.2255, "step": 3896, "teacher_loss": 0.19381266832351685 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.44685596227645874, "learning_rate": 1.6901836056093683e-05, "loss": 0.2186, "step": 3897, "teacher_loss": 0.1932271122932434 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.22791126370429993, "learning_rate": 1.690617319647246e-05, "loss": 0.301, "step": 3898, "teacher_loss": 0.3091421127319336 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.8121433854103088, "learning_rate": 1.6910510336851238e-05, "loss": 0.2013, "step": 3899, "teacher_loss": 0.13345089554786682 }, { "compression_loss": 0.0, "epoch": 0.7, "label_loss": 0.5818511247634888, "learning_rate": 1.6914847477230012e-05, "loss": 0.2323, "step": 3900, "teacher_loss": 0.19344978034496307 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.26719602942466736, "learning_rate": 1.691918461760879e-05, "loss": 0.2449, "step": 3901, "teacher_loss": 0.24246114492416382 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5554804801940918, "learning_rate": 1.6923521757987567e-05, "loss": 0.2934, "step": 3902, "teacher_loss": 0.26430991291999817 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.3490039110183716, "learning_rate": 1.6927858898366345e-05, "loss": 0.2409, "step": 3903, "teacher_loss": 0.2288811206817627 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.40259408950805664, "learning_rate": 1.6932196038745123e-05, "loss": 0.2663, "step": 3904, "teacher_loss": 0.25113362073898315 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.2171630859375, "learning_rate": 1.69365331791239e-05, "loss": 0.3032, "step": 3905, "teacher_loss": 0.3127533495426178 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.6709685325622559, "learning_rate": 1.6940870319502675e-05, "loss": 0.224, "step": 3906, "teacher_loss": 0.17436185479164124 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.3068908452987671, "learning_rate": 1.6945207459881452e-05, "loss": 0.308, "step": 3907, "teacher_loss": 0.3080999255180359 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.44132161140441895, "learning_rate": 1.6949544600260226e-05, "loss": 0.2075, "step": 3908, "teacher_loss": 0.18150848150253296 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.6858330965042114, "learning_rate": 1.6953881740639004e-05, "loss": 0.3212, "step": 3909, "teacher_loss": 0.28069499135017395 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.48784172534942627, "learning_rate": 1.695821888101778e-05, "loss": 0.2352, "step": 3910, "teacher_loss": 0.20714910328388214 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.8955060243606567, "learning_rate": 1.696255602139656e-05, "loss": 0.2575, "step": 3911, "teacher_loss": 0.18657651543617249 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4782736897468567, "learning_rate": 1.6966893161775337e-05, "loss": 0.287, "step": 3912, "teacher_loss": 0.26575610041618347 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5863543152809143, "learning_rate": 1.6971230302154115e-05, "loss": 0.2682, "step": 3913, "teacher_loss": 0.23284326493740082 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5234898924827576, "learning_rate": 1.6975567442532892e-05, "loss": 0.2985, "step": 3914, "teacher_loss": 0.2735252380371094 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.3895675539970398, "learning_rate": 1.697990458291167e-05, "loss": 0.2825, "step": 3915, "teacher_loss": 0.27055951952934265 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4092468321323395, "learning_rate": 1.6984241723290444e-05, "loss": 0.3383, "step": 3916, "teacher_loss": 0.33041566610336304 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.6894538998603821, "learning_rate": 1.6988578863669218e-05, "loss": 0.3198, "step": 3917, "teacher_loss": 0.2786828875541687 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5031434297561646, "learning_rate": 1.6992916004047996e-05, "loss": 0.1988, "step": 3918, "teacher_loss": 0.1650250256061554 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.40064242482185364, "learning_rate": 1.6997253144426773e-05, "loss": 0.2786, "step": 3919, "teacher_loss": 0.26506125926971436 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.554537296295166, "learning_rate": 1.700159028480555e-05, "loss": 0.3811, "step": 3920, "teacher_loss": 0.3617980182170868 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5182766318321228, "learning_rate": 1.700592742518433e-05, "loss": 0.312, "step": 3921, "teacher_loss": 0.2890707552433014 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.13691446185112, "learning_rate": 1.7010264565563106e-05, "loss": 0.237, "step": 3922, "teacher_loss": 0.24807268381118774 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.09030114114284515, "learning_rate": 1.7014601705941884e-05, "loss": 0.1685, "step": 3923, "teacher_loss": 0.1771407276391983 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.33797401189804077, "learning_rate": 1.701893884632066e-05, "loss": 0.2142, "step": 3924, "teacher_loss": 0.20049187541007996 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 1.1636279821395874, "learning_rate": 1.702327598669944e-05, "loss": 0.3683, "step": 3925, "teacher_loss": 0.27993345260620117 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4771818220615387, "learning_rate": 1.7027613127078213e-05, "loss": 0.2436, "step": 3926, "teacher_loss": 0.21764010190963745 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5608323812484741, "learning_rate": 1.7031950267456988e-05, "loss": 0.2476, "step": 3927, "teacher_loss": 0.21278738975524902 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5386644601821899, "learning_rate": 1.7036287407835765e-05, "loss": 0.2293, "step": 3928, "teacher_loss": 0.19491825997829437 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.30903929471969604, "learning_rate": 1.7040624548214543e-05, "loss": 0.2177, "step": 3929, "teacher_loss": 0.20751143991947174 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.42455366253852844, "learning_rate": 1.704496168859332e-05, "loss": 0.1805, "step": 3930, "teacher_loss": 0.15340778231620789 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.527298629283905, "learning_rate": 1.7049298828972098e-05, "loss": 0.2276, "step": 3931, "teacher_loss": 0.19428777694702148 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.2847594916820526, "learning_rate": 1.7053635969350876e-05, "loss": 0.2124, "step": 3932, "teacher_loss": 0.2043876051902771 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5373703837394714, "learning_rate": 1.7057973109729653e-05, "loss": 0.2624, "step": 3933, "teacher_loss": 0.23188257217407227 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4049030542373657, "learning_rate": 1.706231025010843e-05, "loss": 0.315, "step": 3934, "teacher_loss": 0.3050132989883423 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4367401897907257, "learning_rate": 1.7066647390487205e-05, "loss": 0.2255, "step": 3935, "teacher_loss": 0.20203456282615662 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.2832879424095154, "learning_rate": 1.7070984530865983e-05, "loss": 0.221, "step": 3936, "teacher_loss": 0.21404916048049927 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5835138559341431, "learning_rate": 1.707532167124476e-05, "loss": 0.2519, "step": 3937, "teacher_loss": 0.21507278084754944 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.644747257232666, "learning_rate": 1.7079658811623535e-05, "loss": 0.2807, "step": 3938, "teacher_loss": 0.24024377763271332 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.34050512313842773, "learning_rate": 1.7083995952002312e-05, "loss": 0.3425, "step": 3939, "teacher_loss": 0.34271591901779175 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.32466745376586914, "learning_rate": 1.708833309238109e-05, "loss": 0.1948, "step": 3940, "teacher_loss": 0.1804220974445343 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.45505404472351074, "learning_rate": 1.7092670232759868e-05, "loss": 0.3913, "step": 3941, "teacher_loss": 0.3842456042766571 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.49860769510269165, "learning_rate": 1.7097007373138645e-05, "loss": 0.2175, "step": 3942, "teacher_loss": 0.18628600239753723 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.3566739559173584, "learning_rate": 1.7101344513517423e-05, "loss": 0.1973, "step": 3943, "teacher_loss": 0.17964181303977966 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.5710713863372803, "learning_rate": 1.7105681653896197e-05, "loss": 0.2433, "step": 3944, "teacher_loss": 0.2068319469690323 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.6545693874359131, "learning_rate": 1.7110018794274975e-05, "loss": 0.3603, "step": 3945, "teacher_loss": 0.3275928497314453 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.1751236617565155, "learning_rate": 1.7114355934653752e-05, "loss": 0.2148, "step": 3946, "teacher_loss": 0.21917343139648438 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.3753398060798645, "learning_rate": 1.711869307503253e-05, "loss": 0.2433, "step": 3947, "teacher_loss": 0.2286595106124878 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.2913745641708374, "learning_rate": 1.7123030215411308e-05, "loss": 0.2641, "step": 3948, "teacher_loss": 0.2610868215560913 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.7679098844528198, "learning_rate": 1.7127367355790082e-05, "loss": 0.3244, "step": 3949, "teacher_loss": 0.2750801742076874 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.7221739292144775, "learning_rate": 1.713170449616886e-05, "loss": 0.437, "step": 3950, "teacher_loss": 0.4052681624889374 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.40543872117996216, "learning_rate": 1.7136041636547637e-05, "loss": 0.2897, "step": 3951, "teacher_loss": 0.2768423557281494 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.36040055751800537, "learning_rate": 1.714037877692641e-05, "loss": 0.2737, "step": 3952, "teacher_loss": 0.2640827000141144 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.32016587257385254, "learning_rate": 1.714471591730519e-05, "loss": 0.2422, "step": 3953, "teacher_loss": 0.23359136283397675 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.41428905725479126, "learning_rate": 1.7149053057683967e-05, "loss": 0.2175, "step": 3954, "teacher_loss": 0.19563668966293335 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.4965195059776306, "learning_rate": 1.7153390198062744e-05, "loss": 0.2791, "step": 3955, "teacher_loss": 0.2549506723880768 }, { "compression_loss": 0.0, "epoch": 0.71, "label_loss": 0.11466330289840698, "learning_rate": 1.7157727338441522e-05, "loss": 0.1659, "step": 3956, "teacher_loss": 0.171542227268219 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.26543858647346497, "learning_rate": 1.71620644788203e-05, "loss": 0.181, "step": 3957, "teacher_loss": 0.17160111665725708 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.44704151153564453, "learning_rate": 1.7166401619199077e-05, "loss": 0.2407, "step": 3958, "teacher_loss": 0.21775957942008972 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.23912313580513, "learning_rate": 1.7170738759577855e-05, "loss": 0.1673, "step": 3959, "teacher_loss": 0.15928784012794495 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5461708903312683, "learning_rate": 1.717507589995663e-05, "loss": 0.2266, "step": 3960, "teacher_loss": 0.19106240570545197 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.6859482526779175, "learning_rate": 1.7179413040335403e-05, "loss": 0.3065, "step": 3961, "teacher_loss": 0.2643158435821533 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3557426929473877, "learning_rate": 1.718375018071418e-05, "loss": 0.1966, "step": 3962, "teacher_loss": 0.1788983941078186 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.2776501178741455, "learning_rate": 1.718808732109296e-05, "loss": 0.2024, "step": 3963, "teacher_loss": 0.19399815797805786 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.19211947917938232, "learning_rate": 1.7192424461471736e-05, "loss": 0.153, "step": 3964, "teacher_loss": 0.14860612154006958 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7693924307823181, "learning_rate": 1.7196761601850514e-05, "loss": 0.2738, "step": 3965, "teacher_loss": 0.21872790157794952 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5709037780761719, "learning_rate": 1.720109874222929e-05, "loss": 0.2598, "step": 3966, "teacher_loss": 0.22521045804023743 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3617135286331177, "learning_rate": 1.720543588260807e-05, "loss": 0.1982, "step": 3967, "teacher_loss": 0.18006360530853271 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5754263401031494, "learning_rate": 1.7209773022986846e-05, "loss": 0.3296, "step": 3968, "teacher_loss": 0.30225008726119995 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3671974837779999, "learning_rate": 1.7214110163365624e-05, "loss": 0.2475, "step": 3969, "teacher_loss": 0.23424479365348816 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7586290836334229, "learning_rate": 1.72184473037444e-05, "loss": 0.2548, "step": 3970, "teacher_loss": 0.1987859606742859 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5211168527603149, "learning_rate": 1.7222784444123173e-05, "loss": 0.2696, "step": 3971, "teacher_loss": 0.241624116897583 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.19971995055675507, "learning_rate": 1.722712158450195e-05, "loss": 0.1787, "step": 3972, "teacher_loss": 0.1763882339000702 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7566083669662476, "learning_rate": 1.7231458724880728e-05, "loss": 0.2395, "step": 3973, "teacher_loss": 0.18208879232406616 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.13347968459129333, "learning_rate": 1.7235795865259505e-05, "loss": 0.1625, "step": 3974, "teacher_loss": 0.16568827629089355 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.38059964776039124, "learning_rate": 1.7240133005638283e-05, "loss": 0.2691, "step": 3975, "teacher_loss": 0.2567032277584076 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5140262246131897, "learning_rate": 1.724447014601706e-05, "loss": 0.2827, "step": 3976, "teacher_loss": 0.256994366645813 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.8959448337554932, "learning_rate": 1.7248807286395838e-05, "loss": 0.288, "step": 3977, "teacher_loss": 0.22042669355869293 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.20358942449092865, "learning_rate": 1.7253144426774616e-05, "loss": 0.1868, "step": 3978, "teacher_loss": 0.18489934504032135 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.4068831205368042, "learning_rate": 1.725748156715339e-05, "loss": 0.3297, "step": 3979, "teacher_loss": 0.32108765840530396 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.2756415605545044, "learning_rate": 1.7261818707532168e-05, "loss": 0.2931, "step": 3980, "teacher_loss": 0.2950747013092041 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3574574291706085, "learning_rate": 1.7266155847910945e-05, "loss": 0.2402, "step": 3981, "teacher_loss": 0.22721371054649353 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5514946579933167, "learning_rate": 1.727049298828972e-05, "loss": 0.2963, "step": 3982, "teacher_loss": 0.2679723799228668 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.8199466466903687, "learning_rate": 1.7274830128668497e-05, "loss": 0.2943, "step": 3983, "teacher_loss": 0.23588624596595764 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3846074938774109, "learning_rate": 1.7279167269047275e-05, "loss": 0.3388, "step": 3984, "teacher_loss": 0.33373701572418213 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.729701578617096, "learning_rate": 1.7283504409426052e-05, "loss": 0.3053, "step": 3985, "teacher_loss": 0.2581731081008911 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5193918943405151, "learning_rate": 1.728784154980483e-05, "loss": 0.2593, "step": 3986, "teacher_loss": 0.2303738296031952 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 1.0310242176055908, "learning_rate": 1.7292178690183608e-05, "loss": 0.3085, "step": 3987, "teacher_loss": 0.22822466492652893 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.33354344964027405, "learning_rate": 1.7296515830562382e-05, "loss": 0.2206, "step": 3988, "teacher_loss": 0.20810367166996002 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.38283175230026245, "learning_rate": 1.730085297094116e-05, "loss": 0.2679, "step": 3989, "teacher_loss": 0.25515997409820557 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5748469233512878, "learning_rate": 1.7305190111319937e-05, "loss": 0.2861, "step": 3990, "teacher_loss": 0.25400805473327637 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.45908045768737793, "learning_rate": 1.7309527251698715e-05, "loss": 0.2959, "step": 3991, "teacher_loss": 0.27780085802078247 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.4164584279060364, "learning_rate": 1.7313864392077492e-05, "loss": 0.2771, "step": 3992, "teacher_loss": 0.2615690529346466 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5435278415679932, "learning_rate": 1.7318201532456267e-05, "loss": 0.3015, "step": 3993, "teacher_loss": 0.27455759048461914 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.4714326858520508, "learning_rate": 1.7322538672835044e-05, "loss": 0.2458, "step": 3994, "teacher_loss": 0.22067826986312866 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.46516501903533936, "learning_rate": 1.7326875813213822e-05, "loss": 0.3276, "step": 3995, "teacher_loss": 0.3123038411140442 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.44686880707740784, "learning_rate": 1.7331212953592596e-05, "loss": 0.2469, "step": 3996, "teacher_loss": 0.22468581795692444 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.46535807847976685, "learning_rate": 1.7335550093971374e-05, "loss": 0.1952, "step": 3997, "teacher_loss": 0.16521766781806946 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.34706300497055054, "learning_rate": 1.733988723435015e-05, "loss": 0.2445, "step": 3998, "teacher_loss": 0.2331579178571701 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.3269864618778229, "learning_rate": 1.734422437472893e-05, "loss": 0.2136, "step": 3999, "teacher_loss": 0.2010076344013214 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7116924524307251, "learning_rate": 1.7348561515107707e-05, "loss": 0.3018, "step": 4000, "teacher_loss": 0.2562645375728607 }, { "epoch": 0.72, "eval_exact_match": 79.75402081362347, "eval_f1": 87.30717881985169, "step": 4000 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.33770906925201416, "learning_rate": 1.7352898655486484e-05, "loss": 0.224, "step": 4001, "teacher_loss": 0.21131166815757751 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.5675424933433533, "learning_rate": 1.7357235795865262e-05, "loss": 0.1815, "step": 4002, "teacher_loss": 0.1386110782623291 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7261936664581299, "learning_rate": 1.736157293624404e-05, "loss": 0.4138, "step": 4003, "teacher_loss": 0.379089891910553 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.39315661787986755, "learning_rate": 1.7365910076622814e-05, "loss": 0.3442, "step": 4004, "teacher_loss": 0.33879828453063965 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.4980563223361969, "learning_rate": 1.7370247217001588e-05, "loss": 0.2667, "step": 4005, "teacher_loss": 0.24095144867897034 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.22820442914962769, "learning_rate": 1.7374584357380366e-05, "loss": 0.1743, "step": 4006, "teacher_loss": 0.16832825541496277 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.7101324796676636, "learning_rate": 1.7378921497759143e-05, "loss": 0.2602, "step": 4007, "teacher_loss": 0.21025151014328003 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.22062525153160095, "learning_rate": 1.738325863813792e-05, "loss": 0.1651, "step": 4008, "teacher_loss": 0.15893635153770447 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.34375327825546265, "learning_rate": 1.73875957785167e-05, "loss": 0.2606, "step": 4009, "teacher_loss": 0.25138455629348755 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.260866641998291, "learning_rate": 1.7391932918895476e-05, "loss": 0.2097, "step": 4010, "teacher_loss": 0.20406612753868103 }, { "compression_loss": 0.0, "epoch": 0.72, "label_loss": 0.24984990060329437, "learning_rate": 1.7396270059274254e-05, "loss": 0.2282, "step": 4011, "teacher_loss": 0.2258007973432541 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.9095983505249023, "learning_rate": 1.740060719965303e-05, "loss": 0.3237, "step": 4012, "teacher_loss": 0.25863415002822876 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.4446542263031006, "learning_rate": 1.740494434003181e-05, "loss": 0.2234, "step": 4013, "teacher_loss": 0.19883191585540771 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.28768840432167053, "learning_rate": 1.7409281480410583e-05, "loss": 0.1391, "step": 4014, "teacher_loss": 0.12255939096212387 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.27036651968955994, "learning_rate": 1.7413618620789357e-05, "loss": 0.2673, "step": 4015, "teacher_loss": 0.2669178545475006 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.35449501872062683, "learning_rate": 1.7417955761168135e-05, "loss": 0.2001, "step": 4016, "teacher_loss": 0.1829456090927124 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.7128984928131104, "learning_rate": 1.7422292901546913e-05, "loss": 0.288, "step": 4017, "teacher_loss": 0.24080851674079895 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.520004153251648, "learning_rate": 1.742663004192569e-05, "loss": 0.2954, "step": 4018, "teacher_loss": 0.2704889178276062 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.5462685227394104, "learning_rate": 1.7430967182304468e-05, "loss": 0.275, "step": 4019, "teacher_loss": 0.24483177065849304 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.7000532746315002, "learning_rate": 1.7435304322683246e-05, "loss": 0.3071, "step": 4020, "teacher_loss": 0.2634274959564209 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.8150086998939514, "learning_rate": 1.7439641463062023e-05, "loss": 0.3161, "step": 4021, "teacher_loss": 0.2606182098388672 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.7050122022628784, "learning_rate": 1.74439786034408e-05, "loss": 0.3248, "step": 4022, "teacher_loss": 0.28258079290390015 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3940303921699524, "learning_rate": 1.7448315743819575e-05, "loss": 0.2612, "step": 4023, "teacher_loss": 0.24647334218025208 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.4136486053466797, "learning_rate": 1.7452652884198353e-05, "loss": 0.3057, "step": 4024, "teacher_loss": 0.2937406897544861 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.40887176990509033, "learning_rate": 1.7456990024577127e-05, "loss": 0.2973, "step": 4025, "teacher_loss": 0.28495338559150696 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.265727698802948, "learning_rate": 1.7461327164955904e-05, "loss": 0.2324, "step": 4026, "teacher_loss": 0.22871285676956177 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3892470896244049, "learning_rate": 1.7465664305334682e-05, "loss": 0.232, "step": 4027, "teacher_loss": 0.2145436406135559 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.35933834314346313, "learning_rate": 1.747000144571346e-05, "loss": 0.3416, "step": 4028, "teacher_loss": 0.33963119983673096 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.6742483377456665, "learning_rate": 1.7474338586092237e-05, "loss": 0.2296, "step": 4029, "teacher_loss": 0.18020948767662048 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.2300499528646469, "learning_rate": 1.7478675726471015e-05, "loss": 0.3051, "step": 4030, "teacher_loss": 0.3134298324584961 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.16709429025650024, "learning_rate": 1.7483012866849793e-05, "loss": 0.2203, "step": 4031, "teacher_loss": 0.22620849311351776 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3943938612937927, "learning_rate": 1.7487350007228567e-05, "loss": 0.1997, "step": 4032, "teacher_loss": 0.1780540645122528 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.5219937562942505, "learning_rate": 1.7491687147607344e-05, "loss": 0.2249, "step": 4033, "teacher_loss": 0.19186696410179138 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 1.0082002878189087, "learning_rate": 1.7496024287986122e-05, "loss": 0.356, "step": 4034, "teacher_loss": 0.2835494875907898 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.372824490070343, "learning_rate": 1.75003614283649e-05, "loss": 0.2724, "step": 4035, "teacher_loss": 0.26122957468032837 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.803097665309906, "learning_rate": 1.7504698568743674e-05, "loss": 0.4271, "step": 4036, "teacher_loss": 0.3852939307689667 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.30804795026779175, "learning_rate": 1.750903570912245e-05, "loss": 0.1848, "step": 4037, "teacher_loss": 0.17106834053993225 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.22174109518527985, "learning_rate": 1.751337284950123e-05, "loss": 0.2254, "step": 4038, "teacher_loss": 0.22582527995109558 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.2759023904800415, "learning_rate": 1.7517709989880007e-05, "loss": 0.2374, "step": 4039, "teacher_loss": 0.23315656185150146 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.23789766430854797, "learning_rate": 1.7522047130258784e-05, "loss": 0.2512, "step": 4040, "teacher_loss": 0.2526342272758484 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.5043667554855347, "learning_rate": 1.752638427063756e-05, "loss": 0.1902, "step": 4041, "teacher_loss": 0.15527993440628052 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.4522740840911865, "learning_rate": 1.7530721411016336e-05, "loss": 0.2334, "step": 4042, "teacher_loss": 0.20903506875038147 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.2933364808559418, "learning_rate": 1.7535058551395114e-05, "loss": 0.2601, "step": 4043, "teacher_loss": 0.25644904375076294 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3178500235080719, "learning_rate": 1.753939569177389e-05, "loss": 0.2282, "step": 4044, "teacher_loss": 0.21819926798343658 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3540683090686798, "learning_rate": 1.754373283215267e-05, "loss": 0.2301, "step": 4045, "teacher_loss": 0.21635138988494873 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.6484888792037964, "learning_rate": 1.7548069972531447e-05, "loss": 0.3186, "step": 4046, "teacher_loss": 0.28195860981941223 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3333244323730469, "learning_rate": 1.755240711291022e-05, "loss": 0.1922, "step": 4047, "teacher_loss": 0.17654263973236084 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.6256155371665955, "learning_rate": 1.7556744253289e-05, "loss": 0.2163, "step": 4048, "teacher_loss": 0.1708238422870636 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.5275435447692871, "learning_rate": 1.7561081393667773e-05, "loss": 0.3682, "step": 4049, "teacher_loss": 0.3505405783653259 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.334852397441864, "learning_rate": 1.756541853404655e-05, "loss": 0.2029, "step": 4050, "teacher_loss": 0.1882268488407135 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.834480881690979, "learning_rate": 1.7569755674425328e-05, "loss": 0.3045, "step": 4051, "teacher_loss": 0.24565812945365906 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.9154723286628723, "learning_rate": 1.7574092814804106e-05, "loss": 0.3502, "step": 4052, "teacher_loss": 0.28736215829849243 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.2546464204788208, "learning_rate": 1.7578429955182883e-05, "loss": 0.2324, "step": 4053, "teacher_loss": 0.22994284331798553 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.2698632478713989, "learning_rate": 1.758276709556166e-05, "loss": 0.2088, "step": 4054, "teacher_loss": 0.20201200246810913 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.43870335817337036, "learning_rate": 1.758710423594044e-05, "loss": 0.3067, "step": 4055, "teacher_loss": 0.2920305132865906 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.5013998746871948, "learning_rate": 1.7591441376319216e-05, "loss": 0.3039, "step": 4056, "teacher_loss": 0.28196966648101807 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.36489903926849365, "learning_rate": 1.7595778516697994e-05, "loss": 0.2677, "step": 4057, "teacher_loss": 0.25687360763549805 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.3993161916732788, "learning_rate": 1.7600115657076765e-05, "loss": 0.3249, "step": 4058, "teacher_loss": 0.31662583351135254 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.356062114238739, "learning_rate": 1.7604452797455542e-05, "loss": 0.2502, "step": 4059, "teacher_loss": 0.23846256732940674 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.9452967643737793, "learning_rate": 1.760878993783432e-05, "loss": 0.2715, "step": 4060, "teacher_loss": 0.19665464758872986 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.46517735719680786, "learning_rate": 1.7613127078213098e-05, "loss": 0.2347, "step": 4061, "teacher_loss": 0.20907726883888245 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 1.092389702796936, "learning_rate": 1.7617464218591875e-05, "loss": 0.3699, "step": 4062, "teacher_loss": 0.28963568806648254 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.409944087266922, "learning_rate": 1.7621801358970653e-05, "loss": 0.2182, "step": 4063, "teacher_loss": 0.19692330062389374 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.46763208508491516, "learning_rate": 1.762613849934943e-05, "loss": 0.2412, "step": 4064, "teacher_loss": 0.2160019725561142 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.22305750846862793, "learning_rate": 1.7630475639728208e-05, "loss": 0.2236, "step": 4065, "teacher_loss": 0.22365880012512207 }, { "compression_loss": 0.0, "epoch": 0.73, "label_loss": 0.4423673152923584, "learning_rate": 1.7634812780106986e-05, "loss": 0.2448, "step": 4066, "teacher_loss": 0.22282114624977112 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.37302684783935547, "learning_rate": 1.763914992048576e-05, "loss": 0.2066, "step": 4067, "teacher_loss": 0.18813541531562805 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.5116618871688843, "learning_rate": 1.7643487060864538e-05, "loss": 0.3366, "step": 4068, "teacher_loss": 0.31709977984428406 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.36667102575302124, "learning_rate": 1.7647824201243312e-05, "loss": 0.2133, "step": 4069, "teacher_loss": 0.19627505540847778 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.3286817669868469, "learning_rate": 1.765216134162209e-05, "loss": 0.2192, "step": 4070, "teacher_loss": 0.20701350271701813 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6520019769668579, "learning_rate": 1.7656498482000867e-05, "loss": 0.2232, "step": 4071, "teacher_loss": 0.17553207278251648 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6495643854141235, "learning_rate": 1.7660835622379645e-05, "loss": 0.2921, "step": 4072, "teacher_loss": 0.25240421295166016 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.4447469413280487, "learning_rate": 1.7665172762758422e-05, "loss": 0.1859, "step": 4073, "teacher_loss": 0.15714462101459503 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.4054219126701355, "learning_rate": 1.76695099031372e-05, "loss": 0.217, "step": 4074, "teacher_loss": 0.19610343873500824 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.2489941418170929, "learning_rate": 1.7673847043515977e-05, "loss": 0.196, "step": 4075, "teacher_loss": 0.19012480974197388 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.7123292684555054, "learning_rate": 1.7678184183894752e-05, "loss": 0.3415, "step": 4076, "teacher_loss": 0.3002733290195465 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.517052173614502, "learning_rate": 1.768252132427353e-05, "loss": 0.3496, "step": 4077, "teacher_loss": 0.330968976020813 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6957550048828125, "learning_rate": 1.7686858464652307e-05, "loss": 0.5273, "step": 4078, "teacher_loss": 0.5085822343826294 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.707707405090332, "learning_rate": 1.7691195605031085e-05, "loss": 0.3184, "step": 4079, "teacher_loss": 0.2751516103744507 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.22795212268829346, "learning_rate": 1.769553274540986e-05, "loss": 0.162, "step": 4080, "teacher_loss": 0.15472549200057983 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.40044939517974854, "learning_rate": 1.7699869885788636e-05, "loss": 0.2142, "step": 4081, "teacher_loss": 0.19347555935382843 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.31254029273986816, "learning_rate": 1.7704207026167414e-05, "loss": 0.1956, "step": 4082, "teacher_loss": 0.18261130154132843 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.9501831531524658, "learning_rate": 1.770854416654619e-05, "loss": 0.2485, "step": 4083, "teacher_loss": 0.17052507400512695 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.5576575994491577, "learning_rate": 1.771288130692497e-05, "loss": 0.386, "step": 4084, "teacher_loss": 0.36688530445098877 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.24088335037231445, "learning_rate": 1.7717218447303744e-05, "loss": 0.1493, "step": 4085, "teacher_loss": 0.1391381025314331 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.42695024609565735, "learning_rate": 1.772155558768252e-05, "loss": 0.1858, "step": 4086, "teacher_loss": 0.15901176631450653 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.40484851598739624, "learning_rate": 1.77258927280613e-05, "loss": 0.1983, "step": 4087, "teacher_loss": 0.17537343502044678 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.4246392846107483, "learning_rate": 1.7730229868440076e-05, "loss": 0.2343, "step": 4088, "teacher_loss": 0.21316301822662354 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.2727547883987427, "learning_rate": 1.7734567008818854e-05, "loss": 0.2921, "step": 4089, "teacher_loss": 0.2942776083946228 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6109724640846252, "learning_rate": 1.773890414919763e-05, "loss": 0.237, "step": 4090, "teacher_loss": 0.1954127997159958 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.33399826288223267, "learning_rate": 1.7743241289576406e-05, "loss": 0.2703, "step": 4091, "teacher_loss": 0.2632274925708771 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.5302850008010864, "learning_rate": 1.7747578429955184e-05, "loss": 0.2868, "step": 4092, "teacher_loss": 0.25974148511886597 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6457614302635193, "learning_rate": 1.7751915570333958e-05, "loss": 0.2318, "step": 4093, "teacher_loss": 0.18582448363304138 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.5607349872589111, "learning_rate": 1.7756252710712735e-05, "loss": 0.2212, "step": 4094, "teacher_loss": 0.18351811170578003 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6887947916984558, "learning_rate": 1.7760589851091513e-05, "loss": 0.4157, "step": 4095, "teacher_loss": 0.3854042589664459 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.189820796251297, "learning_rate": 1.776492699147029e-05, "loss": 0.2697, "step": 4096, "teacher_loss": 0.2785395383834839 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.2410605251789093, "learning_rate": 1.7769264131849068e-05, "loss": 0.2587, "step": 4097, "teacher_loss": 0.2607034146785736 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.19291429221630096, "learning_rate": 1.7773601272227846e-05, "loss": 0.1876, "step": 4098, "teacher_loss": 0.18701593577861786 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.3723694682121277, "learning_rate": 1.7777938412606623e-05, "loss": 0.2649, "step": 4099, "teacher_loss": 0.2529558837413788 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6752137541770935, "learning_rate": 1.77822755529854e-05, "loss": 0.3135, "step": 4100, "teacher_loss": 0.2733563184738159 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.17171594500541687, "learning_rate": 1.778661269336418e-05, "loss": 0.1699, "step": 4101, "teacher_loss": 0.1696939766407013 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6029078960418701, "learning_rate": 1.779094983374295e-05, "loss": 0.2709, "step": 4102, "teacher_loss": 0.23398740589618683 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.503454327583313, "learning_rate": 1.7795286974121727e-05, "loss": 0.3285, "step": 4103, "teacher_loss": 0.3090372085571289 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6840324401855469, "learning_rate": 1.7799624114500505e-05, "loss": 0.2289, "step": 4104, "teacher_loss": 0.1783227175474167 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.7135021686553955, "learning_rate": 1.7803961254879282e-05, "loss": 0.2781, "step": 4105, "teacher_loss": 0.22974857687950134 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.7254519462585449, "learning_rate": 1.780829839525806e-05, "loss": 0.2955, "step": 4106, "teacher_loss": 0.24771904945373535 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.7794185280799866, "learning_rate": 1.7812635535636838e-05, "loss": 0.2183, "step": 4107, "teacher_loss": 0.15597784519195557 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.4867413640022278, "learning_rate": 1.7816972676015615e-05, "loss": 0.2977, "step": 4108, "teacher_loss": 0.2767234146595001 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.3026319444179535, "learning_rate": 1.7821309816394393e-05, "loss": 0.1936, "step": 4109, "teacher_loss": 0.18148526549339294 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6156463623046875, "learning_rate": 1.782564695677317e-05, "loss": 0.3303, "step": 4110, "teacher_loss": 0.2986099421977997 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.4075435996055603, "learning_rate": 1.7829984097151945e-05, "loss": 0.2092, "step": 4111, "teacher_loss": 0.1871265470981598 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.1646701693534851, "learning_rate": 1.7834321237530722e-05, "loss": 0.2901, "step": 4112, "teacher_loss": 0.3040759563446045 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.9276775121688843, "learning_rate": 1.7838658377909497e-05, "loss": 0.332, "step": 4113, "teacher_loss": 0.2658011317253113 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.23450957238674164, "learning_rate": 1.7842995518288274e-05, "loss": 0.2282, "step": 4114, "teacher_loss": 0.22748714685440063 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.3063355088233948, "learning_rate": 1.7847332658667052e-05, "loss": 0.2024, "step": 4115, "teacher_loss": 0.19082927703857422 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6012687087059021, "learning_rate": 1.785166979904583e-05, "loss": 0.3315, "step": 4116, "teacher_loss": 0.3015064597129822 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.29128801822662354, "learning_rate": 1.7856006939424607e-05, "loss": 0.2198, "step": 4117, "teacher_loss": 0.21189172565937042 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.3383934199810028, "learning_rate": 1.7860344079803385e-05, "loss": 0.2403, "step": 4118, "teacher_loss": 0.2294088900089264 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.27994000911712646, "learning_rate": 1.7864681220182162e-05, "loss": 0.2066, "step": 4119, "teacher_loss": 0.19841763377189636 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.21849367022514343, "learning_rate": 1.7869018360560937e-05, "loss": 0.1768, "step": 4120, "teacher_loss": 0.17214488983154297 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.5366904735565186, "learning_rate": 1.7873355500939714e-05, "loss": 0.2914, "step": 4121, "teacher_loss": 0.26419180631637573 }, { "compression_loss": 0.0, "epoch": 0.74, "label_loss": 0.6963645219802856, "learning_rate": 1.7877692641318492e-05, "loss": 0.2736, "step": 4122, "teacher_loss": 0.22660967707633972 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5423459410667419, "learning_rate": 1.7882029781697266e-05, "loss": 0.3279, "step": 4123, "teacher_loss": 0.30402815341949463 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5324422717094421, "learning_rate": 1.7886366922076044e-05, "loss": 0.3405, "step": 4124, "teacher_loss": 0.3191503882408142 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.646681547164917, "learning_rate": 1.789070406245482e-05, "loss": 0.2399, "step": 4125, "teacher_loss": 0.19468306005001068 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.29867154359817505, "learning_rate": 1.78950412028336e-05, "loss": 0.1801, "step": 4126, "teacher_loss": 0.16696974635124207 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.9584167003631592, "learning_rate": 1.7899378343212377e-05, "loss": 0.352, "step": 4127, "teacher_loss": 0.2845892906188965 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.18704788386821747, "learning_rate": 1.7903715483591154e-05, "loss": 0.174, "step": 4128, "teacher_loss": 0.1725316345691681 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.311404287815094, "learning_rate": 1.790805262396993e-05, "loss": 0.2225, "step": 4129, "teacher_loss": 0.21261435747146606 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.42546606063842773, "learning_rate": 1.7912389764348706e-05, "loss": 0.1985, "step": 4130, "teacher_loss": 0.17323258519172668 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.32190102338790894, "learning_rate": 1.7916726904727484e-05, "loss": 0.2371, "step": 4131, "teacher_loss": 0.22767257690429688 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4125993251800537, "learning_rate": 1.792106404510626e-05, "loss": 0.2652, "step": 4132, "teacher_loss": 0.24884022772312164 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.528472900390625, "learning_rate": 1.792540118548504e-05, "loss": 0.2224, "step": 4133, "teacher_loss": 0.18836882710456848 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.2622644901275635, "learning_rate": 1.7929738325863813e-05, "loss": 0.27, "step": 4134, "teacher_loss": 0.2708420753479004 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.46562889218330383, "learning_rate": 1.793407546624259e-05, "loss": 0.2739, "step": 4135, "teacher_loss": 0.25260233879089355 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3772982060909271, "learning_rate": 1.793841260662137e-05, "loss": 0.2383, "step": 4136, "teacher_loss": 0.2228030562400818 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6222391128540039, "learning_rate": 1.7942749747000143e-05, "loss": 0.374, "step": 4137, "teacher_loss": 0.34645402431488037 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.1699390858411789, "learning_rate": 1.794708688737892e-05, "loss": 0.1548, "step": 4138, "teacher_loss": 0.15308451652526855 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5850667953491211, "learning_rate": 1.7951424027757698e-05, "loss": 0.3584, "step": 4139, "teacher_loss": 0.3332614302635193 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 1.1333680152893066, "learning_rate": 1.7955761168136475e-05, "loss": 0.3943, "step": 4140, "teacher_loss": 0.3121543526649475 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.32772552967071533, "learning_rate": 1.7960098308515253e-05, "loss": 0.2606, "step": 4141, "teacher_loss": 0.2531731128692627 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.2984614074230194, "learning_rate": 1.796443544889403e-05, "loss": 0.2214, "step": 4142, "teacher_loss": 0.21281316876411438 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5480458736419678, "learning_rate": 1.796877258927281e-05, "loss": 0.2742, "step": 4143, "teacher_loss": 0.24381019175052643 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7649442553520203, "learning_rate": 1.7973109729651586e-05, "loss": 0.2611, "step": 4144, "teacher_loss": 0.20515291392803192 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.616247296333313, "learning_rate": 1.797744687003036e-05, "loss": 0.2843, "step": 4145, "teacher_loss": 0.24743354320526123 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3074440360069275, "learning_rate": 1.7981784010409134e-05, "loss": 0.2713, "step": 4146, "teacher_loss": 0.2672742009162903 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4931604266166687, "learning_rate": 1.7986121150787912e-05, "loss": 0.3286, "step": 4147, "teacher_loss": 0.3102909326553345 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7713298201560974, "learning_rate": 1.799045829116669e-05, "loss": 0.3562, "step": 4148, "teacher_loss": 0.31005924940109253 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6208812594413757, "learning_rate": 1.7994795431545467e-05, "loss": 0.3075, "step": 4149, "teacher_loss": 0.2726662755012512 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5932831764221191, "learning_rate": 1.7999132571924245e-05, "loss": 0.3224, "step": 4150, "teacher_loss": 0.2922472059726715 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.41951775550842285, "learning_rate": 1.8003469712303023e-05, "loss": 0.2359, "step": 4151, "teacher_loss": 0.21553251147270203 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3235367238521576, "learning_rate": 1.80078068526818e-05, "loss": 0.1936, "step": 4152, "teacher_loss": 0.17914780974388123 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6134922504425049, "learning_rate": 1.8012143993060578e-05, "loss": 0.3397, "step": 4153, "teacher_loss": 0.30927425622940063 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.9352678060531616, "learning_rate": 1.8016481133439355e-05, "loss": 0.3284, "step": 4154, "teacher_loss": 0.2609473466873169 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6744242906570435, "learning_rate": 1.802081827381813e-05, "loss": 0.6153, "step": 4155, "teacher_loss": 0.608716607093811 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.2243228554725647, "learning_rate": 1.8025155414196904e-05, "loss": 0.1874, "step": 4156, "teacher_loss": 0.18333221971988678 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.8162073493003845, "learning_rate": 1.802949255457568e-05, "loss": 0.2931, "step": 4157, "teacher_loss": 0.2349727600812912 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7589199542999268, "learning_rate": 1.803382969495446e-05, "loss": 0.6153, "step": 4158, "teacher_loss": 0.5993785858154297 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7566730380058289, "learning_rate": 1.8038166835333237e-05, "loss": 0.3598, "step": 4159, "teacher_loss": 0.3157259225845337 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4488690197467804, "learning_rate": 1.8042503975712014e-05, "loss": 0.2876, "step": 4160, "teacher_loss": 0.2697228789329529 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3223251402378082, "learning_rate": 1.8046841116090792e-05, "loss": 0.184, "step": 4161, "teacher_loss": 0.16859588027000427 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6922459602355957, "learning_rate": 1.805117825646957e-05, "loss": 0.2506, "step": 4162, "teacher_loss": 0.2015790194272995 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3889782726764679, "learning_rate": 1.8055515396848347e-05, "loss": 0.2218, "step": 4163, "teacher_loss": 0.20322087407112122 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4009304642677307, "learning_rate": 1.805985253722712e-05, "loss": 0.2259, "step": 4164, "teacher_loss": 0.20640188455581665 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.5789691805839539, "learning_rate": 1.80641896776059e-05, "loss": 0.2399, "step": 4165, "teacher_loss": 0.20227554440498352 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3846268653869629, "learning_rate": 1.8068526817984677e-05, "loss": 0.2227, "step": 4166, "teacher_loss": 0.20473584532737732 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.750187873840332, "learning_rate": 1.807286395836345e-05, "loss": 0.3149, "step": 4167, "teacher_loss": 0.26654112339019775 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.3824586272239685, "learning_rate": 1.807720109874223e-05, "loss": 0.2905, "step": 4168, "teacher_loss": 0.28028547763824463 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.432822048664093, "learning_rate": 1.8081538239121006e-05, "loss": 0.1926, "step": 4169, "teacher_loss": 0.1659039705991745 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.6052897572517395, "learning_rate": 1.8085875379499784e-05, "loss": 0.4069, "step": 4170, "teacher_loss": 0.38488519191741943 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.41225871443748474, "learning_rate": 1.809021251987856e-05, "loss": 0.224, "step": 4171, "teacher_loss": 0.2030371129512787 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4907906651496887, "learning_rate": 1.809454966025734e-05, "loss": 0.3597, "step": 4172, "teacher_loss": 0.3451845645904541 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.45901599526405334, "learning_rate": 1.8098886800636113e-05, "loss": 0.1972, "step": 4173, "teacher_loss": 0.16811567544937134 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.36044102907180786, "learning_rate": 1.810322394101489e-05, "loss": 0.2454, "step": 4174, "teacher_loss": 0.23265400528907776 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7801012992858887, "learning_rate": 1.810756108139367e-05, "loss": 0.3481, "step": 4175, "teacher_loss": 0.3000839054584503 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.7795083522796631, "learning_rate": 1.8111898221772446e-05, "loss": 0.2888, "step": 4176, "teacher_loss": 0.23425626754760742 }, { "compression_loss": 0.0, "epoch": 0.75, "label_loss": 0.4698697626590729, "learning_rate": 1.8116235362151224e-05, "loss": 0.2827, "step": 4177, "teacher_loss": 0.2618792653083801 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 1.0276384353637695, "learning_rate": 1.8120572502529998e-05, "loss": 0.4081, "step": 4178, "teacher_loss": 0.33921635150909424 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.44592103362083435, "learning_rate": 1.8124909642908776e-05, "loss": 0.1803, "step": 4179, "teacher_loss": 0.1507425308227539 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.48160549998283386, "learning_rate": 1.8129246783287553e-05, "loss": 0.2409, "step": 4180, "teacher_loss": 0.21416831016540527 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.6662040948867798, "learning_rate": 1.813358392366633e-05, "loss": 0.3531, "step": 4181, "teacher_loss": 0.3183550238609314 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.35104310512542725, "learning_rate": 1.8137921064045105e-05, "loss": 0.2688, "step": 4182, "teacher_loss": 0.2596573233604431 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.48213931918144226, "learning_rate": 1.8142258204423883e-05, "loss": 0.2757, "step": 4183, "teacher_loss": 0.25270798802375793 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.845737636089325, "learning_rate": 1.814659534480266e-05, "loss": 0.2799, "step": 4184, "teacher_loss": 0.21700571477413177 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.34580889344215393, "learning_rate": 1.8150932485181438e-05, "loss": 0.1925, "step": 4185, "teacher_loss": 0.17543494701385498 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.4003676176071167, "learning_rate": 1.8155269625560216e-05, "loss": 0.2833, "step": 4186, "teacher_loss": 0.2702893614768982 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.7470386028289795, "learning_rate": 1.8159606765938993e-05, "loss": 0.3804, "step": 4187, "teacher_loss": 0.33962613344192505 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3823215365409851, "learning_rate": 1.816394390631777e-05, "loss": 0.2035, "step": 4188, "teacher_loss": 0.18361711502075195 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.4612460434436798, "learning_rate": 1.8168281046696545e-05, "loss": 0.2919, "step": 4189, "teacher_loss": 0.2730613350868225 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.46669965982437134, "learning_rate": 1.817261818707532e-05, "loss": 0.3236, "step": 4190, "teacher_loss": 0.3077358901500702 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5682482719421387, "learning_rate": 1.8176955327454097e-05, "loss": 0.3003, "step": 4191, "teacher_loss": 0.27056723833084106 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3474375605583191, "learning_rate": 1.8181292467832875e-05, "loss": 0.2011, "step": 4192, "teacher_loss": 0.18484455347061157 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5370516777038574, "learning_rate": 1.8185629608211652e-05, "loss": 0.3064, "step": 4193, "teacher_loss": 0.28077375888824463 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.2800413966178894, "learning_rate": 1.818996674859043e-05, "loss": 0.2229, "step": 4194, "teacher_loss": 0.21660469472408295 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5470927357673645, "learning_rate": 1.8194303888969207e-05, "loss": 0.2936, "step": 4195, "teacher_loss": 0.26544079184532166 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.4033074676990509, "learning_rate": 1.8198641029347985e-05, "loss": 0.2495, "step": 4196, "teacher_loss": 0.2324121594429016 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.46007657051086426, "learning_rate": 1.8202978169726763e-05, "loss": 0.2429, "step": 4197, "teacher_loss": 0.21881017088890076 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.40594303607940674, "learning_rate": 1.820731531010554e-05, "loss": 0.2359, "step": 4198, "teacher_loss": 0.21699343621730804 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.21994228661060333, "learning_rate": 1.8211652450484315e-05, "loss": 0.1583, "step": 4199, "teacher_loss": 0.15144497156143188 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.7465957403182983, "learning_rate": 1.821598959086309e-05, "loss": 0.3495, "step": 4200, "teacher_loss": 0.30540865659713745 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.2482997477054596, "learning_rate": 1.8220326731241866e-05, "loss": 0.1527, "step": 4201, "teacher_loss": 0.14203517138957977 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.15918280184268951, "learning_rate": 1.8224663871620644e-05, "loss": 0.1606, "step": 4202, "teacher_loss": 0.16076231002807617 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.17935830354690552, "learning_rate": 1.822900101199942e-05, "loss": 0.1941, "step": 4203, "teacher_loss": 0.19575420022010803 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5091278553009033, "learning_rate": 1.82333381523782e-05, "loss": 0.3381, "step": 4204, "teacher_loss": 0.31904828548431396 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.2724432051181793, "learning_rate": 1.8237675292756977e-05, "loss": 0.2053, "step": 4205, "teacher_loss": 0.1978590488433838 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5350421071052551, "learning_rate": 1.8242012433135754e-05, "loss": 0.2348, "step": 4206, "teacher_loss": 0.20147663354873657 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3698226511478424, "learning_rate": 1.8246349573514532e-05, "loss": 0.1819, "step": 4207, "teacher_loss": 0.1610046923160553 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.1945790797472, "learning_rate": 1.8250686713893306e-05, "loss": 0.138, "step": 4208, "teacher_loss": 0.1317148506641388 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5742756128311157, "learning_rate": 1.8255023854272084e-05, "loss": 0.269, "step": 4209, "teacher_loss": 0.23512838780879974 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.7809941172599792, "learning_rate": 1.825936099465086e-05, "loss": 0.252, "step": 4210, "teacher_loss": 0.1931881308555603 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.4742361903190613, "learning_rate": 1.8263698135029636e-05, "loss": 0.243, "step": 4211, "teacher_loss": 0.2172536700963974 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.6045116186141968, "learning_rate": 1.8268035275408413e-05, "loss": 0.2794, "step": 4212, "teacher_loss": 0.24326679110527039 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.9009673595428467, "learning_rate": 1.827237241578719e-05, "loss": 0.3543, "step": 4213, "teacher_loss": 0.29360055923461914 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 1.0596833229064941, "learning_rate": 1.827670955616597e-05, "loss": 0.3608, "step": 4214, "teacher_loss": 0.28312447667121887 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5905895233154297, "learning_rate": 1.8281046696544746e-05, "loss": 0.2753, "step": 4215, "teacher_loss": 0.24023553729057312 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.33441784977912903, "learning_rate": 1.8285383836923524e-05, "loss": 0.211, "step": 4216, "teacher_loss": 0.19732201099395752 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3466756045818329, "learning_rate": 1.8289720977302298e-05, "loss": 0.2018, "step": 4217, "teacher_loss": 0.18565401434898376 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.46034321188926697, "learning_rate": 1.8294058117681076e-05, "loss": 0.1828, "step": 4218, "teacher_loss": 0.15200430154800415 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.24992728233337402, "learning_rate": 1.8298395258059853e-05, "loss": 0.1572, "step": 4219, "teacher_loss": 0.1468605101108551 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.30746909976005554, "learning_rate": 1.830273239843863e-05, "loss": 0.3255, "step": 4220, "teacher_loss": 0.32745373249053955 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.1922600120306015, "learning_rate": 1.8307069538817405e-05, "loss": 0.2117, "step": 4221, "teacher_loss": 0.21388307213783264 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.46663790941238403, "learning_rate": 1.8311406679196183e-05, "loss": 0.3957, "step": 4222, "teacher_loss": 0.3878590762615204 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.46185216307640076, "learning_rate": 1.831574381957496e-05, "loss": 0.2853, "step": 4223, "teacher_loss": 0.26571404933929443 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5915626883506775, "learning_rate": 1.8320080959953738e-05, "loss": 0.3798, "step": 4224, "teacher_loss": 0.3563128113746643 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 1.1518059968948364, "learning_rate": 1.8324418100332516e-05, "loss": 0.7159, "step": 4225, "teacher_loss": 0.6674721240997314 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.5562552809715271, "learning_rate": 1.832875524071129e-05, "loss": 0.3238, "step": 4226, "teacher_loss": 0.2979458272457123 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.20705249905586243, "learning_rate": 1.8333092381090068e-05, "loss": 0.1829, "step": 4227, "teacher_loss": 0.1801748275756836 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.6600640416145325, "learning_rate": 1.8337429521468845e-05, "loss": 0.3064, "step": 4228, "teacher_loss": 0.2671135663986206 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3625275790691376, "learning_rate": 1.8341766661847623e-05, "loss": 0.2495, "step": 4229, "teacher_loss": 0.23694270849227905 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.38866570591926575, "learning_rate": 1.83461038022264e-05, "loss": 0.2127, "step": 4230, "teacher_loss": 0.19310730695724487 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.3258657157421112, "learning_rate": 1.8350440942605178e-05, "loss": 0.2647, "step": 4231, "teacher_loss": 0.25786906480789185 }, { "compression_loss": 0.0, "epoch": 0.76, "label_loss": 0.582978367805481, "learning_rate": 1.8354778082983952e-05, "loss": 0.303, "step": 4232, "teacher_loss": 0.2718997299671173 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4374690651893616, "learning_rate": 1.835911522336273e-05, "loss": 0.2043, "step": 4233, "teacher_loss": 0.17836229503154755 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.5784834027290344, "learning_rate": 1.8363452363741504e-05, "loss": 0.2032, "step": 4234, "teacher_loss": 0.1615065187215805 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.7325087189674377, "learning_rate": 1.8367789504120282e-05, "loss": 0.2505, "step": 4235, "teacher_loss": 0.19689740240573883 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 1.058279037475586, "learning_rate": 1.837212664449906e-05, "loss": 0.5036, "step": 4236, "teacher_loss": 0.44200754165649414 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.5671568512916565, "learning_rate": 1.8376463784877837e-05, "loss": 0.2633, "step": 4237, "teacher_loss": 0.22953970730304718 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.7244428396224976, "learning_rate": 1.8380800925256615e-05, "loss": 0.2668, "step": 4238, "teacher_loss": 0.21591055393218994 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.7140688300132751, "learning_rate": 1.8385138065635392e-05, "loss": 0.2976, "step": 4239, "teacher_loss": 0.25131598114967346 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 1.1110929250717163, "learning_rate": 1.838947520601417e-05, "loss": 0.3138, "step": 4240, "teacher_loss": 0.2252422571182251 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.3740127980709076, "learning_rate": 1.8393812346392948e-05, "loss": 0.2078, "step": 4241, "teacher_loss": 0.18930116295814514 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.6471554040908813, "learning_rate": 1.8398149486771725e-05, "loss": 0.4757, "step": 4242, "teacher_loss": 0.45665058493614197 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.3991910219192505, "learning_rate": 1.8402486627150496e-05, "loss": 0.1933, "step": 4243, "teacher_loss": 0.17043735086917877 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.10748796164989471, "learning_rate": 1.8406823767529274e-05, "loss": 0.1068, "step": 4244, "teacher_loss": 0.1067567691206932 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.1807546615600586, "learning_rate": 1.841116090790805e-05, "loss": 0.2771, "step": 4245, "teacher_loss": 0.28783339262008667 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.3415837585926056, "learning_rate": 1.841549804828683e-05, "loss": 0.2782, "step": 4246, "teacher_loss": 0.2711701989173889 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.26681897044181824, "learning_rate": 1.8419835188665607e-05, "loss": 0.2756, "step": 4247, "teacher_loss": 0.27655911445617676 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4825130105018616, "learning_rate": 1.8424172329044384e-05, "loss": 0.2137, "step": 4248, "teacher_loss": 0.18379493057727814 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4611910581588745, "learning_rate": 1.8428509469423162e-05, "loss": 0.2992, "step": 4249, "teacher_loss": 0.28123822808265686 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.29843151569366455, "learning_rate": 1.843284660980194e-05, "loss": 0.1978, "step": 4250, "teacher_loss": 0.18664315342903137 }, { "epoch": 0.77, "eval_exact_match": 79.70671712393566, "eval_f1": 87.20175780173301, "step": 4250 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.8695374727249146, "learning_rate": 1.8437183750180717e-05, "loss": 0.2438, "step": 4251, "teacher_loss": 0.17428690195083618 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.32610562443733215, "learning_rate": 1.844152089055949e-05, "loss": 0.2326, "step": 4252, "teacher_loss": 0.2221592217683792 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.3297593295574188, "learning_rate": 1.844585803093827e-05, "loss": 0.2234, "step": 4253, "teacher_loss": 0.2115522027015686 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.8076539039611816, "learning_rate": 1.8450195171317043e-05, "loss": 0.346, "step": 4254, "teacher_loss": 0.2947551906108856 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.20926666259765625, "learning_rate": 1.845453231169582e-05, "loss": 0.2008, "step": 4255, "teacher_loss": 0.1998097151517868 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.2916312515735626, "learning_rate": 1.84588694520746e-05, "loss": 0.2409, "step": 4256, "teacher_loss": 0.2352708876132965 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.5025413036346436, "learning_rate": 1.8463206592453376e-05, "loss": 0.2446, "step": 4257, "teacher_loss": 0.21592681109905243 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.1686260998249054, "learning_rate": 1.8467543732832154e-05, "loss": 0.1563, "step": 4258, "teacher_loss": 0.1549108773469925 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.5989721417427063, "learning_rate": 1.847188087321093e-05, "loss": 0.2573, "step": 4259, "teacher_loss": 0.21933308243751526 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.8840031623840332, "learning_rate": 1.847621801358971e-05, "loss": 0.2707, "step": 4260, "teacher_loss": 0.2025284469127655 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.400000661611557, "learning_rate": 1.8480555153968483e-05, "loss": 0.2763, "step": 4261, "teacher_loss": 0.2625943422317505 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.803421676158905, "learning_rate": 1.848489229434726e-05, "loss": 0.4833, "step": 4262, "teacher_loss": 0.4477643370628357 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.24858370423316956, "learning_rate": 1.8489229434726038e-05, "loss": 0.1734, "step": 4263, "teacher_loss": 0.16503044962882996 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4687172770500183, "learning_rate": 1.8493566575104816e-05, "loss": 0.2631, "step": 4264, "teacher_loss": 0.240285724401474 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.33411869406700134, "learning_rate": 1.849790371548359e-05, "loss": 0.2899, "step": 4265, "teacher_loss": 0.28500452637672424 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.19061389565467834, "learning_rate": 1.8502240855862368e-05, "loss": 0.1804, "step": 4266, "teacher_loss": 0.17924764752388 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4153069853782654, "learning_rate": 1.8506577996241145e-05, "loss": 0.3503, "step": 4267, "teacher_loss": 0.34306925535202026 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.42755013704299927, "learning_rate": 1.8510915136619923e-05, "loss": 0.2727, "step": 4268, "teacher_loss": 0.2555101811885834 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.39209532737731934, "learning_rate": 1.85152522769987e-05, "loss": 0.1872, "step": 4269, "teacher_loss": 0.16439926624298096 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.46185368299484253, "learning_rate": 1.8519589417377475e-05, "loss": 0.2132, "step": 4270, "teacher_loss": 0.18558794260025024 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.36124539375305176, "learning_rate": 1.8523926557756252e-05, "loss": 0.26, "step": 4271, "teacher_loss": 0.2487795650959015 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.3830479383468628, "learning_rate": 1.852826369813503e-05, "loss": 0.2743, "step": 4272, "teacher_loss": 0.2621828317642212 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.28602516651153564, "learning_rate": 1.8532600838513808e-05, "loss": 0.1981, "step": 4273, "teacher_loss": 0.18838083744049072 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.46078264713287354, "learning_rate": 1.8536937978892585e-05, "loss": 0.287, "step": 4274, "teacher_loss": 0.26765817403793335 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.6124197244644165, "learning_rate": 1.8541275119271363e-05, "loss": 0.2696, "step": 4275, "teacher_loss": 0.23152336478233337 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.655608057975769, "learning_rate": 1.8545612259650137e-05, "loss": 0.3377, "step": 4276, "teacher_loss": 0.30232250690460205 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.8387855887413025, "learning_rate": 1.8549949400028915e-05, "loss": 0.2682, "step": 4277, "teacher_loss": 0.2047930657863617 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.2550361454486847, "learning_rate": 1.855428654040769e-05, "loss": 0.2114, "step": 4278, "teacher_loss": 0.206498384475708 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.6667413711547852, "learning_rate": 1.8558623680786467e-05, "loss": 0.3013, "step": 4279, "teacher_loss": 0.26074278354644775 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.46464070677757263, "learning_rate": 1.8562960821165244e-05, "loss": 0.2526, "step": 4280, "teacher_loss": 0.2290804088115692 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.4909101724624634, "learning_rate": 1.8567297961544022e-05, "loss": 0.2234, "step": 4281, "teacher_loss": 0.19365081191062927 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.7285306453704834, "learning_rate": 1.85716351019228e-05, "loss": 0.2986, "step": 4282, "teacher_loss": 0.2508789300918579 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.1851373016834259, "learning_rate": 1.8575972242301577e-05, "loss": 0.2098, "step": 4283, "teacher_loss": 0.21254321932792664 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.505715012550354, "learning_rate": 1.8580309382680355e-05, "loss": 0.2558, "step": 4284, "teacher_loss": 0.22800683975219727 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.7456421256065369, "learning_rate": 1.8584646523059132e-05, "loss": 0.4703, "step": 4285, "teacher_loss": 0.43973881006240845 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.40645524859428406, "learning_rate": 1.858898366343791e-05, "loss": 0.2197, "step": 4286, "teacher_loss": 0.19894936680793762 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.6293581128120422, "learning_rate": 1.859332080381668e-05, "loss": 0.6297, "step": 4287, "teacher_loss": 0.629779577255249 }, { "compression_loss": 0.0, "epoch": 0.77, "label_loss": 0.6857258677482605, "learning_rate": 1.859765794419546e-05, "loss": 0.328, "step": 4288, "teacher_loss": 0.2882636785507202 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.510424017906189, "learning_rate": 1.8601995084574236e-05, "loss": 0.2482, "step": 4289, "teacher_loss": 0.21901285648345947 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.611987292766571, "learning_rate": 1.8606332224953014e-05, "loss": 0.2844, "step": 4290, "teacher_loss": 0.24800626933574677 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4525676369667053, "learning_rate": 1.861066936533179e-05, "loss": 0.2028, "step": 4291, "teacher_loss": 0.17501947283744812 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.47354036569595337, "learning_rate": 1.861500650571057e-05, "loss": 0.3284, "step": 4292, "teacher_loss": 0.3122839629650116 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.213621586561203, "learning_rate": 1.8619343646089347e-05, "loss": 0.2245, "step": 4293, "teacher_loss": 0.22573347389698029 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.6952092051506042, "learning_rate": 1.8623680786468124e-05, "loss": 0.3585, "step": 4294, "teacher_loss": 0.32106664776802063 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.18327361345291138, "learning_rate": 1.8628017926846902e-05, "loss": 0.1669, "step": 4295, "teacher_loss": 0.16503292322158813 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4421996772289276, "learning_rate": 1.8632355067225676e-05, "loss": 0.2507, "step": 4296, "teacher_loss": 0.2294618785381317 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5327869653701782, "learning_rate": 1.8636692207604454e-05, "loss": 0.2367, "step": 4297, "teacher_loss": 0.2037869691848755 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.40128207206726074, "learning_rate": 1.8641029347983228e-05, "loss": 0.1956, "step": 4298, "teacher_loss": 0.17278623580932617 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2973548173904419, "learning_rate": 1.8645366488362006e-05, "loss": 0.2575, "step": 4299, "teacher_loss": 0.25308579206466675 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4530125558376312, "learning_rate": 1.8649703628740783e-05, "loss": 0.2296, "step": 4300, "teacher_loss": 0.20473836362361908 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2388342022895813, "learning_rate": 1.865404076911956e-05, "loss": 0.1925, "step": 4301, "teacher_loss": 0.187351256608963 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.368000864982605, "learning_rate": 1.865837790949834e-05, "loss": 0.2046, "step": 4302, "teacher_loss": 0.18639764189720154 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.6789907217025757, "learning_rate": 1.8662715049877116e-05, "loss": 0.2883, "step": 4303, "teacher_loss": 0.24485091865062714 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.7069035768508911, "learning_rate": 1.8667052190255894e-05, "loss": 0.3865, "step": 4304, "teacher_loss": 0.3508761525154114 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3162829577922821, "learning_rate": 1.8671389330634668e-05, "loss": 0.1527, "step": 4305, "teacher_loss": 0.13452647626399994 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2658165693283081, "learning_rate": 1.8675726471013446e-05, "loss": 0.2238, "step": 4306, "teacher_loss": 0.21911801397800446 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.8025497198104858, "learning_rate": 1.8680063611392223e-05, "loss": 0.3371, "step": 4307, "teacher_loss": 0.2854340076446533 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3647902011871338, "learning_rate": 1.8684400751771e-05, "loss": 0.2645, "step": 4308, "teacher_loss": 0.25339561700820923 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4634060859680176, "learning_rate": 1.8688737892149775e-05, "loss": 0.2643, "step": 4309, "teacher_loss": 0.24221907556056976 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5004192590713501, "learning_rate": 1.8693075032528553e-05, "loss": 0.2147, "step": 4310, "teacher_loss": 0.18291622400283813 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5047429203987122, "learning_rate": 1.869741217290733e-05, "loss": 0.2629, "step": 4311, "teacher_loss": 0.23603002727031708 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.29406580328941345, "learning_rate": 1.8701749313286108e-05, "loss": 0.201, "step": 4312, "teacher_loss": 0.1907017081975937 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.22989502549171448, "learning_rate": 1.8706086453664886e-05, "loss": 0.2088, "step": 4313, "teacher_loss": 0.20644724369049072 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5856242179870605, "learning_rate": 1.871042359404366e-05, "loss": 0.2436, "step": 4314, "teacher_loss": 0.20557467639446259 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.7974061369895935, "learning_rate": 1.8714760734422437e-05, "loss": 0.5391, "step": 4315, "teacher_loss": 0.5103491544723511 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3375333547592163, "learning_rate": 1.8719097874801215e-05, "loss": 0.2046, "step": 4316, "teacher_loss": 0.18983879685401917 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4497114419937134, "learning_rate": 1.8723435015179993e-05, "loss": 0.3025, "step": 4317, "teacher_loss": 0.28611230850219727 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.37641435861587524, "learning_rate": 1.872777215555877e-05, "loss": 0.4076, "step": 4318, "teacher_loss": 0.4110143780708313 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.38564983010292053, "learning_rate": 1.8732109295937548e-05, "loss": 0.2584, "step": 4319, "teacher_loss": 0.24423296749591827 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.9457871913909912, "learning_rate": 1.8736446436316322e-05, "loss": 0.4431, "step": 4320, "teacher_loss": 0.38722193241119385 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.19923396408557892, "learning_rate": 1.87407835766951e-05, "loss": 0.1703, "step": 4321, "teacher_loss": 0.16704685986042023 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4149247407913208, "learning_rate": 1.8745120717073877e-05, "loss": 0.2437, "step": 4322, "teacher_loss": 0.2247181087732315 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2385530173778534, "learning_rate": 1.874945785745265e-05, "loss": 0.2707, "step": 4323, "teacher_loss": 0.2742440700531006 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2685689330101013, "learning_rate": 1.875379499783143e-05, "loss": 0.1763, "step": 4324, "teacher_loss": 0.16601906716823578 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3340357840061188, "learning_rate": 1.8758132138210207e-05, "loss": 0.2406, "step": 4325, "teacher_loss": 0.2302560806274414 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.2699044942855835, "learning_rate": 1.8762469278588984e-05, "loss": 0.2055, "step": 4326, "teacher_loss": 0.1983424872159958 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.11705869436264038, "learning_rate": 1.8766806418967762e-05, "loss": 0.1649, "step": 4327, "teacher_loss": 0.17017850279808044 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.45480579137802124, "learning_rate": 1.877114355934654e-05, "loss": 0.2887, "step": 4328, "teacher_loss": 0.27021324634552 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.6701047420501709, "learning_rate": 1.8775480699725317e-05, "loss": 0.2893, "step": 4329, "teacher_loss": 0.24704372882843018 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4310171902179718, "learning_rate": 1.877981784010409e-05, "loss": 0.2244, "step": 4330, "teacher_loss": 0.20142894983291626 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4295814037322998, "learning_rate": 1.8784154980482866e-05, "loss": 0.2449, "step": 4331, "teacher_loss": 0.22436144948005676 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.1813291609287262, "learning_rate": 1.8788492120861643e-05, "loss": 0.1505, "step": 4332, "teacher_loss": 0.14709439873695374 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5309654474258423, "learning_rate": 1.879282926124042e-05, "loss": 0.2564, "step": 4333, "teacher_loss": 0.22584135830402374 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3411603569984436, "learning_rate": 1.87971664016192e-05, "loss": 0.2055, "step": 4334, "teacher_loss": 0.19046136736869812 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5316437482833862, "learning_rate": 1.8801503541997976e-05, "loss": 0.3266, "step": 4335, "teacher_loss": 0.3037664294242859 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.5856420993804932, "learning_rate": 1.8805840682376754e-05, "loss": 0.2513, "step": 4336, "teacher_loss": 0.21410749852657318 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.12336384505033493, "learning_rate": 1.881017782275553e-05, "loss": 0.2165, "step": 4337, "teacher_loss": 0.22685931622982025 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4408440589904785, "learning_rate": 1.881451496313431e-05, "loss": 0.1913, "step": 4338, "teacher_loss": 0.16360166668891907 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.31678009033203125, "learning_rate": 1.8818852103513087e-05, "loss": 0.1868, "step": 4339, "teacher_loss": 0.17239277064800262 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3320227265357971, "learning_rate": 1.882318924389186e-05, "loss": 0.2661, "step": 4340, "teacher_loss": 0.25879985094070435 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.6310441493988037, "learning_rate": 1.8827526384270635e-05, "loss": 0.2476, "step": 4341, "teacher_loss": 0.2049868404865265 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.4937819838523865, "learning_rate": 1.8831863524649413e-05, "loss": 0.3782, "step": 4342, "teacher_loss": 0.36540526151657104 }, { "compression_loss": 0.0, "epoch": 0.78, "label_loss": 0.3894301950931549, "learning_rate": 1.883620066502819e-05, "loss": 0.3566, "step": 4343, "teacher_loss": 0.35293471813201904 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.7206357717514038, "learning_rate": 1.8840537805406968e-05, "loss": 0.3451, "step": 4344, "teacher_loss": 0.3033825159072876 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.26766490936279297, "learning_rate": 1.8844874945785746e-05, "loss": 0.1951, "step": 4345, "teacher_loss": 0.18700119853019714 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4767746031284332, "learning_rate": 1.8849212086164523e-05, "loss": 0.2943, "step": 4346, "teacher_loss": 0.2739824652671814 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.45957350730895996, "learning_rate": 1.88535492265433e-05, "loss": 0.2478, "step": 4347, "teacher_loss": 0.22431239485740662 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5060524344444275, "learning_rate": 1.885788636692208e-05, "loss": 0.4291, "step": 4348, "teacher_loss": 0.42052415013313293 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.3664916157722473, "learning_rate": 1.8862223507300853e-05, "loss": 0.1967, "step": 4349, "teacher_loss": 0.1778573989868164 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.16738006472587585, "learning_rate": 1.886656064767963e-05, "loss": 0.1769, "step": 4350, "teacher_loss": 0.1779191941022873 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4389292001724243, "learning_rate": 1.8870897788058408e-05, "loss": 0.3306, "step": 4351, "teacher_loss": 0.31855449080467224 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.18302121758460999, "learning_rate": 1.8875234928437182e-05, "loss": 0.1943, "step": 4352, "teacher_loss": 0.1955508589744568 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.2137041985988617, "learning_rate": 1.887957206881596e-05, "loss": 0.2202, "step": 4353, "teacher_loss": 0.22086820006370544 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.31997427344322205, "learning_rate": 1.8883909209194738e-05, "loss": 0.1451, "step": 4354, "teacher_loss": 0.12569984793663025 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4039191007614136, "learning_rate": 1.8888246349573515e-05, "loss": 0.1827, "step": 4355, "teacher_loss": 0.15808013081550598 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.24972745776176453, "learning_rate": 1.8892583489952293e-05, "loss": 0.2618, "step": 4356, "teacher_loss": 0.2631451487541199 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5144719481468201, "learning_rate": 1.889692063033107e-05, "loss": 0.21, "step": 4357, "teacher_loss": 0.17612047493457794 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.37136709690093994, "learning_rate": 1.8901257770709845e-05, "loss": 0.2455, "step": 4358, "teacher_loss": 0.2315484881401062 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.37611523270606995, "learning_rate": 1.8905594911088622e-05, "loss": 0.2167, "step": 4359, "teacher_loss": 0.19899940490722656 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.1258644163608551, "learning_rate": 1.89099320514674e-05, "loss": 0.1974, "step": 4360, "teacher_loss": 0.2053266167640686 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 1.2536430358886719, "learning_rate": 1.8914269191846178e-05, "loss": 0.3545, "step": 4361, "teacher_loss": 0.2545698583126068 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4635392427444458, "learning_rate": 1.8918606332224955e-05, "loss": 0.2167, "step": 4362, "teacher_loss": 0.18924319744110107 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.20551873743534088, "learning_rate": 1.892294347260373e-05, "loss": 0.2025, "step": 4363, "teacher_loss": 0.20219628512859344 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 1.0362813472747803, "learning_rate": 1.8927280612982507e-05, "loss": 0.375, "step": 4364, "teacher_loss": 0.30153319239616394 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.6008142828941345, "learning_rate": 1.8931617753361285e-05, "loss": 0.2931, "step": 4365, "teacher_loss": 0.2588798403739929 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4625106453895569, "learning_rate": 1.8935954893740062e-05, "loss": 0.3069, "step": 4366, "teacher_loss": 0.2896609306335449 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.1407613456249237, "learning_rate": 1.8940292034118836e-05, "loss": 0.2725, "step": 4367, "teacher_loss": 0.2870877981185913 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.10765865445137024, "learning_rate": 1.8944629174497614e-05, "loss": 0.17, "step": 4368, "teacher_loss": 0.17694024741649628 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.32489126920700073, "learning_rate": 1.8948966314876392e-05, "loss": 0.1743, "step": 4369, "teacher_loss": 0.15759655833244324 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.9479628205299377, "learning_rate": 1.895330345525517e-05, "loss": 0.309, "step": 4370, "teacher_loss": 0.23800595104694366 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.3562701940536499, "learning_rate": 1.8957640595633947e-05, "loss": 0.2658, "step": 4371, "teacher_loss": 0.25572866201400757 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.7417806386947632, "learning_rate": 1.8961977736012725e-05, "loss": 0.2852, "step": 4372, "teacher_loss": 0.23442819714546204 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.28376078605651855, "learning_rate": 1.8966314876391502e-05, "loss": 0.1882, "step": 4373, "teacher_loss": 0.1775858998298645 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.2423812448978424, "learning_rate": 1.8970652016770276e-05, "loss": 0.2022, "step": 4374, "teacher_loss": 0.19768668711185455 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.6269419193267822, "learning_rate": 1.897498915714905e-05, "loss": 0.2639, "step": 4375, "teacher_loss": 0.2235143482685089 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.9645397663116455, "learning_rate": 1.8979326297527828e-05, "loss": 0.3536, "step": 4376, "teacher_loss": 0.285714715719223 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4486308693885803, "learning_rate": 1.8983663437906606e-05, "loss": 0.2232, "step": 4377, "teacher_loss": 0.19817957282066345 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.7429558634757996, "learning_rate": 1.8988000578285384e-05, "loss": 0.3039, "step": 4378, "teacher_loss": 0.2551349997520447 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.46867793798446655, "learning_rate": 1.899233771866416e-05, "loss": 0.3604, "step": 4379, "teacher_loss": 0.34834498167037964 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.35948532819747925, "learning_rate": 1.899667485904294e-05, "loss": 0.2138, "step": 4380, "teacher_loss": 0.1976063847541809 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5096275806427002, "learning_rate": 1.9001011999421716e-05, "loss": 0.2612, "step": 4381, "teacher_loss": 0.23363693058490753 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.2687079906463623, "learning_rate": 1.9005349139800494e-05, "loss": 0.2021, "step": 4382, "teacher_loss": 0.19468240439891815 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4174654185771942, "learning_rate": 1.900968628017927e-05, "loss": 0.2269, "step": 4383, "teacher_loss": 0.20571528375148773 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.34681329131126404, "learning_rate": 1.9014023420558046e-05, "loss": 0.1947, "step": 4384, "teacher_loss": 0.17775966227054596 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.47667616605758667, "learning_rate": 1.901836056093682e-05, "loss": 0.3132, "step": 4385, "teacher_loss": 0.2949827015399933 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5764468908309937, "learning_rate": 1.9022697701315598e-05, "loss": 0.2507, "step": 4386, "teacher_loss": 0.2144799530506134 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.20920279622077942, "learning_rate": 1.9027034841694375e-05, "loss": 0.217, "step": 4387, "teacher_loss": 0.21788394451141357 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.4802161455154419, "learning_rate": 1.9031371982073153e-05, "loss": 0.3171, "step": 4388, "teacher_loss": 0.2989689111709595 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.3222218155860901, "learning_rate": 1.903570912245193e-05, "loss": 0.245, "step": 4389, "teacher_loss": 0.2364031821489334 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.692144513130188, "learning_rate": 1.9040046262830708e-05, "loss": 0.3058, "step": 4390, "teacher_loss": 0.26290571689605713 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.49712514877319336, "learning_rate": 1.9044383403209486e-05, "loss": 0.289, "step": 4391, "teacher_loss": 0.26592397689819336 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.6742779016494751, "learning_rate": 1.9048720543588263e-05, "loss": 0.3311, "step": 4392, "teacher_loss": 0.292987585067749 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.18801485002040863, "learning_rate": 1.9053057683967038e-05, "loss": 0.2078, "step": 4393, "teacher_loss": 0.2099807858467102 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5727881789207458, "learning_rate": 1.9057394824345815e-05, "loss": 0.2674, "step": 4394, "teacher_loss": 0.23346683382987976 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.41732239723205566, "learning_rate": 1.9061731964724593e-05, "loss": 0.2591, "step": 4395, "teacher_loss": 0.2415492683649063 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.5799175500869751, "learning_rate": 1.9066069105103367e-05, "loss": 0.3929, "step": 4396, "teacher_loss": 0.37212449312210083 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.27822983264923096, "learning_rate": 1.9070406245482145e-05, "loss": 0.2241, "step": 4397, "teacher_loss": 0.21813462674617767 }, { "compression_loss": 0.0, "epoch": 0.79, "label_loss": 0.47497743368148804, "learning_rate": 1.9074743385860922e-05, "loss": 0.221, "step": 4398, "teacher_loss": 0.19275131821632385 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.46600785851478577, "learning_rate": 1.90790805262397e-05, "loss": 0.2605, "step": 4399, "teacher_loss": 0.23764127492904663 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5026023983955383, "learning_rate": 1.9083417666618478e-05, "loss": 0.2039, "step": 4400, "teacher_loss": 0.1706666499376297 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3185422718524933, "learning_rate": 1.9087754806997255e-05, "loss": 0.1971, "step": 4401, "teacher_loss": 0.183591827750206 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.17662450671195984, "learning_rate": 1.909209194737603e-05, "loss": 0.1764, "step": 4402, "teacher_loss": 0.17642992734909058 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.6701233983039856, "learning_rate": 1.9096429087754807e-05, "loss": 0.2296, "step": 4403, "teacher_loss": 0.18062585592269897 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3591124415397644, "learning_rate": 1.9100766228133585e-05, "loss": 0.2458, "step": 4404, "teacher_loss": 0.23325462639331818 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3126135468482971, "learning_rate": 1.9105103368512362e-05, "loss": 0.2359, "step": 4405, "teacher_loss": 0.22732990980148315 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.606193482875824, "learning_rate": 1.910944050889114e-05, "loss": 0.3175, "step": 4406, "teacher_loss": 0.28538602590560913 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5236458778381348, "learning_rate": 1.9113777649269914e-05, "loss": 0.3086, "step": 4407, "teacher_loss": 0.2846558094024658 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.7959356307983398, "learning_rate": 1.9118114789648692e-05, "loss": 0.2892, "step": 4408, "teacher_loss": 0.232901930809021 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.11919496953487396, "learning_rate": 1.912245193002747e-05, "loss": 0.2229, "step": 4409, "teacher_loss": 0.23442482948303223 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.1469697207212448, "learning_rate": 1.9126789070406247e-05, "loss": 0.1882, "step": 4410, "teacher_loss": 0.19283178448677063 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.6454266309738159, "learning_rate": 1.913112621078502e-05, "loss": 0.223, "step": 4411, "teacher_loss": 0.17607924342155457 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4967457354068756, "learning_rate": 1.91354633511638e-05, "loss": 0.2718, "step": 4412, "teacher_loss": 0.24685239791870117 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.20355309545993805, "learning_rate": 1.9139800491542577e-05, "loss": 0.1974, "step": 4413, "teacher_loss": 0.19666561484336853 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5515495538711548, "learning_rate": 1.9144137631921354e-05, "loss": 0.2753, "step": 4414, "teacher_loss": 0.24464194476604462 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.7646429538726807, "learning_rate": 1.9148474772300132e-05, "loss": 0.3109, "step": 4415, "teacher_loss": 0.26045793294906616 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.48189833760261536, "learning_rate": 1.915281191267891e-05, "loss": 0.2156, "step": 4416, "teacher_loss": 0.18598628044128418 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.23272691667079926, "learning_rate": 1.9157149053057687e-05, "loss": 0.1379, "step": 4417, "teacher_loss": 0.12737099826335907 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.1874842494726181, "learning_rate": 1.916148619343646e-05, "loss": 0.1954, "step": 4418, "teacher_loss": 0.1963045299053192 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 1.0665611028671265, "learning_rate": 1.9165823333815236e-05, "loss": 0.2963, "step": 4419, "teacher_loss": 0.2107498198747635 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4177074432373047, "learning_rate": 1.9170160474194013e-05, "loss": 0.2361, "step": 4420, "teacher_loss": 0.215923011302948 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3376600742340088, "learning_rate": 1.917449761457279e-05, "loss": 0.1924, "step": 4421, "teacher_loss": 0.1762087643146515 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4542146921157837, "learning_rate": 1.917883475495157e-05, "loss": 0.3138, "step": 4422, "teacher_loss": 0.29821836948394775 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.466405987739563, "learning_rate": 1.9183171895330346e-05, "loss": 0.2414, "step": 4423, "teacher_loss": 0.21644729375839233 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.427219420671463, "learning_rate": 1.9187509035709124e-05, "loss": 0.2153, "step": 4424, "teacher_loss": 0.19176019728183746 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.40870997309684753, "learning_rate": 1.91918461760879e-05, "loss": 0.2934, "step": 4425, "teacher_loss": 0.28061914443969727 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.25932276248931885, "learning_rate": 1.919618331646668e-05, "loss": 0.1597, "step": 4426, "teacher_loss": 0.14860644936561584 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.29136013984680176, "learning_rate": 1.9200520456845457e-05, "loss": 0.2701, "step": 4427, "teacher_loss": 0.26779091358184814 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.16029603779315948, "learning_rate": 1.920485759722423e-05, "loss": 0.1946, "step": 4428, "teacher_loss": 0.19842372834682465 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4553366005420685, "learning_rate": 1.9209194737603005e-05, "loss": 0.2888, "step": 4429, "teacher_loss": 0.27033478021621704 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.49068546295166016, "learning_rate": 1.9213531877981783e-05, "loss": 0.2491, "step": 4430, "teacher_loss": 0.22228951752185822 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.15509988367557526, "learning_rate": 1.921786901836056e-05, "loss": 0.1933, "step": 4431, "teacher_loss": 0.19758708775043488 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5387653708457947, "learning_rate": 1.9222206158739338e-05, "loss": 0.2188, "step": 4432, "teacher_loss": 0.18327876925468445 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3255700469017029, "learning_rate": 1.9226543299118115e-05, "loss": 0.1667, "step": 4433, "teacher_loss": 0.14904238283634186 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.6602993607521057, "learning_rate": 1.9230880439496893e-05, "loss": 0.2633, "step": 4434, "teacher_loss": 0.21921959519386292 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.29587626457214355, "learning_rate": 1.923521757987567e-05, "loss": 0.3065, "step": 4435, "teacher_loss": 0.3076714277267456 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.32020118832588196, "learning_rate": 1.923955472025445e-05, "loss": 0.2694, "step": 4436, "teacher_loss": 0.2637360692024231 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4616737961769104, "learning_rate": 1.9243891860633223e-05, "loss": 0.2317, "step": 4437, "teacher_loss": 0.2061809003353119 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.49196869134902954, "learning_rate": 1.9248229001012e-05, "loss": 0.2708, "step": 4438, "teacher_loss": 0.24619725346565247 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.7121244072914124, "learning_rate": 1.9252566141390774e-05, "loss": 0.2834, "step": 4439, "teacher_loss": 0.2357184737920761 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.2487257868051529, "learning_rate": 1.9256903281769552e-05, "loss": 0.2492, "step": 4440, "teacher_loss": 0.2492329627275467 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 1.1728515625, "learning_rate": 1.926124042214833e-05, "loss": 0.443, "step": 4441, "teacher_loss": 0.3619272708892822 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.09628164768218994, "learning_rate": 1.9265577562527107e-05, "loss": 0.175, "step": 4442, "teacher_loss": 0.18369171023368835 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.26564958691596985, "learning_rate": 1.9269914702905885e-05, "loss": 0.2133, "step": 4443, "teacher_loss": 0.20748114585876465 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.23224981129169464, "learning_rate": 1.9274251843284663e-05, "loss": 0.2052, "step": 4444, "teacher_loss": 0.2021392285823822 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.23544606566429138, "learning_rate": 1.927858898366344e-05, "loss": 0.152, "step": 4445, "teacher_loss": 0.14270824193954468 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.4909461736679077, "learning_rate": 1.9282926124042214e-05, "loss": 0.2514, "step": 4446, "teacher_loss": 0.22475308179855347 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5512707233428955, "learning_rate": 1.9287263264420992e-05, "loss": 0.3369, "step": 4447, "teacher_loss": 0.31307852268218994 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.5532106757164001, "learning_rate": 1.929160040479977e-05, "loss": 0.1792, "step": 4448, "teacher_loss": 0.1376638114452362 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.6336266398429871, "learning_rate": 1.9295937545178547e-05, "loss": 0.3304, "step": 4449, "teacher_loss": 0.2966945171356201 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.48162052035331726, "learning_rate": 1.930027468555732e-05, "loss": 0.2549, "step": 4450, "teacher_loss": 0.2297634780406952 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.3254720866680145, "learning_rate": 1.93046118259361e-05, "loss": 0.2556, "step": 4451, "teacher_loss": 0.247822105884552 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.24370527267456055, "learning_rate": 1.9308948966314877e-05, "loss": 0.1881, "step": 4452, "teacher_loss": 0.18191999197006226 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.22032126784324646, "learning_rate": 1.9313286106693654e-05, "loss": 0.1952, "step": 4453, "teacher_loss": 0.1924436092376709 }, { "compression_loss": 0.0, "epoch": 0.8, "label_loss": 0.24624371528625488, "learning_rate": 1.9317623247072432e-05, "loss": 0.1953, "step": 4454, "teacher_loss": 0.18967103958129883 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.6548234224319458, "learning_rate": 1.9321960387451206e-05, "loss": 0.2313, "step": 4455, "teacher_loss": 0.1841966211795807 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.33025023341178894, "learning_rate": 1.9326297527829984e-05, "loss": 0.2939, "step": 4456, "teacher_loss": 0.2898273468017578 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5020718574523926, "learning_rate": 1.933063466820876e-05, "loss": 0.2328, "step": 4457, "teacher_loss": 0.20286162197589874 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5606284737586975, "learning_rate": 1.933497180858754e-05, "loss": 0.2319, "step": 4458, "teacher_loss": 0.1953597366809845 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.4259171485900879, "learning_rate": 1.9339308948966317e-05, "loss": 0.2569, "step": 4459, "teacher_loss": 0.23806552588939667 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.6561440229415894, "learning_rate": 1.9343646089345094e-05, "loss": 0.2967, "step": 4460, "teacher_loss": 0.2567846179008484 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.49232596158981323, "learning_rate": 1.934798322972387e-05, "loss": 0.1883, "step": 4461, "teacher_loss": 0.15450426936149597 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5243464708328247, "learning_rate": 1.9352320370102646e-05, "loss": 0.2819, "step": 4462, "teacher_loss": 0.2549181580543518 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.8740881085395813, "learning_rate": 1.9356657510481424e-05, "loss": 0.3465, "step": 4463, "teacher_loss": 0.28791743516921997 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5317016839981079, "learning_rate": 1.9360994650860198e-05, "loss": 0.1958, "step": 4464, "teacher_loss": 0.15848064422607422 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.26249051094055176, "learning_rate": 1.9365331791238976e-05, "loss": 0.2518, "step": 4465, "teacher_loss": 0.2506164610385895 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.314662367105484, "learning_rate": 1.9369668931617753e-05, "loss": 0.238, "step": 4466, "teacher_loss": 0.22951368987560272 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.2412196695804596, "learning_rate": 1.937400607199653e-05, "loss": 0.2267, "step": 4467, "teacher_loss": 0.22508668899536133 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.25784194469451904, "learning_rate": 1.937834321237531e-05, "loss": 0.2289, "step": 4468, "teacher_loss": 0.22573524713516235 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.19536930322647095, "learning_rate": 1.9382680352754086e-05, "loss": 0.2332, "step": 4469, "teacher_loss": 0.23738746345043182 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.4496898949146271, "learning_rate": 1.9387017493132864e-05, "loss": 0.2513, "step": 4470, "teacher_loss": 0.2292943298816681 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5700011253356934, "learning_rate": 1.939135463351164e-05, "loss": 0.2542, "step": 4471, "teacher_loss": 0.21911413967609406 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.2527185082435608, "learning_rate": 1.9395691773890412e-05, "loss": 0.1704, "step": 4472, "teacher_loss": 0.1612669825553894 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.22570839524269104, "learning_rate": 1.940002891426919e-05, "loss": 0.2097, "step": 4473, "teacher_loss": 0.20792317390441895 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.47496742010116577, "learning_rate": 1.9404366054647967e-05, "loss": 0.1577, "step": 4474, "teacher_loss": 0.12246362864971161 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.34297117590904236, "learning_rate": 1.9408703195026745e-05, "loss": 0.2986, "step": 4475, "teacher_loss": 0.29361492395401 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5019428133964539, "learning_rate": 1.9413040335405523e-05, "loss": 0.3957, "step": 4476, "teacher_loss": 0.3838716745376587 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.293230801820755, "learning_rate": 1.94173774757843e-05, "loss": 0.3029, "step": 4477, "teacher_loss": 0.3039575517177582 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.7814189195632935, "learning_rate": 1.9421714616163078e-05, "loss": 0.3019, "step": 4478, "teacher_loss": 0.24858203530311584 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.34480249881744385, "learning_rate": 1.9426051756541856e-05, "loss": 0.1764, "step": 4479, "teacher_loss": 0.1577427089214325 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.7515468597412109, "learning_rate": 1.9430388896920633e-05, "loss": 0.34, "step": 4480, "teacher_loss": 0.29423367977142334 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.21799224615097046, "learning_rate": 1.9434726037299407e-05, "loss": 0.2388, "step": 4481, "teacher_loss": 0.24106183648109436 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 1.3107478618621826, "learning_rate": 1.9439063177678185e-05, "loss": 0.3998, "step": 4482, "teacher_loss": 0.29853737354278564 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.48927634954452515, "learning_rate": 1.944340031805696e-05, "loss": 0.2174, "step": 4483, "teacher_loss": 0.18718752264976501 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.7885831594467163, "learning_rate": 1.9447737458435737e-05, "loss": 0.319, "step": 4484, "teacher_loss": 0.26680922508239746 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.26493918895721436, "learning_rate": 1.9452074598814515e-05, "loss": 0.1657, "step": 4485, "teacher_loss": 0.15462952852249146 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.3576187193393707, "learning_rate": 1.9456411739193292e-05, "loss": 0.1861, "step": 4486, "teacher_loss": 0.16705426573753357 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.727882981300354, "learning_rate": 1.946074887957207e-05, "loss": 0.3635, "step": 4487, "teacher_loss": 0.3230496048927307 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.6405210494995117, "learning_rate": 1.9465086019950847e-05, "loss": 0.231, "step": 4488, "teacher_loss": 0.18550699949264526 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.3781351149082184, "learning_rate": 1.9469423160329625e-05, "loss": 0.2928, "step": 4489, "teacher_loss": 0.283273845911026 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.4769185781478882, "learning_rate": 1.94737603007084e-05, "loss": 0.1955, "step": 4490, "teacher_loss": 0.16424641013145447 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.35994961857795715, "learning_rate": 1.9478097441087177e-05, "loss": 0.2571, "step": 4491, "teacher_loss": 0.24570125341415405 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.9124099016189575, "learning_rate": 1.9482434581465955e-05, "loss": 0.3447, "step": 4492, "teacher_loss": 0.28157228231430054 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5469323396682739, "learning_rate": 1.9486771721844732e-05, "loss": 0.2162, "step": 4493, "teacher_loss": 0.1794590950012207 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.6094321012496948, "learning_rate": 1.9491108862223506e-05, "loss": 0.2406, "step": 4494, "teacher_loss": 0.19964975118637085 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.39998501539230347, "learning_rate": 1.9495446002602284e-05, "loss": 0.2119, "step": 4495, "teacher_loss": 0.19099244475364685 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.30796992778778076, "learning_rate": 1.949978314298106e-05, "loss": 0.1991, "step": 4496, "teacher_loss": 0.18694816529750824 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.24383272230625153, "learning_rate": 1.950412028335984e-05, "loss": 0.1816, "step": 4497, "teacher_loss": 0.17470115423202515 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.9361656904220581, "learning_rate": 1.9508457423738617e-05, "loss": 0.3268, "step": 4498, "teacher_loss": 0.25906845927238464 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.350935161113739, "learning_rate": 1.951279456411739e-05, "loss": 0.1994, "step": 4499, "teacher_loss": 0.18253061175346375 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5836977958679199, "learning_rate": 1.951713170449617e-05, "loss": 0.2236, "step": 4500, "teacher_loss": 0.1836300939321518 }, { "epoch": 0.81, "eval_exact_match": 79.68779564806054, "eval_f1": 87.29877574959808, "step": 4500 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.3788067698478699, "learning_rate": 1.9521468844874946e-05, "loss": 0.2306, "step": 4501, "teacher_loss": 0.2141350507736206 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.6703027486801147, "learning_rate": 1.9525805985253724e-05, "loss": 0.5627, "step": 4502, "teacher_loss": 0.5506966710090637 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.5232012271881104, "learning_rate": 1.95301431256325e-05, "loss": 0.2659, "step": 4503, "teacher_loss": 0.2373151183128357 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.8341158628463745, "learning_rate": 1.953448026601128e-05, "loss": 0.439, "step": 4504, "teacher_loss": 0.3950817584991455 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.23976443707942963, "learning_rate": 1.9538817406390053e-05, "loss": 0.203, "step": 4505, "teacher_loss": 0.19886967539787292 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.8551978468894958, "learning_rate": 1.954315454676883e-05, "loss": 0.3792, "step": 4506, "teacher_loss": 0.32635772228240967 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.41043850779533386, "learning_rate": 1.954749168714761e-05, "loss": 0.211, "step": 4507, "teacher_loss": 0.18885841965675354 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 1.0946969985961914, "learning_rate": 1.9551828827526383e-05, "loss": 0.3556, "step": 4508, "teacher_loss": 0.27352410554885864 }, { "compression_loss": 0.0, "epoch": 0.81, "label_loss": 0.9388766288757324, "learning_rate": 1.955616596790516e-05, "loss": 0.422, "step": 4509, "teacher_loss": 0.3645484447479248 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.27205973863601685, "learning_rate": 1.9560503108283938e-05, "loss": 0.1944, "step": 4510, "teacher_loss": 0.1857774555683136 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.28105664253234863, "learning_rate": 1.9564840248662716e-05, "loss": 0.1694, "step": 4511, "teacher_loss": 0.15697333216667175 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.4196627736091614, "learning_rate": 1.9569177389041493e-05, "loss": 0.2512, "step": 4512, "teacher_loss": 0.23247992992401123 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.49374252557754517, "learning_rate": 1.957351452942027e-05, "loss": 0.2479, "step": 4513, "teacher_loss": 0.2206338346004486 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3415621519088745, "learning_rate": 1.957785166979905e-05, "loss": 0.2428, "step": 4514, "teacher_loss": 0.23177313804626465 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3653858006000519, "learning_rate": 1.9582188810177826e-05, "loss": 0.2333, "step": 4515, "teacher_loss": 0.21857471764087677 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.7266503572463989, "learning_rate": 1.9586525950556597e-05, "loss": 0.2639, "step": 4516, "teacher_loss": 0.21243906021118164 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.30429768562316895, "learning_rate": 1.9590863090935375e-05, "loss": 0.1938, "step": 4517, "teacher_loss": 0.18154282867908478 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.33662354946136475, "learning_rate": 1.9595200231314152e-05, "loss": 0.2115, "step": 4518, "teacher_loss": 0.19755561649799347 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.41198205947875977, "learning_rate": 1.959953737169293e-05, "loss": 0.2634, "step": 4519, "teacher_loss": 0.24690744280815125 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.24396567046642303, "learning_rate": 1.9603874512071708e-05, "loss": 0.1828, "step": 4520, "teacher_loss": 0.17596638202667236 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3098486065864563, "learning_rate": 1.9608211652450485e-05, "loss": 0.2258, "step": 4521, "teacher_loss": 0.21641576290130615 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.4102702736854553, "learning_rate": 1.9612548792829263e-05, "loss": 0.2663, "step": 4522, "teacher_loss": 0.25027284026145935 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.6989509463310242, "learning_rate": 1.961688593320804e-05, "loss": 0.2886, "step": 4523, "teacher_loss": 0.2429679036140442 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.7013245820999146, "learning_rate": 1.9621223073586818e-05, "loss": 0.2614, "step": 4524, "teacher_loss": 0.2124658226966858 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.2899213433265686, "learning_rate": 1.9625560213965592e-05, "loss": 0.2247, "step": 4525, "teacher_loss": 0.21745355427265167 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.30669698119163513, "learning_rate": 1.962989735434437e-05, "loss": 0.1849, "step": 4526, "teacher_loss": 0.17131514847278595 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.2988119423389435, "learning_rate": 1.9634234494723144e-05, "loss": 0.2646, "step": 4527, "teacher_loss": 0.2607831358909607 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.34778010845184326, "learning_rate": 1.9638571635101922e-05, "loss": 0.2476, "step": 4528, "teacher_loss": 0.23642557859420776 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.8675686120986938, "learning_rate": 1.96429087754807e-05, "loss": 0.2739, "step": 4529, "teacher_loss": 0.20796382427215576 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.16270476579666138, "learning_rate": 1.9647245915859477e-05, "loss": 0.1877, "step": 4530, "teacher_loss": 0.19051004946231842 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3909509778022766, "learning_rate": 1.9651583056238255e-05, "loss": 0.3031, "step": 4531, "teacher_loss": 0.29334208369255066 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.6295802593231201, "learning_rate": 1.9655920196617032e-05, "loss": 0.3265, "step": 4532, "teacher_loss": 0.2928031086921692 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3191392123699188, "learning_rate": 1.966025733699581e-05, "loss": 0.1999, "step": 4533, "teacher_loss": 0.18661819398403168 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.23553380370140076, "learning_rate": 1.9664594477374584e-05, "loss": 0.2233, "step": 4534, "teacher_loss": 0.2219860553741455 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.5289946794509888, "learning_rate": 1.9668931617753362e-05, "loss": 0.2623, "step": 4535, "teacher_loss": 0.23262135684490204 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.7444685697555542, "learning_rate": 1.967326875813214e-05, "loss": 0.2742, "step": 4536, "teacher_loss": 0.22191445529460907 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.14221107959747314, "learning_rate": 1.9677605898510914e-05, "loss": 0.1563, "step": 4537, "teacher_loss": 0.1578819453716278 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.26338934898376465, "learning_rate": 1.968194303888969e-05, "loss": 0.2477, "step": 4538, "teacher_loss": 0.24596011638641357 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.4069359302520752, "learning_rate": 1.968628017926847e-05, "loss": 0.2683, "step": 4539, "teacher_loss": 0.2529076337814331 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.6068147420883179, "learning_rate": 1.9690617319647246e-05, "loss": 0.2619, "step": 4540, "teacher_loss": 0.2235943078994751 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.6759498119354248, "learning_rate": 1.9694954460026024e-05, "loss": 0.34, "step": 4541, "teacher_loss": 0.30271345376968384 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.5406700968742371, "learning_rate": 1.9699291600404802e-05, "loss": 0.3102, "step": 4542, "teacher_loss": 0.28463542461395264 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.9173877835273743, "learning_rate": 1.9703628740783576e-05, "loss": 0.3389, "step": 4543, "teacher_loss": 0.2745884656906128 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.5685233473777771, "learning_rate": 1.9707965881162354e-05, "loss": 0.3034, "step": 4544, "teacher_loss": 0.2739788889884949 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.2999640703201294, "learning_rate": 1.971230302154113e-05, "loss": 0.3097, "step": 4545, "teacher_loss": 0.3108366131782532 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3911818861961365, "learning_rate": 1.971664016191991e-05, "loss": 0.2303, "step": 4546, "teacher_loss": 0.2124561369419098 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.5935028195381165, "learning_rate": 1.9720977302298686e-05, "loss": 0.3666, "step": 4547, "teacher_loss": 0.3413691520690918 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.29289913177490234, "learning_rate": 1.972531444267746e-05, "loss": 0.2541, "step": 4548, "teacher_loss": 0.2497403621673584 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.4865691661834717, "learning_rate": 1.972965158305624e-05, "loss": 0.2662, "step": 4549, "teacher_loss": 0.24169203639030457 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3387758731842041, "learning_rate": 1.9733988723435016e-05, "loss": 0.2533, "step": 4550, "teacher_loss": 0.2437693178653717 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.24636882543563843, "learning_rate": 1.9738325863813794e-05, "loss": 0.1882, "step": 4551, "teacher_loss": 0.18174898624420166 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3845309019088745, "learning_rate": 1.9742663004192568e-05, "loss": 0.2156, "step": 4552, "teacher_loss": 0.1968461126089096 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.5481349229812622, "learning_rate": 1.9747000144571345e-05, "loss": 0.2311, "step": 4553, "teacher_loss": 0.1958249807357788 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.4277880787849426, "learning_rate": 1.9751337284950123e-05, "loss": 0.2467, "step": 4554, "teacher_loss": 0.2265796661376953 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.32164761424064636, "learning_rate": 1.97556744253289e-05, "loss": 0.2445, "step": 4555, "teacher_loss": 0.2359553426504135 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3364228904247284, "learning_rate": 1.9760011565707678e-05, "loss": 0.2191, "step": 4556, "teacher_loss": 0.20606756210327148 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.6412774324417114, "learning_rate": 1.9764348706086456e-05, "loss": 0.2744, "step": 4557, "teacher_loss": 0.2336086928844452 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.9273658990859985, "learning_rate": 1.9768685846465234e-05, "loss": 0.3519, "step": 4558, "teacher_loss": 0.288008451461792 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.2052639126777649, "learning_rate": 1.9773022986844008e-05, "loss": 0.2146, "step": 4559, "teacher_loss": 0.2156861126422882 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.9939955472946167, "learning_rate": 1.9777360127222782e-05, "loss": 0.3771, "step": 4560, "teacher_loss": 0.30852556228637695 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.12772300839424133, "learning_rate": 1.978169726760156e-05, "loss": 0.1706, "step": 4561, "teacher_loss": 0.17534644901752472 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3508272171020508, "learning_rate": 1.9786034407980337e-05, "loss": 0.2109, "step": 4562, "teacher_loss": 0.19531863927841187 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.3844001293182373, "learning_rate": 1.9790371548359115e-05, "loss": 0.288, "step": 4563, "teacher_loss": 0.27726036310195923 }, { "compression_loss": 0.0, "epoch": 0.82, "label_loss": 0.26909855008125305, "learning_rate": 1.9794708688737892e-05, "loss": 0.1956, "step": 4564, "teacher_loss": 0.1874411702156067 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6861876845359802, "learning_rate": 1.979904582911667e-05, "loss": 0.2015, "step": 4565, "teacher_loss": 0.14760680496692657 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.3776729106903076, "learning_rate": 1.9803382969495448e-05, "loss": 0.2861, "step": 4566, "teacher_loss": 0.2758902609348297 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.3799261748790741, "learning_rate": 1.9807720109874225e-05, "loss": 0.3281, "step": 4567, "teacher_loss": 0.32237547636032104 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.422038733959198, "learning_rate": 1.9812057250253003e-05, "loss": 0.1751, "step": 4568, "teacher_loss": 0.14764931797981262 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5399374961853027, "learning_rate": 1.9816394390631777e-05, "loss": 0.2467, "step": 4569, "teacher_loss": 0.21413882076740265 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.3122031092643738, "learning_rate": 1.982073153101055e-05, "loss": 0.2259, "step": 4570, "teacher_loss": 0.216264545917511 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5072436332702637, "learning_rate": 1.982506867138933e-05, "loss": 0.2367, "step": 4571, "teacher_loss": 0.20661015808582306 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.293565034866333, "learning_rate": 1.9829405811768107e-05, "loss": 0.2079, "step": 4572, "teacher_loss": 0.19843250513076782 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.565199077129364, "learning_rate": 1.9833742952146884e-05, "loss": 0.2299, "step": 4573, "teacher_loss": 0.19258946180343628 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2277761697769165, "learning_rate": 1.9838080092525662e-05, "loss": 0.1966, "step": 4574, "teacher_loss": 0.1931137591600418 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.3999192714691162, "learning_rate": 1.984241723290444e-05, "loss": 0.1918, "step": 4575, "teacher_loss": 0.16867533326148987 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.49854612350463867, "learning_rate": 1.9846754373283217e-05, "loss": 0.295, "step": 4576, "teacher_loss": 0.2723630666732788 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.4591091275215149, "learning_rate": 1.9851091513661995e-05, "loss": 0.3934, "step": 4577, "teacher_loss": 0.3860475718975067 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6976302862167358, "learning_rate": 1.985542865404077e-05, "loss": 0.3109, "step": 4578, "teacher_loss": 0.2679854929447174 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.518950879573822, "learning_rate": 1.9859765794419547e-05, "loss": 0.4015, "step": 4579, "teacher_loss": 0.3884930908679962 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.3135051727294922, "learning_rate": 1.9864102934798324e-05, "loss": 0.2069, "step": 4580, "teacher_loss": 0.19507566094398499 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.34679627418518066, "learning_rate": 1.98684400751771e-05, "loss": 0.2058, "step": 4581, "teacher_loss": 0.1901879608631134 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.17885328829288483, "learning_rate": 1.9872777215555876e-05, "loss": 0.1553, "step": 4582, "teacher_loss": 0.15265792608261108 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.33397477865219116, "learning_rate": 1.9877114355934654e-05, "loss": 0.2942, "step": 4583, "teacher_loss": 0.2897806763648987 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.1667826622724533, "learning_rate": 1.988145149631343e-05, "loss": 0.2487, "step": 4584, "teacher_loss": 0.25777795910835266 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.44153517484664917, "learning_rate": 1.988578863669221e-05, "loss": 0.2454, "step": 4585, "teacher_loss": 0.22357740998268127 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6416473984718323, "learning_rate": 1.9890125777070987e-05, "loss": 0.2902, "step": 4586, "teacher_loss": 0.25120314955711365 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.32736456394195557, "learning_rate": 1.989446291744976e-05, "loss": 0.2237, "step": 4587, "teacher_loss": 0.21213370561599731 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2532444894313812, "learning_rate": 1.989880005782854e-05, "loss": 0.2629, "step": 4588, "teacher_loss": 0.26399821043014526 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.41060197353363037, "learning_rate": 1.9903137198207316e-05, "loss": 0.2325, "step": 4589, "teacher_loss": 0.2126852124929428 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.42801234126091003, "learning_rate": 1.9907474338586094e-05, "loss": 0.2545, "step": 4590, "teacher_loss": 0.23517832159996033 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2867772877216339, "learning_rate": 1.991181147896487e-05, "loss": 0.2157, "step": 4591, "teacher_loss": 0.20779916644096375 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2646481692790985, "learning_rate": 1.9916148619343646e-05, "loss": 0.2082, "step": 4592, "teacher_loss": 0.20197591185569763 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6676338911056519, "learning_rate": 1.9920485759722423e-05, "loss": 0.2677, "step": 4593, "teacher_loss": 0.2232159674167633 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2056477963924408, "learning_rate": 1.99248229001012e-05, "loss": 0.2409, "step": 4594, "teacher_loss": 0.24477984011173248 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.4429328143596649, "learning_rate": 1.992916004047998e-05, "loss": 0.2565, "step": 4595, "teacher_loss": 0.2358298897743225 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5248163938522339, "learning_rate": 1.9933497180858753e-05, "loss": 0.2305, "step": 4596, "teacher_loss": 0.19779440760612488 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5143501162528992, "learning_rate": 1.993783432123753e-05, "loss": 0.1966, "step": 4597, "teacher_loss": 0.16128087043762207 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.27569353580474854, "learning_rate": 1.9942171461616308e-05, "loss": 0.2339, "step": 4598, "teacher_loss": 0.22930589318275452 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.28145936131477356, "learning_rate": 1.9946508601995086e-05, "loss": 0.3269, "step": 4599, "teacher_loss": 0.33198282122612 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.25473153591156006, "learning_rate": 1.9950845742373863e-05, "loss": 0.1541, "step": 4600, "teacher_loss": 0.14293237030506134 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6630728840827942, "learning_rate": 1.995518288275264e-05, "loss": 0.297, "step": 4601, "teacher_loss": 0.256326287984848 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.38394030928611755, "learning_rate": 1.995952002313142e-05, "loss": 0.182, "step": 4602, "teacher_loss": 0.15960556268692017 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.28238391876220703, "learning_rate": 1.9963857163510193e-05, "loss": 0.2436, "step": 4603, "teacher_loss": 0.23925140500068665 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.59486985206604, "learning_rate": 1.9968194303888967e-05, "loss": 0.2538, "step": 4604, "teacher_loss": 0.21590998768806458 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.18063107132911682, "learning_rate": 1.9972531444267744e-05, "loss": 0.238, "step": 4605, "teacher_loss": 0.24436254799365997 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.18290956318378448, "learning_rate": 1.9976868584646522e-05, "loss": 0.1972, "step": 4606, "teacher_loss": 0.19878965616226196 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.34946557879447937, "learning_rate": 1.99812057250253e-05, "loss": 0.2433, "step": 4607, "teacher_loss": 0.23149235546588898 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.38810551166534424, "learning_rate": 1.9985542865404077e-05, "loss": 0.2659, "step": 4608, "teacher_loss": 0.25229862332344055 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.13445571064949036, "learning_rate": 1.9989880005782855e-05, "loss": 0.1525, "step": 4609, "teacher_loss": 0.15454548597335815 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5226759314537048, "learning_rate": 1.9994217146161633e-05, "loss": 0.2731, "step": 4610, "teacher_loss": 0.24531565606594086 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.5696532726287842, "learning_rate": 1.999855428654041e-05, "loss": 0.2858, "step": 4611, "teacher_loss": 0.25429433584213257 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2213011384010315, "learning_rate": 2.0002891426919188e-05, "loss": 0.176, "step": 4612, "teacher_loss": 0.17096082866191864 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.4153340458869934, "learning_rate": 2.0007228567297962e-05, "loss": 0.2592, "step": 4613, "teacher_loss": 0.24187862873077393 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.2847558557987213, "learning_rate": 2.0011565707676736e-05, "loss": 0.2076, "step": 4614, "teacher_loss": 0.19904077053070068 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.567826509475708, "learning_rate": 2.0015902848055514e-05, "loss": 0.253, "step": 4615, "teacher_loss": 0.21798181533813477 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.4530789852142334, "learning_rate": 2.002023998843429e-05, "loss": 0.2934, "step": 4616, "teacher_loss": 0.27566760778427124 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.748852550983429, "learning_rate": 2.002457712881307e-05, "loss": 0.4632, "step": 4617, "teacher_loss": 0.43142637610435486 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.43120384216308594, "learning_rate": 2.0028914269191847e-05, "loss": 0.2315, "step": 4618, "teacher_loss": 0.20936301350593567 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.6893396377563477, "learning_rate": 2.0033251409570624e-05, "loss": 0.2862, "step": 4619, "teacher_loss": 0.2414306104183197 }, { "compression_loss": 0.0, "epoch": 0.83, "label_loss": 0.7687826156616211, "learning_rate": 2.0037588549949402e-05, "loss": 0.349, "step": 4620, "teacher_loss": 0.30236169695854187 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.30291542410850525, "learning_rate": 2.004192569032818e-05, "loss": 0.1647, "step": 4621, "teacher_loss": 0.14930902421474457 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7168831825256348, "learning_rate": 2.0046262830706954e-05, "loss": 0.2702, "step": 4622, "teacher_loss": 0.2206028550863266 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.14678002893924713, "learning_rate": 2.005059997108573e-05, "loss": 0.1741, "step": 4623, "teacher_loss": 0.17716410756111145 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7131956219673157, "learning_rate": 2.005493711146451e-05, "loss": 0.2812, "step": 4624, "teacher_loss": 0.2332216054201126 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4446783661842346, "learning_rate": 2.0059274251843283e-05, "loss": 0.3172, "step": 4625, "teacher_loss": 0.3030606806278229 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.5094561576843262, "learning_rate": 2.006361139222206e-05, "loss": 0.2817, "step": 4626, "teacher_loss": 0.256378173828125 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.19025397300720215, "learning_rate": 2.006794853260084e-05, "loss": 0.3337, "step": 4627, "teacher_loss": 0.3495849370956421 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.22981694340705872, "learning_rate": 2.0072285672979616e-05, "loss": 0.2523, "step": 4628, "teacher_loss": 0.25481337308883667 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.505085825920105, "learning_rate": 2.0076622813358394e-05, "loss": 0.2327, "step": 4629, "teacher_loss": 0.20246979594230652 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.28029829263687134, "learning_rate": 2.008095995373717e-05, "loss": 0.2025, "step": 4630, "teacher_loss": 0.1938786804676056 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4046053886413574, "learning_rate": 2.0085297094115946e-05, "loss": 0.247, "step": 4631, "teacher_loss": 0.22950223088264465 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6568427681922913, "learning_rate": 2.0089634234494723e-05, "loss": 0.2357, "step": 4632, "teacher_loss": 0.1888718605041504 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4127659797668457, "learning_rate": 2.00939713748735e-05, "loss": 0.4528, "step": 4633, "teacher_loss": 0.45723968744277954 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6477503776550293, "learning_rate": 2.009830851525228e-05, "loss": 0.2743, "step": 4634, "teacher_loss": 0.23280251026153564 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.0879109799861908, "learning_rate": 2.0102645655631053e-05, "loss": 0.1873, "step": 4635, "teacher_loss": 0.1983334869146347 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7636901140213013, "learning_rate": 2.010698279600983e-05, "loss": 0.2582, "step": 4636, "teacher_loss": 0.20200307667255402 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.30433574318885803, "learning_rate": 2.0111319936388608e-05, "loss": 0.1881, "step": 4637, "teacher_loss": 0.17519734799861908 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.40762999653816223, "learning_rate": 2.0115657076767386e-05, "loss": 0.2769, "step": 4638, "teacher_loss": 0.26240506768226624 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.33742058277130127, "learning_rate": 2.0119994217146163e-05, "loss": 0.2132, "step": 4639, "teacher_loss": 0.1993464231491089 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4958832859992981, "learning_rate": 2.0124331357524938e-05, "loss": 0.4503, "step": 4640, "teacher_loss": 0.4452378749847412 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.5890842080116272, "learning_rate": 2.0128668497903715e-05, "loss": 0.2796, "step": 4641, "teacher_loss": 0.24525412917137146 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4865787625312805, "learning_rate": 2.0133005638282493e-05, "loss": 0.3788, "step": 4642, "teacher_loss": 0.3668323755264282 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.5590130090713501, "learning_rate": 2.013734277866127e-05, "loss": 0.2197, "step": 4643, "teacher_loss": 0.18195965886116028 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.39190101623535156, "learning_rate": 2.0141679919040048e-05, "loss": 0.2175, "step": 4644, "teacher_loss": 0.19811929762363434 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6120504140853882, "learning_rate": 2.0146017059418826e-05, "loss": 0.2305, "step": 4645, "teacher_loss": 0.18812735378742218 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.39701128005981445, "learning_rate": 2.01503541997976e-05, "loss": 0.209, "step": 4646, "teacher_loss": 0.18812063336372375 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.416189044713974, "learning_rate": 2.0154691340176378e-05, "loss": 0.229, "step": 4647, "teacher_loss": 0.2081766426563263 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4827533960342407, "learning_rate": 2.0159028480555155e-05, "loss": 0.3182, "step": 4648, "teacher_loss": 0.29995983839035034 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.2939597964286804, "learning_rate": 2.016336562093393e-05, "loss": 0.2501, "step": 4649, "teacher_loss": 0.2452666312456131 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.19988112151622772, "learning_rate": 2.0167702761312707e-05, "loss": 0.178, "step": 4650, "teacher_loss": 0.17560827732086182 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.5346502065658569, "learning_rate": 2.0172039901691485e-05, "loss": 0.3132, "step": 4651, "teacher_loss": 0.28857722878456116 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.8323071002960205, "learning_rate": 2.0176377042070262e-05, "loss": 0.3608, "step": 4652, "teacher_loss": 0.30846092104911804 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.47282472252845764, "learning_rate": 2.018071418244904e-05, "loss": 0.251, "step": 4653, "teacher_loss": 0.226351797580719 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.18234100937843323, "learning_rate": 2.0185051322827817e-05, "loss": 0.1523, "step": 4654, "teacher_loss": 0.14896777272224426 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7059367895126343, "learning_rate": 2.0189388463206595e-05, "loss": 0.3869, "step": 4655, "teacher_loss": 0.351399302482605 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.3463664650917053, "learning_rate": 2.0193725603585373e-05, "loss": 0.3583, "step": 4656, "teacher_loss": 0.3596689701080322 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.2776446044445038, "learning_rate": 2.0198062743964144e-05, "loss": 0.2119, "step": 4657, "teacher_loss": 0.20459237694740295 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.3087308406829834, "learning_rate": 2.020239988434292e-05, "loss": 0.2, "step": 4658, "teacher_loss": 0.18793678283691406 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6109752655029297, "learning_rate": 2.02067370247217e-05, "loss": 0.2374, "step": 4659, "teacher_loss": 0.19585120677947998 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.5284692049026489, "learning_rate": 2.0211074165100476e-05, "loss": 0.2099, "step": 4660, "teacher_loss": 0.1745302379131317 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7406220436096191, "learning_rate": 2.0215411305479254e-05, "loss": 0.2921, "step": 4661, "teacher_loss": 0.24231459200382233 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.7132091522216797, "learning_rate": 2.021974844585803e-05, "loss": 0.2869, "step": 4662, "teacher_loss": 0.23954693973064423 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.40481775999069214, "learning_rate": 2.022408558623681e-05, "loss": 0.2464, "step": 4663, "teacher_loss": 0.22883744537830353 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.9858428835868835, "learning_rate": 2.0228422726615587e-05, "loss": 0.3668, "step": 4664, "teacher_loss": 0.29804179072380066 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.21836432814598083, "learning_rate": 2.0232759866994365e-05, "loss": 0.2111, "step": 4665, "teacher_loss": 0.2102908492088318 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.37891945242881775, "learning_rate": 2.023709700737314e-05, "loss": 0.2736, "step": 4666, "teacher_loss": 0.2619180679321289 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.4839324951171875, "learning_rate": 2.0241434147751916e-05, "loss": 0.231, "step": 4667, "teacher_loss": 0.20291519165039062 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.49740272760391235, "learning_rate": 2.024577128813069e-05, "loss": 0.216, "step": 4668, "teacher_loss": 0.184719979763031 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.429978609085083, "learning_rate": 2.0250108428509468e-05, "loss": 0.2008, "step": 4669, "teacher_loss": 0.1753380298614502 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.833977997303009, "learning_rate": 2.0254445568888246e-05, "loss": 0.2871, "step": 4670, "teacher_loss": 0.22636312246322632 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6584421396255493, "learning_rate": 2.0258782709267024e-05, "loss": 0.2628, "step": 4671, "teacher_loss": 0.2188015580177307 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.18187645077705383, "learning_rate": 2.02631198496458e-05, "loss": 0.2039, "step": 4672, "teacher_loss": 0.20632772147655487 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.22288449108600616, "learning_rate": 2.026745699002458e-05, "loss": 0.199, "step": 4673, "teacher_loss": 0.19632232189178467 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.6050679087638855, "learning_rate": 2.0271794130403356e-05, "loss": 0.284, "step": 4674, "teacher_loss": 0.24833041429519653 }, { "compression_loss": 0.0, "epoch": 0.84, "label_loss": 0.35629603266716003, "learning_rate": 2.027613127078213e-05, "loss": 0.2249, "step": 4675, "teacher_loss": 0.21033364534378052 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.36245405673980713, "learning_rate": 2.0280468411160908e-05, "loss": 0.2791, "step": 4676, "teacher_loss": 0.26981133222579956 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.1875852793455124, "learning_rate": 2.0284805551539686e-05, "loss": 0.194, "step": 4677, "teacher_loss": 0.19476372003555298 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.32758450508117676, "learning_rate": 2.0289142691918463e-05, "loss": 0.2004, "step": 4678, "teacher_loss": 0.18627595901489258 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.7231752872467041, "learning_rate": 2.0293479832297238e-05, "loss": 0.2664, "step": 4679, "teacher_loss": 0.21566519141197205 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3958778381347656, "learning_rate": 2.0297816972676015e-05, "loss": 0.2049, "step": 4680, "teacher_loss": 0.18370817601680756 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.8475362658500671, "learning_rate": 2.0302154113054793e-05, "loss": 0.3372, "step": 4681, "teacher_loss": 0.2804933786392212 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.40638116002082825, "learning_rate": 2.030649125343357e-05, "loss": 0.3294, "step": 4682, "teacher_loss": 0.320858895778656 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.5833108425140381, "learning_rate": 2.0310828393812348e-05, "loss": 0.2914, "step": 4683, "teacher_loss": 0.2590058147907257 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3511325418949127, "learning_rate": 2.0315165534191122e-05, "loss": 0.227, "step": 4684, "teacher_loss": 0.21316352486610413 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.7975261807441711, "learning_rate": 2.03195026745699e-05, "loss": 0.316, "step": 4685, "teacher_loss": 0.26249682903289795 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.5050575137138367, "learning_rate": 2.0323839814948678e-05, "loss": 0.2924, "step": 4686, "teacher_loss": 0.2688046097755432 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.820890486240387, "learning_rate": 2.0328176955327455e-05, "loss": 0.3725, "step": 4687, "teacher_loss": 0.32263654470443726 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3852985203266144, "learning_rate": 2.0332514095706233e-05, "loss": 0.257, "step": 4688, "teacher_loss": 0.24279460310935974 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.34823161363601685, "learning_rate": 2.033685123608501e-05, "loss": 0.1979, "step": 4689, "teacher_loss": 0.1811620444059372 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.4771634638309479, "learning_rate": 2.0341188376463785e-05, "loss": 0.307, "step": 4690, "teacher_loss": 0.2881277799606323 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.4022625684738159, "learning_rate": 2.0345525516842562e-05, "loss": 0.3098, "step": 4691, "teacher_loss": 0.2995000183582306 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.36342519521713257, "learning_rate": 2.034986265722134e-05, "loss": 0.2023, "step": 4692, "teacher_loss": 0.18435952067375183 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.8731607794761658, "learning_rate": 2.0354199797600114e-05, "loss": 0.2817, "step": 4693, "teacher_loss": 0.2159455120563507 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.350460410118103, "learning_rate": 2.0358536937978892e-05, "loss": 0.2447, "step": 4694, "teacher_loss": 0.23291495442390442 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.10431374609470367, "learning_rate": 2.036287407835767e-05, "loss": 0.1775, "step": 4695, "teacher_loss": 0.18567220866680145 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.32678449153900146, "learning_rate": 2.0367211218736447e-05, "loss": 0.2206, "step": 4696, "teacher_loss": 0.20879864692687988 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.4416809380054474, "learning_rate": 2.0371548359115225e-05, "loss": 0.3525, "step": 4697, "teacher_loss": 0.34253618121147156 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.410899817943573, "learning_rate": 2.0375885499494002e-05, "loss": 0.3245, "step": 4698, "teacher_loss": 0.3148714303970337 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.40641552209854126, "learning_rate": 2.038022263987278e-05, "loss": 0.2325, "step": 4699, "teacher_loss": 0.21312814950942993 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.24704644083976746, "learning_rate": 2.0384559780251558e-05, "loss": 0.2203, "step": 4700, "teacher_loss": 0.21727995574474335 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.20583905279636383, "learning_rate": 2.038889692063033e-05, "loss": 0.2012, "step": 4701, "teacher_loss": 0.2007027566432953 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.17191235721111298, "learning_rate": 2.0393234061009106e-05, "loss": 0.2109, "step": 4702, "teacher_loss": 0.2152131199836731 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.2472323328256607, "learning_rate": 2.0397571201387884e-05, "loss": 0.2013, "step": 4703, "teacher_loss": 0.1961841881275177 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.7080936431884766, "learning_rate": 2.040190834176666e-05, "loss": 0.3386, "step": 4704, "teacher_loss": 0.2975391447544098 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.32597437500953674, "learning_rate": 2.040624548214544e-05, "loss": 0.221, "step": 4705, "teacher_loss": 0.20930588245391846 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.43104469776153564, "learning_rate": 2.0410582622524217e-05, "loss": 0.2809, "step": 4706, "teacher_loss": 0.2642444372177124 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.2714369297027588, "learning_rate": 2.0414919762902994e-05, "loss": 0.1665, "step": 4707, "teacher_loss": 0.15481694042682648 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.4421897232532501, "learning_rate": 2.0419256903281772e-05, "loss": 0.4849, "step": 4708, "teacher_loss": 0.4896079897880554 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.16248080134391785, "learning_rate": 2.042359404366055e-05, "loss": 0.1535, "step": 4709, "teacher_loss": 0.15255028009414673 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.45376864075660706, "learning_rate": 2.0427931184039324e-05, "loss": 0.2726, "step": 4710, "teacher_loss": 0.2524913549423218 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 1.007807731628418, "learning_rate": 2.04322683244181e-05, "loss": 0.2875, "step": 4711, "teacher_loss": 0.20751619338989258 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3113197088241577, "learning_rate": 2.0436605464796876e-05, "loss": 0.2072, "step": 4712, "teacher_loss": 0.19564369320869446 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.5506263971328735, "learning_rate": 2.0440942605175653e-05, "loss": 0.269, "step": 4713, "teacher_loss": 0.2377415895462036 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.4580322504043579, "learning_rate": 2.044527974555443e-05, "loss": 0.2799, "step": 4714, "teacher_loss": 0.2601166367530823 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3905421495437622, "learning_rate": 2.044961688593321e-05, "loss": 0.198, "step": 4715, "teacher_loss": 0.17661559581756592 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.10462985932826996, "learning_rate": 2.0453954026311986e-05, "loss": 0.1959, "step": 4716, "teacher_loss": 0.20600327849388123 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.7212705016136169, "learning_rate": 2.0458291166690764e-05, "loss": 0.4682, "step": 4717, "teacher_loss": 0.4401141107082367 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.6866236925125122, "learning_rate": 2.046262830706954e-05, "loss": 0.2658, "step": 4718, "teacher_loss": 0.21898984909057617 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3656891882419586, "learning_rate": 2.0466965447448315e-05, "loss": 0.1876, "step": 4719, "teacher_loss": 0.16785411536693573 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.32728344202041626, "learning_rate": 2.0471302587827093e-05, "loss": 0.2362, "step": 4720, "teacher_loss": 0.22608011960983276 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3760058283805847, "learning_rate": 2.047563972820587e-05, "loss": 0.2329, "step": 4721, "teacher_loss": 0.2169874608516693 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.5055915117263794, "learning_rate": 2.047997686858465e-05, "loss": 0.2606, "step": 4722, "teacher_loss": 0.23332643508911133 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.5275909900665283, "learning_rate": 2.0484314008963423e-05, "loss": 0.2928, "step": 4723, "teacher_loss": 0.2667027711868286 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.1965922713279724, "learning_rate": 2.04886511493422e-05, "loss": 0.1563, "step": 4724, "teacher_loss": 0.15185774862766266 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.48514658212661743, "learning_rate": 2.0492988289720978e-05, "loss": 0.272, "step": 4725, "teacher_loss": 0.24828682839870453 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.2736703157424927, "learning_rate": 2.0497325430099755e-05, "loss": 0.2071, "step": 4726, "teacher_loss": 0.1997268944978714 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.266363263130188, "learning_rate": 2.0501662570478533e-05, "loss": 0.212, "step": 4727, "teacher_loss": 0.20596735179424286 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.21976739168167114, "learning_rate": 2.0505999710857307e-05, "loss": 0.23, "step": 4728, "teacher_loss": 0.2310842126607895 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.304928719997406, "learning_rate": 2.0510336851236085e-05, "loss": 0.2285, "step": 4729, "teacher_loss": 0.2199726700782776 }, { "compression_loss": 0.0, "epoch": 0.85, "label_loss": 0.3335579037666321, "learning_rate": 2.0514673991614863e-05, "loss": 0.3004, "step": 4730, "teacher_loss": 0.29673337936401367 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.10645858943462372, "learning_rate": 2.051901113199364e-05, "loss": 0.133, "step": 4731, "teacher_loss": 0.13590675592422485 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5253108739852905, "learning_rate": 2.0523348272372418e-05, "loss": 0.2455, "step": 4732, "teacher_loss": 0.21445125341415405 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.1407293975353241, "learning_rate": 2.0527685412751192e-05, "loss": 0.123, "step": 4733, "teacher_loss": 0.12104855477809906 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.49479037523269653, "learning_rate": 2.053202255312997e-05, "loss": 0.2792, "step": 4734, "teacher_loss": 0.2552777826786041 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3112625479698181, "learning_rate": 2.0536359693508747e-05, "loss": 0.2552, "step": 4735, "teacher_loss": 0.24896648526191711 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.21511104702949524, "learning_rate": 2.0540696833887525e-05, "loss": 0.2539, "step": 4736, "teacher_loss": 0.25825080275535583 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.18832336366176605, "learning_rate": 2.05450339742663e-05, "loss": 0.2253, "step": 4737, "teacher_loss": 0.22936120629310608 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.24616801738739014, "learning_rate": 2.0549371114645077e-05, "loss": 0.2645, "step": 4738, "teacher_loss": 0.2665729522705078 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.446675568819046, "learning_rate": 2.0553708255023854e-05, "loss": 0.2128, "step": 4739, "teacher_loss": 0.18684086203575134 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.33981913328170776, "learning_rate": 2.0558045395402632e-05, "loss": 0.3022, "step": 4740, "teacher_loss": 0.29804426431655884 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.609460711479187, "learning_rate": 2.056238253578141e-05, "loss": 0.2438, "step": 4741, "teacher_loss": 0.20321688055992126 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5373461246490479, "learning_rate": 2.0566719676160187e-05, "loss": 0.2257, "step": 4742, "teacher_loss": 0.1911269724369049 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.34360814094543457, "learning_rate": 2.0571056816538965e-05, "loss": 0.195, "step": 4743, "teacher_loss": 0.1784539669752121 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.21057409048080444, "learning_rate": 2.057539395691774e-05, "loss": 0.2385, "step": 4744, "teacher_loss": 0.24161408841609955 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.923721432685852, "learning_rate": 2.0579731097296513e-05, "loss": 0.2734, "step": 4745, "teacher_loss": 0.20118948817253113 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.541483998298645, "learning_rate": 2.058406823767529e-05, "loss": 0.321, "step": 4746, "teacher_loss": 0.296536386013031 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5715552568435669, "learning_rate": 2.058840537805407e-05, "loss": 0.2268, "step": 4747, "teacher_loss": 0.18853317201137543 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.40522849559783936, "learning_rate": 2.0592742518432846e-05, "loss": 0.2422, "step": 4748, "teacher_loss": 0.22406047582626343 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4761125147342682, "learning_rate": 2.0597079658811624e-05, "loss": 0.3276, "step": 4749, "teacher_loss": 0.31105393171310425 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3519165813922882, "learning_rate": 2.06014167991904e-05, "loss": 0.3553, "step": 4750, "teacher_loss": 0.3556811809539795 }, { "epoch": 0.86, "eval_exact_match": 79.62157048249763, "eval_f1": 87.10371369303293, "step": 4750 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4493677616119385, "learning_rate": 2.060575393956918e-05, "loss": 0.2374, "step": 4751, "teacher_loss": 0.21380159258842468 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3715413510799408, "learning_rate": 2.0610091079947957e-05, "loss": 0.2188, "step": 4752, "teacher_loss": 0.20183061063289642 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.22725386917591095, "learning_rate": 2.0614428220326734e-05, "loss": 0.2443, "step": 4753, "teacher_loss": 0.24614199995994568 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.6654298305511475, "learning_rate": 2.061876536070551e-05, "loss": 0.2793, "step": 4754, "teacher_loss": 0.2363489270210266 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.27652814984321594, "learning_rate": 2.0623102501084283e-05, "loss": 0.1607, "step": 4755, "teacher_loss": 0.14788006246089935 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5477147102355957, "learning_rate": 2.062743964146306e-05, "loss": 0.2934, "step": 4756, "teacher_loss": 0.2651033103466034 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.7871124148368835, "learning_rate": 2.0631776781841838e-05, "loss": 0.2691, "step": 4757, "teacher_loss": 0.21153008937835693 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.26898670196533203, "learning_rate": 2.0636113922220616e-05, "loss": 0.1877, "step": 4758, "teacher_loss": 0.17865067720413208 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.8086433410644531, "learning_rate": 2.0640451062599393e-05, "loss": 0.371, "step": 4759, "teacher_loss": 0.32240110635757446 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5675837397575378, "learning_rate": 2.064478820297817e-05, "loss": 0.228, "step": 4760, "teacher_loss": 0.19025897979736328 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3095381259918213, "learning_rate": 2.064912534335695e-05, "loss": 0.2732, "step": 4761, "teacher_loss": 0.26919642090797424 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4400290846824646, "learning_rate": 2.0653462483735726e-05, "loss": 0.3156, "step": 4762, "teacher_loss": 0.3018299639225006 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3071986436843872, "learning_rate": 2.06577996241145e-05, "loss": 0.2976, "step": 4763, "teacher_loss": 0.29654982686042786 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.6105849146842957, "learning_rate": 2.0662136764493278e-05, "loss": 0.2293, "step": 4764, "teacher_loss": 0.18698062002658844 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.24157050251960754, "learning_rate": 2.0666473904872056e-05, "loss": 0.1932, "step": 4765, "teacher_loss": 0.1878001093864441 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3392367362976074, "learning_rate": 2.067081104525083e-05, "loss": 0.1733, "step": 4766, "teacher_loss": 0.1549014449119568 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.40825098752975464, "learning_rate": 2.0675148185629607e-05, "loss": 0.2354, "step": 4767, "teacher_loss": 0.21617184579372406 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.08453512191772461, "learning_rate": 2.0679485326008385e-05, "loss": 0.1945, "step": 4768, "teacher_loss": 0.20673884451389313 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5940424203872681, "learning_rate": 2.0683822466387163e-05, "loss": 0.2733, "step": 4769, "teacher_loss": 0.23763887584209442 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4813592731952667, "learning_rate": 2.068815960676594e-05, "loss": 0.3155, "step": 4770, "teacher_loss": 0.2970581650733948 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.2245270013809204, "learning_rate": 2.0692496747144718e-05, "loss": 0.2075, "step": 4771, "teacher_loss": 0.20565257966518402 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.7164108157157898, "learning_rate": 2.0696833887523492e-05, "loss": 0.283, "step": 4772, "teacher_loss": 0.23480072617530823 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.2889282703399658, "learning_rate": 2.070117102790227e-05, "loss": 0.237, "step": 4773, "teacher_loss": 0.2312161773443222 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.38308480381965637, "learning_rate": 2.0705508168281047e-05, "loss": 0.2192, "step": 4774, "teacher_loss": 0.20097298920154572 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5842920541763306, "learning_rate": 2.0709845308659825e-05, "loss": 0.2573, "step": 4775, "teacher_loss": 0.2209763079881668 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4704876244068146, "learning_rate": 2.0714182449038603e-05, "loss": 0.2203, "step": 4776, "teacher_loss": 0.19244609773159027 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.2384336292743683, "learning_rate": 2.0718519589417377e-05, "loss": 0.1677, "step": 4777, "teacher_loss": 0.1598646491765976 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5685132741928101, "learning_rate": 2.0722856729796155e-05, "loss": 0.25, "step": 4778, "teacher_loss": 0.2146632820367813 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.17542965710163116, "learning_rate": 2.0727193870174932e-05, "loss": 0.1818, "step": 4779, "teacher_loss": 0.18249854445457458 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.36382240056991577, "learning_rate": 2.073153101055371e-05, "loss": 0.2315, "step": 4780, "teacher_loss": 0.2167939394712448 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.36647355556488037, "learning_rate": 2.0735868150932484e-05, "loss": 0.2922, "step": 4781, "teacher_loss": 0.28393611311912537 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.4240023195743561, "learning_rate": 2.074020529131126e-05, "loss": 0.1608, "step": 4782, "teacher_loss": 0.13160736858844757 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3196992874145508, "learning_rate": 2.074454243169004e-05, "loss": 0.2416, "step": 4783, "teacher_loss": 0.23295104503631592 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.5352063179016113, "learning_rate": 2.0748879572068817e-05, "loss": 0.2278, "step": 4784, "teacher_loss": 0.1936257779598236 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.6432743072509766, "learning_rate": 2.0753216712447594e-05, "loss": 0.3525, "step": 4785, "teacher_loss": 0.3202311396598816 }, { "compression_loss": 0.0, "epoch": 0.86, "label_loss": 0.3428359031677246, "learning_rate": 2.0757553852826372e-05, "loss": 0.2875, "step": 4786, "teacher_loss": 0.28130775690078735 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.06807465851306915, "learning_rate": 2.076189099320515e-05, "loss": 0.1071, "step": 4787, "teacher_loss": 0.11140544712543488 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4267340898513794, "learning_rate": 2.0766228133583924e-05, "loss": 0.334, "step": 4788, "teacher_loss": 0.32367902994155884 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6794570088386536, "learning_rate": 2.07705652739627e-05, "loss": 0.335, "step": 4789, "teacher_loss": 0.29668188095092773 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6524490118026733, "learning_rate": 2.0774902414341476e-05, "loss": 0.3174, "step": 4790, "teacher_loss": 0.2801551818847656 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.5418270230293274, "learning_rate": 2.0779239554720253e-05, "loss": 0.2498, "step": 4791, "teacher_loss": 0.2173733413219452 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3127993047237396, "learning_rate": 2.078357669509903e-05, "loss": 0.2236, "step": 4792, "teacher_loss": 0.21372443437576294 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.36337536573410034, "learning_rate": 2.078791383547781e-05, "loss": 0.2492, "step": 4793, "teacher_loss": 0.23648947477340698 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3192163109779358, "learning_rate": 2.0792250975856586e-05, "loss": 0.2098, "step": 4794, "teacher_loss": 0.1975913941860199 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.44483301043510437, "learning_rate": 2.0796588116235364e-05, "loss": 0.2089, "step": 4795, "teacher_loss": 0.18273496627807617 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.35255932807922363, "learning_rate": 2.080092525661414e-05, "loss": 0.2944, "step": 4796, "teacher_loss": 0.28792160749435425 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4680088460445404, "learning_rate": 2.080526239699292e-05, "loss": 0.2357, "step": 4797, "teacher_loss": 0.20989438891410828 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3696249723434448, "learning_rate": 2.0809599537371693e-05, "loss": 0.2488, "step": 4798, "teacher_loss": 0.23542523384094238 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6639704704284668, "learning_rate": 2.0813936677750468e-05, "loss": 0.3229, "step": 4799, "teacher_loss": 0.2850167751312256 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6983274817466736, "learning_rate": 2.0818273818129245e-05, "loss": 0.299, "step": 4800, "teacher_loss": 0.25460439920425415 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4615797996520996, "learning_rate": 2.0822610958508023e-05, "loss": 0.2763, "step": 4801, "teacher_loss": 0.2557072639465332 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.31135794520378113, "learning_rate": 2.08269480988868e-05, "loss": 0.2069, "step": 4802, "teacher_loss": 0.19526076316833496 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.7562644481658936, "learning_rate": 2.0831285239265578e-05, "loss": 0.2376, "step": 4803, "teacher_loss": 0.17996902763843536 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.12007440626621246, "learning_rate": 2.0835622379644356e-05, "loss": 0.218, "step": 4804, "teacher_loss": 0.2288973182439804 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.5890671014785767, "learning_rate": 2.0839959520023133e-05, "loss": 0.2444, "step": 4805, "teacher_loss": 0.20609742403030396 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.30036479234695435, "learning_rate": 2.084429666040191e-05, "loss": 0.1994, "step": 4806, "teacher_loss": 0.18820902705192566 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.47405874729156494, "learning_rate": 2.0848633800780685e-05, "loss": 0.2548, "step": 4807, "teacher_loss": 0.23042921721935272 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4083493947982788, "learning_rate": 2.0852970941159463e-05, "loss": 0.2213, "step": 4808, "teacher_loss": 0.20048563182353973 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.34885621070861816, "learning_rate": 2.085730808153824e-05, "loss": 0.2864, "step": 4809, "teacher_loss": 0.27946630120277405 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.2252580225467682, "learning_rate": 2.0861645221917015e-05, "loss": 0.1794, "step": 4810, "teacher_loss": 0.17431147396564484 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4814949631690979, "learning_rate": 2.0865982362295792e-05, "loss": 0.2516, "step": 4811, "teacher_loss": 0.22600892186164856 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.518089771270752, "learning_rate": 2.087031950267457e-05, "loss": 0.4367, "step": 4812, "teacher_loss": 0.4276808500289917 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.20721884071826935, "learning_rate": 2.0874656643053348e-05, "loss": 0.1723, "step": 4813, "teacher_loss": 0.16841307282447815 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4128264784812927, "learning_rate": 2.0878993783432125e-05, "loss": 0.2428, "step": 4814, "teacher_loss": 0.22395160794258118 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3508679270744324, "learning_rate": 2.0883330923810903e-05, "loss": 0.156, "step": 4815, "teacher_loss": 0.13437426090240479 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3383867144584656, "learning_rate": 2.0887668064189677e-05, "loss": 0.2763, "step": 4816, "teacher_loss": 0.2693929672241211 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.8833049535751343, "learning_rate": 2.0892005204568455e-05, "loss": 0.2856, "step": 4817, "teacher_loss": 0.2191411554813385 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.499011754989624, "learning_rate": 2.0896342344947232e-05, "loss": 0.2837, "step": 4818, "teacher_loss": 0.25977832078933716 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.1661023199558258, "learning_rate": 2.090067948532601e-05, "loss": 0.1895, "step": 4819, "teacher_loss": 0.19215211272239685 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.35957083106040955, "learning_rate": 2.0905016625704788e-05, "loss": 0.2111, "step": 4820, "teacher_loss": 0.1946393847465515 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.9085617661476135, "learning_rate": 2.0909353766083562e-05, "loss": 0.3808, "step": 4821, "teacher_loss": 0.32211828231811523 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.8703345060348511, "learning_rate": 2.091369090646234e-05, "loss": 0.2934, "step": 4822, "teacher_loss": 0.22929087281227112 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.44223934412002563, "learning_rate": 2.0918028046841117e-05, "loss": 0.2689, "step": 4823, "teacher_loss": 0.24968111515045166 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.5612695217132568, "learning_rate": 2.0922365187219895e-05, "loss": 0.3717, "step": 4824, "teacher_loss": 0.3506481647491455 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.46431466937065125, "learning_rate": 2.092670232759867e-05, "loss": 0.3103, "step": 4825, "teacher_loss": 0.293241024017334 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6635532975196838, "learning_rate": 2.0931039467977447e-05, "loss": 0.2327, "step": 4826, "teacher_loss": 0.1848260760307312 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6287559866905212, "learning_rate": 2.0935376608356224e-05, "loss": 0.3335, "step": 4827, "teacher_loss": 0.3006608486175537 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.4950222373008728, "learning_rate": 2.0939713748735002e-05, "loss": 0.2394, "step": 4828, "teacher_loss": 0.21098008751869202 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.5812622308731079, "learning_rate": 2.094405088911378e-05, "loss": 0.2256, "step": 4829, "teacher_loss": 0.18605396151542664 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6748309135437012, "learning_rate": 2.0948388029492557e-05, "loss": 0.621, "step": 4830, "teacher_loss": 0.6150619387626648 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 1.085985541343689, "learning_rate": 2.095272516987133e-05, "loss": 0.3593, "step": 4831, "teacher_loss": 0.27858567237854004 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.8553024530410767, "learning_rate": 2.095706231025011e-05, "loss": 0.5284, "step": 4832, "teacher_loss": 0.4920288920402527 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3057997226715088, "learning_rate": 2.0961399450628886e-05, "loss": 0.2111, "step": 4833, "teacher_loss": 0.20057693123817444 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3562811017036438, "learning_rate": 2.096573659100766e-05, "loss": 0.2201, "step": 4834, "teacher_loss": 0.20495307445526123 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.7519885301589966, "learning_rate": 2.097007373138644e-05, "loss": 0.3094, "step": 4835, "teacher_loss": 0.2602734863758087 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.6169558763504028, "learning_rate": 2.0974410871765216e-05, "loss": 0.2239, "step": 4836, "teacher_loss": 0.18018747866153717 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3238542377948761, "learning_rate": 2.0978748012143994e-05, "loss": 0.248, "step": 4837, "teacher_loss": 0.2396085262298584 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.3412906229496002, "learning_rate": 2.098308515252277e-05, "loss": 0.2161, "step": 4838, "teacher_loss": 0.2022000253200531 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.5752498507499695, "learning_rate": 2.098742229290155e-05, "loss": 0.2153, "step": 4839, "teacher_loss": 0.17532062530517578 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.24048426747322083, "learning_rate": 2.0991759433280326e-05, "loss": 0.1937, "step": 4840, "teacher_loss": 0.1885371208190918 }, { "compression_loss": 0.0, "epoch": 0.87, "label_loss": 0.927212119102478, "learning_rate": 2.0996096573659104e-05, "loss": 0.2854, "step": 4841, "teacher_loss": 0.21404683589935303 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.23985639214515686, "learning_rate": 2.1000433714037875e-05, "loss": 0.2428, "step": 4842, "teacher_loss": 0.2430967390537262 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3295680284500122, "learning_rate": 2.1004770854416653e-05, "loss": 0.1837, "step": 4843, "teacher_loss": 0.16749054193496704 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.38000011444091797, "learning_rate": 2.100910799479543e-05, "loss": 0.3014, "step": 4844, "teacher_loss": 0.2926191985607147 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2840404808521271, "learning_rate": 2.1013445135174208e-05, "loss": 0.256, "step": 4845, "teacher_loss": 0.2528376877307892 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5129337906837463, "learning_rate": 2.1017782275552985e-05, "loss": 0.2186, "step": 4846, "teacher_loss": 0.18590568006038666 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.8131666779518127, "learning_rate": 2.1022119415931763e-05, "loss": 0.2982, "step": 4847, "teacher_loss": 0.24097684025764465 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.4696654677391052, "learning_rate": 2.102645655631054e-05, "loss": 0.2471, "step": 4848, "teacher_loss": 0.2224232703447342 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.6193064451217651, "learning_rate": 2.1030793696689318e-05, "loss": 0.266, "step": 4849, "teacher_loss": 0.22679319977760315 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.4176526665687561, "learning_rate": 2.1035130837068096e-05, "loss": 0.2032, "step": 4850, "teacher_loss": 0.17933514714241028 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.9364557266235352, "learning_rate": 2.103946797744687e-05, "loss": 0.408, "step": 4851, "teacher_loss": 0.3493291139602661 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.27834367752075195, "learning_rate": 2.1043805117825648e-05, "loss": 0.2334, "step": 4852, "teacher_loss": 0.22836343944072723 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2658689022064209, "learning_rate": 2.1048142258204422e-05, "loss": 0.2473, "step": 4853, "teacher_loss": 0.24521687626838684 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.18103188276290894, "learning_rate": 2.10524793985832e-05, "loss": 0.1583, "step": 4854, "teacher_loss": 0.15573669970035553 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.40230584144592285, "learning_rate": 2.1056816538961977e-05, "loss": 0.4206, "step": 4855, "teacher_loss": 0.42267870903015137 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.6293624043464661, "learning_rate": 2.1061153679340755e-05, "loss": 0.2848, "step": 4856, "teacher_loss": 0.24655066430568695 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5312010049819946, "learning_rate": 2.1065490819719532e-05, "loss": 0.2648, "step": 4857, "teacher_loss": 0.23525011539459229 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3443912863731384, "learning_rate": 2.106982796009831e-05, "loss": 0.3098, "step": 4858, "teacher_loss": 0.3059839606285095 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.4562992453575134, "learning_rate": 2.1074165100477088e-05, "loss": 0.2453, "step": 4859, "teacher_loss": 0.2218371033668518 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3486790955066681, "learning_rate": 2.1078502240855862e-05, "loss": 0.1678, "step": 4860, "teacher_loss": 0.14771874248981476 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5641835927963257, "learning_rate": 2.108283938123464e-05, "loss": 0.3621, "step": 4861, "teacher_loss": 0.33961790800094604 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 1.0885891914367676, "learning_rate": 2.1087176521613417e-05, "loss": 0.3606, "step": 4862, "teacher_loss": 0.2797633707523346 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.8079729080200195, "learning_rate": 2.1091513661992195e-05, "loss": 0.3713, "step": 4863, "teacher_loss": 0.32277625799179077 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.38171976804733276, "learning_rate": 2.109585080237097e-05, "loss": 0.2445, "step": 4864, "teacher_loss": 0.2292403131723404 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5673741698265076, "learning_rate": 2.1100187942749747e-05, "loss": 0.3102, "step": 4865, "teacher_loss": 0.28162863850593567 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.49253854155540466, "learning_rate": 2.1104525083128524e-05, "loss": 0.2887, "step": 4866, "teacher_loss": 0.2660059332847595 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2558417320251465, "learning_rate": 2.1108862223507302e-05, "loss": 0.1799, "step": 4867, "teacher_loss": 0.17151561379432678 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3826592266559601, "learning_rate": 2.111319936388608e-05, "loss": 0.2761, "step": 4868, "teacher_loss": 0.2642658054828644 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.32508817315101624, "learning_rate": 2.1117536504264854e-05, "loss": 0.2909, "step": 4869, "teacher_loss": 0.2871348261833191 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 1.0501303672790527, "learning_rate": 2.112187364464363e-05, "loss": 0.3836, "step": 4870, "teacher_loss": 0.30953431129455566 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.6292088031768799, "learning_rate": 2.112621078502241e-05, "loss": 0.2427, "step": 4871, "teacher_loss": 0.19978384673595428 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.40905478596687317, "learning_rate": 2.1130547925401187e-05, "loss": 0.2007, "step": 4872, "teacher_loss": 0.17753881216049194 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2770938575267792, "learning_rate": 2.1134885065779964e-05, "loss": 0.2087, "step": 4873, "teacher_loss": 0.20105795562267303 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3100948929786682, "learning_rate": 2.1139222206158742e-05, "loss": 0.336, "step": 4874, "teacher_loss": 0.33886635303497314 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.49566251039505005, "learning_rate": 2.1143559346537516e-05, "loss": 0.2984, "step": 4875, "teacher_loss": 0.2764957845211029 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.25857824087142944, "learning_rate": 2.1147896486916294e-05, "loss": 0.2375, "step": 4876, "teacher_loss": 0.2351183295249939 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.4528854489326477, "learning_rate": 2.115223362729507e-05, "loss": 0.2461, "step": 4877, "teacher_loss": 0.22313690185546875 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2965570092201233, "learning_rate": 2.1156570767673846e-05, "loss": 0.171, "step": 4878, "teacher_loss": 0.1570700705051422 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.191180020570755, "learning_rate": 2.1160907908052623e-05, "loss": 0.1832, "step": 4879, "teacher_loss": 0.1823207288980484 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5405227541923523, "learning_rate": 2.11652450484314e-05, "loss": 0.2765, "step": 4880, "teacher_loss": 0.24716366827487946 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.31240585446357727, "learning_rate": 2.116958218881018e-05, "loss": 0.2318, "step": 4881, "teacher_loss": 0.22281807661056519 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.48004692792892456, "learning_rate": 2.1173919329188956e-05, "loss": 0.2408, "step": 4882, "teacher_loss": 0.21426282823085785 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.1573874056339264, "learning_rate": 2.1178256469567734e-05, "loss": 0.1676, "step": 4883, "teacher_loss": 0.16872447729110718 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3565481901168823, "learning_rate": 2.118259360994651e-05, "loss": 0.2345, "step": 4884, "teacher_loss": 0.2209150791168213 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.6600135564804077, "learning_rate": 2.118693075032529e-05, "loss": 0.4031, "step": 4885, "teacher_loss": 0.37453049421310425 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.7931019067764282, "learning_rate": 2.119126789070406e-05, "loss": 0.5347, "step": 4886, "teacher_loss": 0.5059648156166077 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5532708764076233, "learning_rate": 2.1195605031082837e-05, "loss": 0.2624, "step": 4887, "teacher_loss": 0.23012381792068481 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.6887480020523071, "learning_rate": 2.1199942171461615e-05, "loss": 0.2144, "step": 4888, "teacher_loss": 0.16169053316116333 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.24550040066242218, "learning_rate": 2.1204279311840393e-05, "loss": 0.2036, "step": 4889, "teacher_loss": 0.19889873266220093 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.2875514626502991, "learning_rate": 2.120861645221917e-05, "loss": 0.1899, "step": 4890, "teacher_loss": 0.17899571359157562 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.1795966625213623, "learning_rate": 2.1212953592597948e-05, "loss": 0.1958, "step": 4891, "teacher_loss": 0.1976279616355896 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.3074508607387543, "learning_rate": 2.1217290732976726e-05, "loss": 0.2475, "step": 4892, "teacher_loss": 0.24087673425674438 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.46810466051101685, "learning_rate": 2.1221627873355503e-05, "loss": 0.2432, "step": 4893, "teacher_loss": 0.2181912213563919 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5411608219146729, "learning_rate": 2.122596501373428e-05, "loss": 0.198, "step": 4894, "teacher_loss": 0.15983399748802185 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.5552703142166138, "learning_rate": 2.1230302154113055e-05, "loss": 0.2345, "step": 4895, "teacher_loss": 0.198857843875885 }, { "compression_loss": 0.0, "epoch": 0.88, "label_loss": 0.10408133268356323, "learning_rate": 2.1234639294491833e-05, "loss": 0.1328, "step": 4896, "teacher_loss": 0.13598322868347168 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5159024000167847, "learning_rate": 2.1238976434870607e-05, "loss": 0.3307, "step": 4897, "teacher_loss": 0.3101428747177124 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.46688953042030334, "learning_rate": 2.1243313575249384e-05, "loss": 0.1939, "step": 4898, "teacher_loss": 0.16351687908172607 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.3184567391872406, "learning_rate": 2.1247650715628162e-05, "loss": 0.2728, "step": 4899, "teacher_loss": 0.2676974833011627 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5943064093589783, "learning_rate": 2.125198785600694e-05, "loss": 0.299, "step": 4900, "teacher_loss": 0.26621532440185547 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 1.1503369808197021, "learning_rate": 2.1256324996385717e-05, "loss": 0.3046, "step": 4901, "teacher_loss": 0.2106659710407257 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4666033983230591, "learning_rate": 2.1260662136764495e-05, "loss": 0.2345, "step": 4902, "teacher_loss": 0.20870742201805115 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.9410369992256165, "learning_rate": 2.1264999277143273e-05, "loss": 0.3013, "step": 4903, "teacher_loss": 0.23018693923950195 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.2715277075767517, "learning_rate": 2.1269336417522047e-05, "loss": 0.1836, "step": 4904, "teacher_loss": 0.17387613654136658 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5608832836151123, "learning_rate": 2.1273673557900824e-05, "loss": 0.2896, "step": 4905, "teacher_loss": 0.2594859004020691 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.365366131067276, "learning_rate": 2.1278010698279602e-05, "loss": 0.285, "step": 4906, "teacher_loss": 0.2760217487812042 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4786378741264343, "learning_rate": 2.128234783865838e-05, "loss": 0.2506, "step": 4907, "teacher_loss": 0.22523358464241028 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.7722607851028442, "learning_rate": 2.1286684979037154e-05, "loss": 0.309, "step": 4908, "teacher_loss": 0.2575114965438843 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.49076831340789795, "learning_rate": 2.129102211941593e-05, "loss": 0.2741, "step": 4909, "teacher_loss": 0.25000888109207153 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5835728645324707, "learning_rate": 2.129535925979471e-05, "loss": 0.668, "step": 4910, "teacher_loss": 0.677399754524231 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.20781002938747406, "learning_rate": 2.1299696400173487e-05, "loss": 0.2223, "step": 4911, "teacher_loss": 0.22389043867588043 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.6984086632728577, "learning_rate": 2.1304033540552264e-05, "loss": 0.3971, "step": 4912, "teacher_loss": 0.36366546154022217 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5680651664733887, "learning_rate": 2.130837068093104e-05, "loss": 0.2798, "step": 4913, "teacher_loss": 0.24780185520648956 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4330735206604004, "learning_rate": 2.1312707821309816e-05, "loss": 0.3389, "step": 4914, "teacher_loss": 0.328416109085083 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.6274861097335815, "learning_rate": 2.1317044961688594e-05, "loss": 0.2524, "step": 4915, "teacher_loss": 0.21071599423885345 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.3112795650959015, "learning_rate": 2.132138210206737e-05, "loss": 0.2003, "step": 4916, "teacher_loss": 0.1880241483449936 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.24191522598266602, "learning_rate": 2.132571924244615e-05, "loss": 0.234, "step": 4917, "teacher_loss": 0.23306824266910553 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.18727698922157288, "learning_rate": 2.1330056382824927e-05, "loss": 0.2634, "step": 4918, "teacher_loss": 0.27183055877685547 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.6641616225242615, "learning_rate": 2.13343935232037e-05, "loss": 0.2917, "step": 4919, "teacher_loss": 0.2503282427787781 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.774112343788147, "learning_rate": 2.133873066358248e-05, "loss": 0.2973, "step": 4920, "teacher_loss": 0.24432498216629028 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.850703775882721, "learning_rate": 2.1343067803961256e-05, "loss": 0.353, "step": 4921, "teacher_loss": 0.2977176904678345 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5981116890907288, "learning_rate": 2.134740494434003e-05, "loss": 0.3596, "step": 4922, "teacher_loss": 0.33310291171073914 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.3872612714767456, "learning_rate": 2.1351742084718808e-05, "loss": 0.2092, "step": 4923, "teacher_loss": 0.18944603204727173 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.728947103023529, "learning_rate": 2.1356079225097586e-05, "loss": 0.3065, "step": 4924, "teacher_loss": 0.2596026062965393 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.8113147616386414, "learning_rate": 2.1360416365476363e-05, "loss": 0.3307, "step": 4925, "teacher_loss": 0.27726688981056213 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.44602975249290466, "learning_rate": 2.136475350585514e-05, "loss": 0.3185, "step": 4926, "teacher_loss": 0.30436283349990845 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.7215862274169922, "learning_rate": 2.136909064623392e-05, "loss": 0.3181, "step": 4927, "teacher_loss": 0.27322137355804443 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4445520043373108, "learning_rate": 2.1373427786612696e-05, "loss": 0.2003, "step": 4928, "teacher_loss": 0.17318299412727356 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4289535582065582, "learning_rate": 2.137776492699147e-05, "loss": 0.2952, "step": 4929, "teacher_loss": 0.28028473258018494 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.7420486211776733, "learning_rate": 2.1382102067370248e-05, "loss": 0.2525, "step": 4930, "teacher_loss": 0.19805431365966797 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.45578306913375854, "learning_rate": 2.1386439207749022e-05, "loss": 0.3078, "step": 4931, "teacher_loss": 0.2913256883621216 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.28732916712760925, "learning_rate": 2.13907763481278e-05, "loss": 0.2221, "step": 4932, "teacher_loss": 0.21480277180671692 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.19382552802562714, "learning_rate": 2.1395113488506578e-05, "loss": 0.1668, "step": 4933, "teacher_loss": 0.16377496719360352 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.3187437057495117, "learning_rate": 2.1399450628885355e-05, "loss": 0.1955, "step": 4934, "teacher_loss": 0.18182289600372314 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4643344283103943, "learning_rate": 2.1403787769264133e-05, "loss": 0.2693, "step": 4935, "teacher_loss": 0.24765314161777496 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5880998969078064, "learning_rate": 2.140812490964291e-05, "loss": 0.2622, "step": 4936, "teacher_loss": 0.2259451448917389 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.2634769082069397, "learning_rate": 2.1412462050021688e-05, "loss": 0.1845, "step": 4937, "teacher_loss": 0.17569279670715332 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.6411460638046265, "learning_rate": 2.1416799190400466e-05, "loss": 0.2442, "step": 4938, "teacher_loss": 0.20011621713638306 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.7642667889595032, "learning_rate": 2.142113633077924e-05, "loss": 0.4246, "step": 4939, "teacher_loss": 0.3868919312953949 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.359567254781723, "learning_rate": 2.1425473471158017e-05, "loss": 0.2789, "step": 4940, "teacher_loss": 0.2699160575866699 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.7302291393280029, "learning_rate": 2.1429810611536792e-05, "loss": 0.2627, "step": 4941, "teacher_loss": 0.2107389122247696 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.36183881759643555, "learning_rate": 2.143414775191557e-05, "loss": 0.2123, "step": 4942, "teacher_loss": 0.19568568468093872 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5417706966400146, "learning_rate": 2.1438484892294347e-05, "loss": 0.2218, "step": 4943, "teacher_loss": 0.18627247214317322 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.4372347891330719, "learning_rate": 2.1442822032673125e-05, "loss": 0.3483, "step": 4944, "teacher_loss": 0.33838292956352234 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.3364478647708893, "learning_rate": 2.1447159173051902e-05, "loss": 0.2099, "step": 4945, "teacher_loss": 0.19580666720867157 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.2590668201446533, "learning_rate": 2.145149631343068e-05, "loss": 0.2195, "step": 4946, "teacher_loss": 0.21508730947971344 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.44933491945266724, "learning_rate": 2.1455833453809457e-05, "loss": 0.3378, "step": 4947, "teacher_loss": 0.32538288831710815 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.49010169506073, "learning_rate": 2.1460170594188232e-05, "loss": 0.3009, "step": 4948, "teacher_loss": 0.2798406183719635 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.6871552467346191, "learning_rate": 2.146450773456701e-05, "loss": 0.3553, "step": 4949, "teacher_loss": 0.3184507489204407 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.22841525077819824, "learning_rate": 2.1468844874945787e-05, "loss": 0.1431, "step": 4950, "teacher_loss": 0.13367322087287903 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.5973429083824158, "learning_rate": 2.147318201532456e-05, "loss": 0.2642, "step": 4951, "teacher_loss": 0.2272353619337082 }, { "compression_loss": 0.0, "epoch": 0.89, "label_loss": 0.32793813943862915, "learning_rate": 2.147751915570334e-05, "loss": 0.2603, "step": 4952, "teacher_loss": 0.2528391182422638 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4855886697769165, "learning_rate": 2.1481856296082116e-05, "loss": 0.1927, "step": 4953, "teacher_loss": 0.16010509431362152 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8270766735076904, "learning_rate": 2.1486193436460894e-05, "loss": 0.4078, "step": 4954, "teacher_loss": 0.3612019121646881 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5824657678604126, "learning_rate": 2.149053057683967e-05, "loss": 0.3259, "step": 4955, "teacher_loss": 0.2974180579185486 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.6485385894775391, "learning_rate": 2.149486771721845e-05, "loss": 0.28, "step": 4956, "teacher_loss": 0.23900456726551056 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5515516996383667, "learning_rate": 2.1499204857597224e-05, "loss": 0.2734, "step": 4957, "teacher_loss": 0.2424619495868683 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.47967639565467834, "learning_rate": 2.1503541997976e-05, "loss": 0.3044, "step": 4958, "teacher_loss": 0.28497135639190674 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.490291029214859, "learning_rate": 2.150787913835478e-05, "loss": 0.227, "step": 4959, "teacher_loss": 0.19769850373268127 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.523702085018158, "learning_rate": 2.1512216278733556e-05, "loss": 0.295, "step": 4960, "teacher_loss": 0.26961225271224976 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8569098711013794, "learning_rate": 2.1516553419112334e-05, "loss": 0.5047, "step": 4961, "teacher_loss": 0.465536892414093 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.48284712433815, "learning_rate": 2.1520890559491108e-05, "loss": 0.2579, "step": 4962, "teacher_loss": 0.23287333548069 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.35147833824157715, "learning_rate": 2.1525227699869886e-05, "loss": 0.2183, "step": 4963, "teacher_loss": 0.20348471403121948 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.534899115562439, "learning_rate": 2.1529564840248663e-05, "loss": 0.246, "step": 4964, "teacher_loss": 0.21386224031448364 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8693801760673523, "learning_rate": 2.153390198062744e-05, "loss": 0.2898, "step": 4965, "teacher_loss": 0.2253713309764862 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8280802965164185, "learning_rate": 2.1538239121006215e-05, "loss": 0.2671, "step": 4966, "teacher_loss": 0.20475471019744873 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5361369252204895, "learning_rate": 2.1542576261384993e-05, "loss": 0.3386, "step": 4967, "teacher_loss": 0.3166942596435547 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2746128439903259, "learning_rate": 2.154691340176377e-05, "loss": 0.2124, "step": 4968, "teacher_loss": 0.20548345148563385 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8689091801643372, "learning_rate": 2.1551250542142548e-05, "loss": 0.3106, "step": 4969, "teacher_loss": 0.24857546389102936 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.23524591326713562, "learning_rate": 2.1555587682521326e-05, "loss": 0.2069, "step": 4970, "teacher_loss": 0.20377680659294128 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5544571280479431, "learning_rate": 2.1559924822900103e-05, "loss": 0.2604, "step": 4971, "teacher_loss": 0.22770847380161285 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4059109389781952, "learning_rate": 2.156426196327888e-05, "loss": 0.2912, "step": 4972, "teacher_loss": 0.27842846512794495 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.38329017162323, "learning_rate": 2.1568599103657655e-05, "loss": 0.2156, "step": 4973, "teacher_loss": 0.19700732827186584 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.3662585914134979, "learning_rate": 2.1572936244036433e-05, "loss": 0.1804, "step": 4974, "teacher_loss": 0.15976077318191528 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.06622756272554398, "learning_rate": 2.1577273384415207e-05, "loss": 0.1589, "step": 4975, "teacher_loss": 0.16914281249046326 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.361545205116272, "learning_rate": 2.1581610524793985e-05, "loss": 0.2637, "step": 4976, "teacher_loss": 0.2528447210788727 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.34256407618522644, "learning_rate": 2.1585947665172762e-05, "loss": 0.2355, "step": 4977, "teacher_loss": 0.2235802412033081 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4668552875518799, "learning_rate": 2.159028480555154e-05, "loss": 0.162, "step": 4978, "teacher_loss": 0.12809503078460693 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.46656686067581177, "learning_rate": 2.1594621945930318e-05, "loss": 0.2704, "step": 4979, "teacher_loss": 0.24855628609657288 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2090367078781128, "learning_rate": 2.1598959086309095e-05, "loss": 0.1917, "step": 4980, "teacher_loss": 0.1898198127746582 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.3238547146320343, "learning_rate": 2.1603296226687873e-05, "loss": 0.3156, "step": 4981, "teacher_loss": 0.314701110124588 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.6774986982345581, "learning_rate": 2.160763336706665e-05, "loss": 0.3289, "step": 4982, "teacher_loss": 0.2901648283004761 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.8897311687469482, "learning_rate": 2.1611970507445425e-05, "loss": 0.3759, "step": 4983, "teacher_loss": 0.31875723600387573 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.9408584833145142, "learning_rate": 2.16163076478242e-05, "loss": 0.416, "step": 4984, "teacher_loss": 0.35763445496559143 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.33588656783103943, "learning_rate": 2.1620644788202977e-05, "loss": 0.2356, "step": 4985, "teacher_loss": 0.224424809217453 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2043946385383606, "learning_rate": 2.1624981928581754e-05, "loss": 0.1916, "step": 4986, "teacher_loss": 0.190158873796463 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.6215960383415222, "learning_rate": 2.1629319068960532e-05, "loss": 0.2927, "step": 4987, "teacher_loss": 0.25620540976524353 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2454887330532074, "learning_rate": 2.163365620933931e-05, "loss": 0.2377, "step": 4988, "teacher_loss": 0.23683345317840576 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.37675097584724426, "learning_rate": 2.1637993349718087e-05, "loss": 0.3096, "step": 4989, "teacher_loss": 0.3021875023841858 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.930221676826477, "learning_rate": 2.1642330490096865e-05, "loss": 0.7065, "step": 4990, "teacher_loss": 0.681634247303009 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2444666028022766, "learning_rate": 2.1646667630475642e-05, "loss": 0.2384, "step": 4991, "teacher_loss": 0.23774632811546326 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5182797908782959, "learning_rate": 2.1651004770854417e-05, "loss": 0.2416, "step": 4992, "teacher_loss": 0.2108837515115738 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.25727009773254395, "learning_rate": 2.1655341911233194e-05, "loss": 0.1939, "step": 4993, "teacher_loss": 0.18687215447425842 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4268108010292053, "learning_rate": 2.1659679051611972e-05, "loss": 0.1865, "step": 4994, "teacher_loss": 0.15983860194683075 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.7278549671173096, "learning_rate": 2.1664016191990746e-05, "loss": 0.3292, "step": 4995, "teacher_loss": 0.28487664461135864 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.6536684036254883, "learning_rate": 2.1668353332369524e-05, "loss": 0.2593, "step": 4996, "teacher_loss": 0.215532124042511 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.2759597599506378, "learning_rate": 2.16726904727483e-05, "loss": 0.1823, "step": 4997, "teacher_loss": 0.17187872529029846 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4096769690513611, "learning_rate": 2.167702761312708e-05, "loss": 0.2555, "step": 4998, "teacher_loss": 0.23835709691047668 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.48129016160964966, "learning_rate": 2.1681364753505857e-05, "loss": 0.2616, "step": 4999, "teacher_loss": 0.23719972372055054 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5433094501495361, "learning_rate": 2.1685701893884634e-05, "loss": 0.2884, "step": 5000, "teacher_loss": 0.2601229250431061 }, { "epoch": 0.9, "eval_exact_match": 79.94323557237465, "eval_f1": 87.38469094119894, "step": 5000 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.3355875015258789, "learning_rate": 2.169003903426341e-05, "loss": 0.1989, "step": 5001, "teacher_loss": 0.18374839425086975 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.4448932409286499, "learning_rate": 2.1694376174642186e-05, "loss": 0.2127, "step": 5002, "teacher_loss": 0.18689042329788208 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.40050047636032104, "learning_rate": 2.1698713315020964e-05, "loss": 0.2233, "step": 5003, "teacher_loss": 0.2036624550819397 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.347493052482605, "learning_rate": 2.170305045539974e-05, "loss": 0.2596, "step": 5004, "teacher_loss": 0.2498435527086258 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.39709848165512085, "learning_rate": 2.170738759577852e-05, "loss": 0.286, "step": 5005, "teacher_loss": 0.27360135316848755 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.7585475444793701, "learning_rate": 2.1711724736157293e-05, "loss": 0.3192, "step": 5006, "teacher_loss": 0.2704097330570221 }, { "compression_loss": 0.0, "epoch": 0.9, "label_loss": 0.5988269448280334, "learning_rate": 2.171606187653607e-05, "loss": 0.2293, "step": 5007, "teacher_loss": 0.1881895363330841 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.602444589138031, "learning_rate": 2.172039901691485e-05, "loss": 0.2354, "step": 5008, "teacher_loss": 0.19463714957237244 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.32646477222442627, "learning_rate": 2.1724736157293626e-05, "loss": 0.2405, "step": 5009, "teacher_loss": 0.23095834255218506 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2694118916988373, "learning_rate": 2.17290732976724e-05, "loss": 0.192, "step": 5010, "teacher_loss": 0.18341705203056335 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.8982809782028198, "learning_rate": 2.1733410438051178e-05, "loss": 0.2493, "step": 5011, "teacher_loss": 0.1772342324256897 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.3746625781059265, "learning_rate": 2.1737747578429955e-05, "loss": 0.243, "step": 5012, "teacher_loss": 0.2283729612827301 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2214689403772354, "learning_rate": 2.1742084718808733e-05, "loss": 0.2358, "step": 5013, "teacher_loss": 0.23736536502838135 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2346813678741455, "learning_rate": 2.174642185918751e-05, "loss": 0.1941, "step": 5014, "teacher_loss": 0.18957194685935974 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5537986755371094, "learning_rate": 2.175075899956629e-05, "loss": 0.2514, "step": 5015, "teacher_loss": 0.21782582998275757 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.23473691940307617, "learning_rate": 2.1755096139945066e-05, "loss": 0.1776, "step": 5016, "teacher_loss": 0.1712019145488739 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.8613081574440002, "learning_rate": 2.175943328032384e-05, "loss": 0.3249, "step": 5017, "teacher_loss": 0.2652929723262787 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.3878059685230255, "learning_rate": 2.1763770420702618e-05, "loss": 0.2736, "step": 5018, "teacher_loss": 0.26094740629196167 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.26398012042045593, "learning_rate": 2.1768107561081392e-05, "loss": 0.2055, "step": 5019, "teacher_loss": 0.19902634620666504 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.4983898997306824, "learning_rate": 2.177244470146017e-05, "loss": 0.2515, "step": 5020, "teacher_loss": 0.22410613298416138 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.36464983224868774, "learning_rate": 2.1776781841838947e-05, "loss": 0.2438, "step": 5021, "teacher_loss": 0.23041774332523346 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.3985564112663269, "learning_rate": 2.1781118982217725e-05, "loss": 0.2978, "step": 5022, "teacher_loss": 0.2865598201751709 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.33505991101264954, "learning_rate": 2.1785456122596503e-05, "loss": 0.2024, "step": 5023, "teacher_loss": 0.18762236833572388 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.9182227253913879, "learning_rate": 2.178979326297528e-05, "loss": 0.3878, "step": 5024, "teacher_loss": 0.3289141356945038 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.6506767272949219, "learning_rate": 2.1794130403354058e-05, "loss": 0.2961, "step": 5025, "teacher_loss": 0.25674110651016235 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.8632342219352722, "learning_rate": 2.1798467543732835e-05, "loss": 0.3071, "step": 5026, "teacher_loss": 0.24528968334197998 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.7953322529792786, "learning_rate": 2.180280468411161e-05, "loss": 0.3426, "step": 5027, "teacher_loss": 0.2922816276550293 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.9344212412834167, "learning_rate": 2.1807141824490384e-05, "loss": 0.5585, "step": 5028, "teacher_loss": 0.5166952610015869 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5721264481544495, "learning_rate": 2.181147896486916e-05, "loss": 0.2318, "step": 5029, "teacher_loss": 0.1940159946680069 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.6489219665527344, "learning_rate": 2.181581610524794e-05, "loss": 0.2724, "step": 5030, "teacher_loss": 0.23057657480239868 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.26539158821105957, "learning_rate": 2.1820153245626717e-05, "loss": 0.2918, "step": 5031, "teacher_loss": 0.29478883743286133 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.557948648929596, "learning_rate": 2.1824490386005494e-05, "loss": 0.2394, "step": 5032, "teacher_loss": 0.20399603247642517 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.433555543422699, "learning_rate": 2.1828827526384272e-05, "loss": 0.2657, "step": 5033, "teacher_loss": 0.24700413644313812 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.42045560479164124, "learning_rate": 2.183316466676305e-05, "loss": 0.3181, "step": 5034, "teacher_loss": 0.30669713020324707 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5812956094741821, "learning_rate": 2.1837501807141827e-05, "loss": 0.2785, "step": 5035, "teacher_loss": 0.24484045803546906 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.27369487285614014, "learning_rate": 2.18418389475206e-05, "loss": 0.2302, "step": 5036, "teacher_loss": 0.22542057931423187 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.43715769052505493, "learning_rate": 2.184617608789938e-05, "loss": 0.2805, "step": 5037, "teacher_loss": 0.26312124729156494 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.34317851066589355, "learning_rate": 2.1850513228278157e-05, "loss": 0.1996, "step": 5038, "teacher_loss": 0.1836351901292801 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2169075906276703, "learning_rate": 2.185485036865693e-05, "loss": 0.2117, "step": 5039, "teacher_loss": 0.21115203201770782 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2305580973625183, "learning_rate": 2.185918750903571e-05, "loss": 0.1531, "step": 5040, "teacher_loss": 0.144525408744812 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.36952972412109375, "learning_rate": 2.1863524649414486e-05, "loss": 0.2234, "step": 5041, "teacher_loss": 0.2071918547153473 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.42547714710235596, "learning_rate": 2.1867861789793264e-05, "loss": 0.2508, "step": 5042, "teacher_loss": 0.23133596777915955 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.7025610208511353, "learning_rate": 2.187219893017204e-05, "loss": 0.2835, "step": 5043, "teacher_loss": 0.23689395189285278 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5379042029380798, "learning_rate": 2.187653607055082e-05, "loss": 0.4063, "step": 5044, "teacher_loss": 0.3916476368904114 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5258693695068359, "learning_rate": 2.1880873210929593e-05, "loss": 0.2331, "step": 5045, "teacher_loss": 0.2005929797887802 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.3884434103965759, "learning_rate": 2.188521035130837e-05, "loss": 0.2488, "step": 5046, "teacher_loss": 0.23325151205062866 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.8033318519592285, "learning_rate": 2.188954749168715e-05, "loss": 0.268, "step": 5047, "teacher_loss": 0.20854748785495758 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.16746020317077637, "learning_rate": 2.1893884632065926e-05, "loss": 0.1625, "step": 5048, "teacher_loss": 0.16191929578781128 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.6545905470848083, "learning_rate": 2.18982217724447e-05, "loss": 0.4166, "step": 5049, "teacher_loss": 0.39011919498443604 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5566633343696594, "learning_rate": 2.1902558912823478e-05, "loss": 0.3195, "step": 5050, "teacher_loss": 0.29318100214004517 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.41910529136657715, "learning_rate": 2.1906896053202256e-05, "loss": 0.2, "step": 5051, "teacher_loss": 0.17566964030265808 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.6011900901794434, "learning_rate": 2.1911233193581033e-05, "loss": 0.1756, "step": 5052, "teacher_loss": 0.12832866609096527 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.7882957458496094, "learning_rate": 2.191557033395981e-05, "loss": 0.3264, "step": 5053, "teacher_loss": 0.2751200795173645 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.6054220795631409, "learning_rate": 2.1919907474338585e-05, "loss": 0.3809, "step": 5054, "teacher_loss": 0.3559808135032654 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.22506718337535858, "learning_rate": 2.1924244614717363e-05, "loss": 0.2145, "step": 5055, "teacher_loss": 0.2133278250694275 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.4036915600299835, "learning_rate": 2.192858175509614e-05, "loss": 0.2935, "step": 5056, "teacher_loss": 0.2812880873680115 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2568385899066925, "learning_rate": 2.1932918895474918e-05, "loss": 0.2323, "step": 5057, "teacher_loss": 0.2295297086238861 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.7053796052932739, "learning_rate": 2.1937256035853696e-05, "loss": 0.3442, "step": 5058, "teacher_loss": 0.3040328919887543 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.5729056000709534, "learning_rate": 2.1941593176232473e-05, "loss": 0.2077, "step": 5059, "teacher_loss": 0.16707561910152435 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.2908748388290405, "learning_rate": 2.1945930316611247e-05, "loss": 0.1946, "step": 5060, "teacher_loss": 0.18386542797088623 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.4173637628555298, "learning_rate": 2.1950267456990025e-05, "loss": 0.246, "step": 5061, "teacher_loss": 0.2269698977470398 }, { "compression_loss": 0.0, "epoch": 0.91, "label_loss": 0.4610925614833832, "learning_rate": 2.1954604597368803e-05, "loss": 0.2356, "step": 5062, "teacher_loss": 0.21059909462928772 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.36663299798965454, "learning_rate": 2.1958941737747577e-05, "loss": 0.277, "step": 5063, "teacher_loss": 0.26699942350387573 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.7183891534805298, "learning_rate": 2.1963278878126355e-05, "loss": 0.3813, "step": 5064, "teacher_loss": 0.34384071826934814 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.9515157341957092, "learning_rate": 2.1967616018505132e-05, "loss": 0.3374, "step": 5065, "teacher_loss": 0.2691618800163269 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.4123256206512451, "learning_rate": 2.197195315888391e-05, "loss": 0.2926, "step": 5066, "teacher_loss": 0.2793012261390686 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.38430964946746826, "learning_rate": 2.1976290299262687e-05, "loss": 0.2285, "step": 5067, "teacher_loss": 0.21119289100170135 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3126375079154968, "learning_rate": 2.1980627439641465e-05, "loss": 0.1925, "step": 5068, "teacher_loss": 0.17915701866149902 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.39222753047943115, "learning_rate": 2.1984964580020243e-05, "loss": 0.2419, "step": 5069, "teacher_loss": 0.22518888115882874 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.49275559186935425, "learning_rate": 2.198930172039902e-05, "loss": 0.278, "step": 5070, "teacher_loss": 0.2541758716106415 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.2644200921058655, "learning_rate": 2.199363886077779e-05, "loss": 0.1939, "step": 5071, "teacher_loss": 0.18608510494232178 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.4927518367767334, "learning_rate": 2.199797600115657e-05, "loss": 0.2373, "step": 5072, "teacher_loss": 0.20892333984375 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3768884539604187, "learning_rate": 2.2002313141535346e-05, "loss": 0.2388, "step": 5073, "teacher_loss": 0.22350391745567322 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.430931031703949, "learning_rate": 2.2006650281914124e-05, "loss": 0.2266, "step": 5074, "teacher_loss": 0.20389777421951294 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.5921732783317566, "learning_rate": 2.20109874222929e-05, "loss": 0.2323, "step": 5075, "teacher_loss": 0.19230225682258606 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 1.0756014585494995, "learning_rate": 2.201532456267168e-05, "loss": 0.5575, "step": 5076, "teacher_loss": 0.49994099140167236 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6048029661178589, "learning_rate": 2.2019661703050457e-05, "loss": 0.2466, "step": 5077, "teacher_loss": 0.20683380961418152 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.8456159830093384, "learning_rate": 2.2023998843429234e-05, "loss": 0.7344, "step": 5078, "teacher_loss": 0.7220935225486755 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.32778510451316833, "learning_rate": 2.2028335983808012e-05, "loss": 0.2019, "step": 5079, "teacher_loss": 0.18796800076961517 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.5015953779220581, "learning_rate": 2.2032673124186786e-05, "loss": 0.2505, "step": 5080, "teacher_loss": 0.22256334125995636 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.49273157119750977, "learning_rate": 2.2037010264565564e-05, "loss": 0.305, "step": 5081, "teacher_loss": 0.2841217517852783 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.40552520751953125, "learning_rate": 2.2041347404944338e-05, "loss": 0.3022, "step": 5082, "teacher_loss": 0.29073381423950195 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6219925880432129, "learning_rate": 2.2045684545323116e-05, "loss": 0.3531, "step": 5083, "teacher_loss": 0.32317888736724854 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.44387075304985046, "learning_rate": 2.2050021685701893e-05, "loss": 0.3448, "step": 5084, "teacher_loss": 0.33377572894096375 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.34271883964538574, "learning_rate": 2.205435882608067e-05, "loss": 0.1735, "step": 5085, "teacher_loss": 0.15472745895385742 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.8497046232223511, "learning_rate": 2.205869596645945e-05, "loss": 0.2463, "step": 5086, "teacher_loss": 0.17930924892425537 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.22965535521507263, "learning_rate": 2.2063033106838226e-05, "loss": 0.2465, "step": 5087, "teacher_loss": 0.24835895001888275 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6426375508308411, "learning_rate": 2.2067370247217004e-05, "loss": 0.3705, "step": 5088, "teacher_loss": 0.34030839800834656 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.46825292706489563, "learning_rate": 2.2071707387595778e-05, "loss": 0.2197, "step": 5089, "teacher_loss": 0.19205589592456818 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.4416676163673401, "learning_rate": 2.2076044527974556e-05, "loss": 0.2182, "step": 5090, "teacher_loss": 0.19341400265693665 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3583717346191406, "learning_rate": 2.2080381668353333e-05, "loss": 0.2611, "step": 5091, "teacher_loss": 0.2503451108932495 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.4295124411582947, "learning_rate": 2.208471880873211e-05, "loss": 0.2113, "step": 5092, "teacher_loss": 0.18707753717899323 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6649757623672485, "learning_rate": 2.2089055949110885e-05, "loss": 0.2709, "step": 5093, "teacher_loss": 0.2270745038986206 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.07297470420598984, "learning_rate": 2.2093393089489663e-05, "loss": 0.167, "step": 5094, "teacher_loss": 0.1774577796459198 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3141477108001709, "learning_rate": 2.209773022986844e-05, "loss": 0.2197, "step": 5095, "teacher_loss": 0.20923012495040894 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3098946511745453, "learning_rate": 2.2102067370247218e-05, "loss": 0.2063, "step": 5096, "teacher_loss": 0.19479797780513763 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3089548647403717, "learning_rate": 2.2106404510625996e-05, "loss": 0.2663, "step": 5097, "teacher_loss": 0.2615513205528259 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.22667396068572998, "learning_rate": 2.211074165100477e-05, "loss": 0.2841, "step": 5098, "teacher_loss": 0.2905122637748718 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.36995601654052734, "learning_rate": 2.2115078791383548e-05, "loss": 0.3049, "step": 5099, "teacher_loss": 0.2977074086666107 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.5350288152694702, "learning_rate": 2.2119415931762325e-05, "loss": 0.248, "step": 5100, "teacher_loss": 0.21605652570724487 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6179404854774475, "learning_rate": 2.2123753072141103e-05, "loss": 0.3451, "step": 5101, "teacher_loss": 0.3148387372493744 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.29611727595329285, "learning_rate": 2.212809021251988e-05, "loss": 0.2341, "step": 5102, "teacher_loss": 0.22723616659641266 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.2847411036491394, "learning_rate": 2.2132427352898658e-05, "loss": 0.2304, "step": 5103, "teacher_loss": 0.22436955571174622 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.15685270726680756, "learning_rate": 2.2136764493277432e-05, "loss": 0.1656, "step": 5104, "teacher_loss": 0.16652518510818481 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.23992204666137695, "learning_rate": 2.214110163365621e-05, "loss": 0.2268, "step": 5105, "teacher_loss": 0.22533570230007172 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3588973581790924, "learning_rate": 2.2145438774034988e-05, "loss": 0.2305, "step": 5106, "teacher_loss": 0.21623027324676514 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.17951223254203796, "learning_rate": 2.2149775914413762e-05, "loss": 0.1833, "step": 5107, "teacher_loss": 0.18373596668243408 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.6676278710365295, "learning_rate": 2.215411305479254e-05, "loss": 0.2464, "step": 5108, "teacher_loss": 0.19961433112621307 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3264515995979309, "learning_rate": 2.2158450195171317e-05, "loss": 0.2459, "step": 5109, "teacher_loss": 0.2369847148656845 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.9933271408081055, "learning_rate": 2.2162787335550095e-05, "loss": 0.2519, "step": 5110, "teacher_loss": 0.16952507197856903 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 1.8078665733337402, "learning_rate": 2.2167124475928872e-05, "loss": 0.4692, "step": 5111, "teacher_loss": 0.3204426169395447 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.40188518166542053, "learning_rate": 2.217146161630765e-05, "loss": 0.266, "step": 5112, "teacher_loss": 0.25089964270591736 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.44755205512046814, "learning_rate": 2.2175798756686428e-05, "loss": 0.2854, "step": 5113, "teacher_loss": 0.26737403869628906 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.5052809715270996, "learning_rate": 2.2180135897065205e-05, "loss": 0.2689, "step": 5114, "teacher_loss": 0.24268098175525665 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.9582458734512329, "learning_rate": 2.218447303744398e-05, "loss": 0.2949, "step": 5115, "teacher_loss": 0.22121265530586243 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.3993985652923584, "learning_rate": 2.2188810177822754e-05, "loss": 0.1597, "step": 5116, "teacher_loss": 0.13308513164520264 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.17697572708129883, "learning_rate": 2.219314731820153e-05, "loss": 0.2512, "step": 5117, "teacher_loss": 0.2594691812992096 }, { "compression_loss": 0.0, "epoch": 0.92, "label_loss": 0.18234822154045105, "learning_rate": 2.219748445858031e-05, "loss": 0.2259, "step": 5118, "teacher_loss": 0.2307826280593872 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.3989626467227936, "learning_rate": 2.2201821598959086e-05, "loss": 0.2394, "step": 5119, "teacher_loss": 0.22162196040153503 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5394414663314819, "learning_rate": 2.2206158739337864e-05, "loss": 0.3933, "step": 5120, "teacher_loss": 0.37701237201690674 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.2066372036933899, "learning_rate": 2.2210495879716642e-05, "loss": 0.3219, "step": 5121, "teacher_loss": 0.3346531093120575 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.42274904251098633, "learning_rate": 2.221483302009542e-05, "loss": 0.2814, "step": 5122, "teacher_loss": 0.26568275690078735 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.23372864723205566, "learning_rate": 2.2219170160474197e-05, "loss": 0.177, "step": 5123, "teacher_loss": 0.1706833392381668 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.45319265127182007, "learning_rate": 2.222350730085297e-05, "loss": 0.2611, "step": 5124, "teacher_loss": 0.23978909850120544 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5599527359008789, "learning_rate": 2.222784444123175e-05, "loss": 0.2652, "step": 5125, "teacher_loss": 0.23246073722839355 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.4552772045135498, "learning_rate": 2.2232181581610523e-05, "loss": 0.2558, "step": 5126, "teacher_loss": 0.2335938811302185 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.3029862642288208, "learning_rate": 2.22365187219893e-05, "loss": 0.2063, "step": 5127, "teacher_loss": 0.19554370641708374 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.31261229515075684, "learning_rate": 2.224085586236808e-05, "loss": 0.2244, "step": 5128, "teacher_loss": 0.214548259973526 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.6989035606384277, "learning_rate": 2.2245193002746856e-05, "loss": 0.2245, "step": 5129, "teacher_loss": 0.17179185152053833 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.4375190734863281, "learning_rate": 2.2249530143125634e-05, "loss": 0.3059, "step": 5130, "teacher_loss": 0.29128098487854004 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.9181143045425415, "learning_rate": 2.225386728350441e-05, "loss": 0.2929, "step": 5131, "teacher_loss": 0.22344090044498444 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.444706529378891, "learning_rate": 2.225820442388319e-05, "loss": 0.3929, "step": 5132, "teacher_loss": 0.3871840238571167 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.19605103135108948, "learning_rate": 2.2262541564261963e-05, "loss": 0.2539, "step": 5133, "teacher_loss": 0.26034265756607056 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.8624093532562256, "learning_rate": 2.226687870464074e-05, "loss": 0.2562, "step": 5134, "teacher_loss": 0.18885639309883118 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.34122729301452637, "learning_rate": 2.2271215845019518e-05, "loss": 0.2629, "step": 5135, "teacher_loss": 0.25414156913757324 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.6111626625061035, "learning_rate": 2.2275552985398296e-05, "loss": 0.245, "step": 5136, "teacher_loss": 0.20434913039207458 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.2054591178894043, "learning_rate": 2.227989012577707e-05, "loss": 0.226, "step": 5137, "teacher_loss": 0.2283117175102234 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.20084145665168762, "learning_rate": 2.2284227266155848e-05, "loss": 0.1909, "step": 5138, "teacher_loss": 0.18979953229427338 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.33382779359817505, "learning_rate": 2.2288564406534625e-05, "loss": 0.2195, "step": 5139, "teacher_loss": 0.20679888129234314 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 1.0286345481872559, "learning_rate": 2.2292901546913403e-05, "loss": 0.3946, "step": 5140, "teacher_loss": 0.3241494297981262 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5276502370834351, "learning_rate": 2.229723868729218e-05, "loss": 0.3214, "step": 5141, "teacher_loss": 0.29852110147476196 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.7595165967941284, "learning_rate": 2.2301575827670955e-05, "loss": 0.324, "step": 5142, "teacher_loss": 0.2755633592605591 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.2286558896303177, "learning_rate": 2.2305912968049732e-05, "loss": 0.2166, "step": 5143, "teacher_loss": 0.21522654592990875 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.6739788055419922, "learning_rate": 2.231025010842851e-05, "loss": 0.2739, "step": 5144, "teacher_loss": 0.2294815331697464 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.14675763249397278, "learning_rate": 2.2314587248807288e-05, "loss": 0.1761, "step": 5145, "teacher_loss": 0.17932449281215668 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.48987025022506714, "learning_rate": 2.2318924389186065e-05, "loss": 0.2334, "step": 5146, "teacher_loss": 0.20487241446971893 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.21976234018802643, "learning_rate": 2.232326152956484e-05, "loss": 0.2227, "step": 5147, "teacher_loss": 0.22299061715602875 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.7391775846481323, "learning_rate": 2.2327598669943617e-05, "loss": 0.3247, "step": 5148, "teacher_loss": 0.27859389781951904 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.3850157856941223, "learning_rate": 2.2331935810322395e-05, "loss": 0.2686, "step": 5149, "teacher_loss": 0.2556228041648865 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.3147790729999542, "learning_rate": 2.2336272950701172e-05, "loss": 0.1748, "step": 5150, "teacher_loss": 0.15925270318984985 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5497558116912842, "learning_rate": 2.2340610091079947e-05, "loss": 0.3476, "step": 5151, "teacher_loss": 0.3251686096191406 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.4033011794090271, "learning_rate": 2.2344947231458724e-05, "loss": 0.2875, "step": 5152, "teacher_loss": 0.27466481924057007 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.19783766567707062, "learning_rate": 2.2349284371837502e-05, "loss": 0.2125, "step": 5153, "teacher_loss": 0.21415627002716064 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.23227161169052124, "learning_rate": 2.235362151221628e-05, "loss": 0.1803, "step": 5154, "teacher_loss": 0.17449629306793213 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5798959732055664, "learning_rate": 2.2357958652595057e-05, "loss": 0.2762, "step": 5155, "teacher_loss": 0.24248716235160828 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 1.0399142503738403, "learning_rate": 2.2362295792973835e-05, "loss": 0.3185, "step": 5156, "teacher_loss": 0.23838664591312408 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.48925402760505676, "learning_rate": 2.2366632933352612e-05, "loss": 0.2493, "step": 5157, "teacher_loss": 0.22261153161525726 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.25585877895355225, "learning_rate": 2.2370970073731387e-05, "loss": 0.1867, "step": 5158, "teacher_loss": 0.1790388822555542 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.294344961643219, "learning_rate": 2.2375307214110164e-05, "loss": 0.211, "step": 5159, "teacher_loss": 0.2017013430595398 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.6578915119171143, "learning_rate": 2.237964435448894e-05, "loss": 0.2973, "step": 5160, "teacher_loss": 0.2572368383407593 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.4158608913421631, "learning_rate": 2.2383981494867716e-05, "loss": 0.253, "step": 5161, "teacher_loss": 0.2349167764186859 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.22157132625579834, "learning_rate": 2.2388318635246494e-05, "loss": 0.2211, "step": 5162, "teacher_loss": 0.2210463136434555 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.5379351377487183, "learning_rate": 2.239265577562527e-05, "loss": 0.3446, "step": 5163, "teacher_loss": 0.3231605291366577 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.37333449721336365, "learning_rate": 2.239699291600405e-05, "loss": 0.2115, "step": 5164, "teacher_loss": 0.1935131549835205 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.17080548405647278, "learning_rate": 2.2401330056382827e-05, "loss": 0.2254, "step": 5165, "teacher_loss": 0.2314268946647644 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.7143734693527222, "learning_rate": 2.2405667196761604e-05, "loss": 0.345, "step": 5166, "teacher_loss": 0.3040086627006531 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.7520779371261597, "learning_rate": 2.2410004337140382e-05, "loss": 0.3036, "step": 5167, "teacher_loss": 0.2537897229194641 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.684225857257843, "learning_rate": 2.2414341477519156e-05, "loss": 0.3007, "step": 5168, "teacher_loss": 0.25807303190231323 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.23379717767238617, "learning_rate": 2.241867861789793e-05, "loss": 0.1486, "step": 5169, "teacher_loss": 0.1391725242137909 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.31053274869918823, "learning_rate": 2.2423015758276708e-05, "loss": 0.1724, "step": 5170, "teacher_loss": 0.15702611207962036 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.22114913165569305, "learning_rate": 2.2427352898655486e-05, "loss": 0.2585, "step": 5171, "teacher_loss": 0.26262637972831726 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.4058115482330322, "learning_rate": 2.2431690039034263e-05, "loss": 0.2626, "step": 5172, "teacher_loss": 0.24673837423324585 }, { "compression_loss": 0.0, "epoch": 0.93, "label_loss": 0.13354527950286865, "learning_rate": 2.243602717941304e-05, "loss": 0.1955, "step": 5173, "teacher_loss": 0.20240341126918793 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2527647912502289, "learning_rate": 2.244036431979182e-05, "loss": 0.239, "step": 5174, "teacher_loss": 0.23742276430130005 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.3180329203605652, "learning_rate": 2.2444701460170596e-05, "loss": 0.2166, "step": 5175, "teacher_loss": 0.20528320968151093 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.7262953519821167, "learning_rate": 2.2449038600549374e-05, "loss": 0.5043, "step": 5176, "teacher_loss": 0.47957974672317505 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2496146857738495, "learning_rate": 2.2453375740928148e-05, "loss": 0.1712, "step": 5177, "teacher_loss": 0.16245608031749725 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4889895021915436, "learning_rate": 2.2457712881306926e-05, "loss": 0.2446, "step": 5178, "teacher_loss": 0.21742860972881317 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5513525605201721, "learning_rate": 2.2462050021685703e-05, "loss": 0.2596, "step": 5179, "teacher_loss": 0.22720560431480408 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.3760373890399933, "learning_rate": 2.2466387162064477e-05, "loss": 0.29, "step": 5180, "teacher_loss": 0.28045833110809326 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.12090438604354858, "learning_rate": 2.2470724302443255e-05, "loss": 0.1557, "step": 5181, "teacher_loss": 0.15951929986476898 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.44478827714920044, "learning_rate": 2.2475061442822033e-05, "loss": 0.2508, "step": 5182, "teacher_loss": 0.2292410433292389 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.468021035194397, "learning_rate": 2.247939858320081e-05, "loss": 0.3061, "step": 5183, "teacher_loss": 0.2880815863609314 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5448011159896851, "learning_rate": 2.2483735723579588e-05, "loss": 0.3294, "step": 5184, "teacher_loss": 0.3054155111312866 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.36160629987716675, "learning_rate": 2.2488072863958365e-05, "loss": 0.2462, "step": 5185, "teacher_loss": 0.23333030939102173 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.8166458606719971, "learning_rate": 2.249241000433714e-05, "loss": 0.3009, "step": 5186, "teacher_loss": 0.24362075328826904 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.34891456365585327, "learning_rate": 2.2496747144715917e-05, "loss": 0.2287, "step": 5187, "teacher_loss": 0.215294748544693 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.39646828174591064, "learning_rate": 2.2501084285094695e-05, "loss": 0.3423, "step": 5188, "teacher_loss": 0.3362903892993927 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5243868827819824, "learning_rate": 2.2505421425473473e-05, "loss": 0.247, "step": 5189, "teacher_loss": 0.21617653965950012 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.378104031085968, "learning_rate": 2.250975856585225e-05, "loss": 0.215, "step": 5190, "teacher_loss": 0.19687201082706451 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.40269333124160767, "learning_rate": 2.2514095706231024e-05, "loss": 0.2041, "step": 5191, "teacher_loss": 0.1820824146270752 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5432336330413818, "learning_rate": 2.2518432846609802e-05, "loss": 0.2286, "step": 5192, "teacher_loss": 0.1936730593442917 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4971905052661896, "learning_rate": 2.252276998698858e-05, "loss": 0.2995, "step": 5193, "teacher_loss": 0.277488648891449 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4415738880634308, "learning_rate": 2.2527107127367357e-05, "loss": 0.2774, "step": 5194, "teacher_loss": 0.25916820764541626 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5430472493171692, "learning_rate": 2.253144426774613e-05, "loss": 0.2529, "step": 5195, "teacher_loss": 0.22064979374408722 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5330978631973267, "learning_rate": 2.253578140812491e-05, "loss": 0.5355, "step": 5196, "teacher_loss": 0.5358136296272278 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4828476011753082, "learning_rate": 2.2540118548503687e-05, "loss": 0.3746, "step": 5197, "teacher_loss": 0.36252695322036743 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4799504280090332, "learning_rate": 2.2544455688882464e-05, "loss": 0.1987, "step": 5198, "teacher_loss": 0.16748881340026855 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.3681102991104126, "learning_rate": 2.2548792829261242e-05, "loss": 0.2907, "step": 5199, "teacher_loss": 0.2821410298347473 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4468977451324463, "learning_rate": 2.255312996964002e-05, "loss": 0.1988, "step": 5200, "teacher_loss": 0.17121225595474243 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.6335517168045044, "learning_rate": 2.2557467110018797e-05, "loss": 0.3002, "step": 5201, "teacher_loss": 0.2632067799568176 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4042230546474457, "learning_rate": 2.256180425039757e-05, "loss": 0.2872, "step": 5202, "teacher_loss": 0.27421101927757263 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.628432035446167, "learning_rate": 2.256614139077635e-05, "loss": 0.291, "step": 5203, "teacher_loss": 0.25353002548217773 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.38567131757736206, "learning_rate": 2.2570478531155123e-05, "loss": 0.1866, "step": 5204, "teacher_loss": 0.16453619301319122 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 1.2500131130218506, "learning_rate": 2.25748156715339e-05, "loss": 0.516, "step": 5205, "teacher_loss": 0.43444371223449707 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.3733985722064972, "learning_rate": 2.257915281191268e-05, "loss": 0.351, "step": 5206, "teacher_loss": 0.34854069352149963 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.47947967052459717, "learning_rate": 2.2583489952291456e-05, "loss": 0.2227, "step": 5207, "teacher_loss": 0.19412267208099365 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.5088632702827454, "learning_rate": 2.2587827092670234e-05, "loss": 0.2195, "step": 5208, "teacher_loss": 0.18729454278945923 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.46812838315963745, "learning_rate": 2.259216423304901e-05, "loss": 0.2869, "step": 5209, "teacher_loss": 0.2667706608772278 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.31662237644195557, "learning_rate": 2.259650137342779e-05, "loss": 0.2153, "step": 5210, "teacher_loss": 0.2040342092514038 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.3427325189113617, "learning_rate": 2.2600838513806567e-05, "loss": 0.4275, "step": 5211, "teacher_loss": 0.4368712902069092 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2818562388420105, "learning_rate": 2.260517565418534e-05, "loss": 0.1828, "step": 5212, "teacher_loss": 0.17174619436264038 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.43525975942611694, "learning_rate": 2.2609512794564115e-05, "loss": 0.1729, "step": 5213, "teacher_loss": 0.1437588930130005 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2975597381591797, "learning_rate": 2.2613849934942893e-05, "loss": 0.2301, "step": 5214, "teacher_loss": 0.222581684589386 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.25309211015701294, "learning_rate": 2.261818707532167e-05, "loss": 0.2081, "step": 5215, "teacher_loss": 0.20309637486934662 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4849016070365906, "learning_rate": 2.2622524215700448e-05, "loss": 0.3044, "step": 5216, "teacher_loss": 0.2843630015850067 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2920071482658386, "learning_rate": 2.2626861356079226e-05, "loss": 0.2055, "step": 5217, "teacher_loss": 0.1959172785282135 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.6029982566833496, "learning_rate": 2.2631198496458003e-05, "loss": 0.2997, "step": 5218, "teacher_loss": 0.26605552434921265 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.1946466863155365, "learning_rate": 2.263553563683678e-05, "loss": 0.1647, "step": 5219, "teacher_loss": 0.16140566766262054 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.6143782734870911, "learning_rate": 2.263987277721556e-05, "loss": 0.2686, "step": 5220, "teacher_loss": 0.23017823696136475 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.21290603280067444, "learning_rate": 2.2644209917594333e-05, "loss": 0.1957, "step": 5221, "teacher_loss": 0.1938113272190094 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.4121314287185669, "learning_rate": 2.264854705797311e-05, "loss": 0.2165, "step": 5222, "teacher_loss": 0.19480614364147186 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.47370651364326477, "learning_rate": 2.2652884198351888e-05, "loss": 0.2302, "step": 5223, "teacher_loss": 0.20313358306884766 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.2794271409511566, "learning_rate": 2.2657221338730662e-05, "loss": 0.2575, "step": 5224, "teacher_loss": 0.25501489639282227 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.347237765789032, "learning_rate": 2.266155847910944e-05, "loss": 0.2666, "step": 5225, "teacher_loss": 0.2576659917831421 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.8451735973358154, "learning_rate": 2.2665895619488218e-05, "loss": 0.2514, "step": 5226, "teacher_loss": 0.18546950817108154 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 1.0351994037628174, "learning_rate": 2.2670232759866995e-05, "loss": 0.4788, "step": 5227, "teacher_loss": 0.4170287847518921 }, { "compression_loss": 0.0, "epoch": 0.94, "label_loss": 0.6906437873840332, "learning_rate": 2.2674569900245773e-05, "loss": 0.2939, "step": 5228, "teacher_loss": 0.24980652332305908 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.9929763078689575, "learning_rate": 2.267890704062455e-05, "loss": 0.2817, "step": 5229, "teacher_loss": 0.2026483565568924 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.7074835300445557, "learning_rate": 2.2683244181003325e-05, "loss": 0.2788, "step": 5230, "teacher_loss": 0.23115208745002747 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.3571232557296753, "learning_rate": 2.2687581321382102e-05, "loss": 0.2962, "step": 5231, "teacher_loss": 0.2894464433193207 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.42810314893722534, "learning_rate": 2.269191846176088e-05, "loss": 0.2194, "step": 5232, "teacher_loss": 0.1962508261203766 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.2567265033721924, "learning_rate": 2.2696255602139657e-05, "loss": 0.2423, "step": 5233, "teacher_loss": 0.2407262623310089 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.6598492860794067, "learning_rate": 2.2700592742518435e-05, "loss": 0.3227, "step": 5234, "teacher_loss": 0.2852838933467865 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5134366750717163, "learning_rate": 2.270492988289721e-05, "loss": 0.3148, "step": 5235, "teacher_loss": 0.2927519381046295 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.2056937962770462, "learning_rate": 2.2709267023275987e-05, "loss": 0.2118, "step": 5236, "teacher_loss": 0.21250778436660767 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5422415733337402, "learning_rate": 2.2713604163654765e-05, "loss": 0.2282, "step": 5237, "teacher_loss": 0.19330623745918274 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5009069442749023, "learning_rate": 2.2717941304033542e-05, "loss": 0.252, "step": 5238, "teacher_loss": 0.2243586629629135 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.37588587403297424, "learning_rate": 2.2722278444412316e-05, "loss": 0.2275, "step": 5239, "teacher_loss": 0.2109965682029724 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.4359273314476013, "learning_rate": 2.2726615584791094e-05, "loss": 0.2677, "step": 5240, "teacher_loss": 0.2489645779132843 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.395673930644989, "learning_rate": 2.273095272516987e-05, "loss": 0.3863, "step": 5241, "teacher_loss": 0.3852764368057251 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.18563883006572723, "learning_rate": 2.273528986554865e-05, "loss": 0.2155, "step": 5242, "teacher_loss": 0.21880190074443817 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.23733291029930115, "learning_rate": 2.2739627005927427e-05, "loss": 0.2066, "step": 5243, "teacher_loss": 0.2031739205121994 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.3128124475479126, "learning_rate": 2.2743964146306205e-05, "loss": 0.1834, "step": 5244, "teacher_loss": 0.16898566484451294 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.6251288652420044, "learning_rate": 2.274830128668498e-05, "loss": 0.2973, "step": 5245, "teacher_loss": 0.2608697712421417 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.41884487867355347, "learning_rate": 2.2752638427063756e-05, "loss": 0.3151, "step": 5246, "teacher_loss": 0.3035174012184143 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.37374347448349, "learning_rate": 2.2756975567442534e-05, "loss": 0.2212, "step": 5247, "teacher_loss": 0.20426736772060394 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.3473066985607147, "learning_rate": 2.2761312707821308e-05, "loss": 0.2591, "step": 5248, "teacher_loss": 0.24926765263080597 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.49194997549057007, "learning_rate": 2.2765649848200086e-05, "loss": 0.2776, "step": 5249, "teacher_loss": 0.25381016731262207 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.24947193264961243, "learning_rate": 2.2769986988578863e-05, "loss": 0.2409, "step": 5250, "teacher_loss": 0.2399916797876358 }, { "epoch": 0.95, "eval_exact_match": 79.63103122043519, "eval_f1": 87.18634119235962, "step": 5250 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.2812878489494324, "learning_rate": 2.277432412895764e-05, "loss": 0.4144, "step": 5251, "teacher_loss": 0.42915308475494385 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.7196929454803467, "learning_rate": 2.277866126933642e-05, "loss": 0.3573, "step": 5252, "teacher_loss": 0.3170585334300995 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.39043283462524414, "learning_rate": 2.2782998409715196e-05, "loss": 0.2008, "step": 5253, "teacher_loss": 0.1797480583190918 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.2606884837150574, "learning_rate": 2.2787335550093974e-05, "loss": 0.3089, "step": 5254, "teacher_loss": 0.3142600357532501 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.16645267605781555, "learning_rate": 2.279167269047275e-05, "loss": 0.1668, "step": 5255, "teacher_loss": 0.16688109934329987 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.42375534772872925, "learning_rate": 2.2796009830851526e-05, "loss": 0.2011, "step": 5256, "teacher_loss": 0.1763540804386139 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.45146262645721436, "learning_rate": 2.28003469712303e-05, "loss": 0.2372, "step": 5257, "teacher_loss": 0.21333810687065125 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.8403382301330566, "learning_rate": 2.2804684111609078e-05, "loss": 0.4967, "step": 5258, "teacher_loss": 0.4585610628128052 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.33543968200683594, "learning_rate": 2.2809021251987855e-05, "loss": 0.1915, "step": 5259, "teacher_loss": 0.17547093331813812 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.28520306944847107, "learning_rate": 2.2813358392366633e-05, "loss": 0.2349, "step": 5260, "teacher_loss": 0.229332834482193 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.16511911153793335, "learning_rate": 2.281769553274541e-05, "loss": 0.1895, "step": 5261, "teacher_loss": 0.1921992152929306 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.623196542263031, "learning_rate": 2.2822032673124188e-05, "loss": 0.3784, "step": 5262, "teacher_loss": 0.3511947989463806 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.422479510307312, "learning_rate": 2.2826369813502966e-05, "loss": 0.2691, "step": 5263, "teacher_loss": 0.2520909905433655 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5880293846130371, "learning_rate": 2.2830706953881743e-05, "loss": 0.4016, "step": 5264, "teacher_loss": 0.38090479373931885 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5907383561134338, "learning_rate": 2.2835044094260518e-05, "loss": 0.2803, "step": 5265, "teacher_loss": 0.24582664668560028 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.388629674911499, "learning_rate": 2.2839381234639295e-05, "loss": 0.2379, "step": 5266, "teacher_loss": 0.2211308777332306 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.28395259380340576, "learning_rate": 2.284371837501807e-05, "loss": 0.2046, "step": 5267, "teacher_loss": 0.19577160477638245 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.30127406120300293, "learning_rate": 2.2848055515396847e-05, "loss": 0.2127, "step": 5268, "teacher_loss": 0.20289164781570435 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.4572233259677887, "learning_rate": 2.2852392655775625e-05, "loss": 0.2669, "step": 5269, "teacher_loss": 0.2457369863986969 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.6482110619544983, "learning_rate": 2.2856729796154402e-05, "loss": 0.3682, "step": 5270, "teacher_loss": 0.3370552659034729 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.4635393023490906, "learning_rate": 2.286106693653318e-05, "loss": 0.1988, "step": 5271, "teacher_loss": 0.16938892006874084 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.7569433450698853, "learning_rate": 2.2865404076911958e-05, "loss": 0.2915, "step": 5272, "teacher_loss": 0.2397874891757965 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.2106567919254303, "learning_rate": 2.2869741217290735e-05, "loss": 0.2392, "step": 5273, "teacher_loss": 0.2423810511827469 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.3675040006637573, "learning_rate": 2.287407835766951e-05, "loss": 0.2719, "step": 5274, "teacher_loss": 0.26126301288604736 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.372201532125473, "learning_rate": 2.2878415498048287e-05, "loss": 0.398, "step": 5275, "teacher_loss": 0.40084031224250793 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.391643226146698, "learning_rate": 2.2882752638427065e-05, "loss": 0.2846, "step": 5276, "teacher_loss": 0.27274852991104126 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.42954087257385254, "learning_rate": 2.2887089778805842e-05, "loss": 0.2577, "step": 5277, "teacher_loss": 0.23864296078681946 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 1.2149486541748047, "learning_rate": 2.2891426919184617e-05, "loss": 0.4275, "step": 5278, "teacher_loss": 0.3400581479072571 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.3806818723678589, "learning_rate": 2.2895764059563394e-05, "loss": 0.2197, "step": 5279, "teacher_loss": 0.20180785655975342 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.767670750617981, "learning_rate": 2.2900101199942172e-05, "loss": 0.4487, "step": 5280, "teacher_loss": 0.4132162630558014 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.711574912071228, "learning_rate": 2.290443834032095e-05, "loss": 0.3307, "step": 5281, "teacher_loss": 0.2883613705635071 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.5144262909889221, "learning_rate": 2.2908775480699727e-05, "loss": 0.2793, "step": 5282, "teacher_loss": 0.2531528174877167 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.31219446659088135, "learning_rate": 2.29131126210785e-05, "loss": 0.2237, "step": 5283, "teacher_loss": 0.2138577699661255 }, { "compression_loss": 0.0, "epoch": 0.95, "label_loss": 0.4874677062034607, "learning_rate": 2.291744976145728e-05, "loss": 0.2473, "step": 5284, "teacher_loss": 0.2205744981765747 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.44611963629722595, "learning_rate": 2.2921786901836057e-05, "loss": 0.2109, "step": 5285, "teacher_loss": 0.18478459119796753 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.3108885884284973, "learning_rate": 2.2926124042214834e-05, "loss": 0.1765, "step": 5286, "teacher_loss": 0.16161490976810455 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.434268057346344, "learning_rate": 2.2930461182593612e-05, "loss": 0.259, "step": 5287, "teacher_loss": 0.23951254785060883 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.8003425598144531, "learning_rate": 2.293479832297239e-05, "loss": 0.345, "step": 5288, "teacher_loss": 0.29438310861587524 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.6043721437454224, "learning_rate": 2.2939135463351164e-05, "loss": 0.2608, "step": 5289, "teacher_loss": 0.2226196825504303 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 1.0015742778778076, "learning_rate": 2.294347260372994e-05, "loss": 0.2933, "step": 5290, "teacher_loss": 0.21458885073661804 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.5063802599906921, "learning_rate": 2.294780974410872e-05, "loss": 0.2818, "step": 5291, "teacher_loss": 0.2568388879299164 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.48456746339797974, "learning_rate": 2.2952146884487493e-05, "loss": 0.2843, "step": 5292, "teacher_loss": 0.2620980739593506 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.537642776966095, "learning_rate": 2.295648402486627e-05, "loss": 0.2705, "step": 5293, "teacher_loss": 0.24077317118644714 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2621435821056366, "learning_rate": 2.296082116524505e-05, "loss": 0.2107, "step": 5294, "teacher_loss": 0.20498394966125488 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.8919625282287598, "learning_rate": 2.2965158305623826e-05, "loss": 0.3275, "step": 5295, "teacher_loss": 0.26477229595184326 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.4959392547607422, "learning_rate": 2.2969495446002604e-05, "loss": 0.2496, "step": 5296, "teacher_loss": 0.22228381037712097 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.24925217032432556, "learning_rate": 2.297383258638138e-05, "loss": 0.1626, "step": 5297, "teacher_loss": 0.1529797911643982 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.7525221109390259, "learning_rate": 2.297816972676016e-05, "loss": 0.3961, "step": 5298, "teacher_loss": 0.3565044403076172 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.14059105515480042, "learning_rate": 2.2982506867138936e-05, "loss": 0.2874, "step": 5299, "teacher_loss": 0.3036627173423767 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.408150315284729, "learning_rate": 2.298684400751771e-05, "loss": 0.3686, "step": 5300, "teacher_loss": 0.36421453952789307 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.4785027801990509, "learning_rate": 2.2991181147896485e-05, "loss": 0.3436, "step": 5301, "teacher_loss": 0.3285689651966095 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.19875618815422058, "learning_rate": 2.2995518288275263e-05, "loss": 0.1888, "step": 5302, "teacher_loss": 0.18765440583229065 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.8475450873374939, "learning_rate": 2.299985542865404e-05, "loss": 0.4318, "step": 5303, "teacher_loss": 0.38558512926101685 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.49890416860580444, "learning_rate": 2.3004192569032818e-05, "loss": 0.3101, "step": 5304, "teacher_loss": 0.28915178775787354 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.13040488958358765, "learning_rate": 2.3008529709411595e-05, "loss": 0.1486, "step": 5305, "teacher_loss": 0.15066519379615784 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.5142734050750732, "learning_rate": 2.3012866849790373e-05, "loss": 0.3032, "step": 5306, "teacher_loss": 0.27975788712501526 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2963438630104065, "learning_rate": 2.301720399016915e-05, "loss": 0.2805, "step": 5307, "teacher_loss": 0.27878519892692566 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2815457582473755, "learning_rate": 2.302154113054793e-05, "loss": 0.3214, "step": 5308, "teacher_loss": 0.3258435130119324 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.49866294860839844, "learning_rate": 2.3025878270926703e-05, "loss": 0.2862, "step": 5309, "teacher_loss": 0.2626439929008484 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.17292019724845886, "learning_rate": 2.303021541130548e-05, "loss": 0.2801, "step": 5310, "teacher_loss": 0.29195380210876465 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2461528778076172, "learning_rate": 2.3034552551684254e-05, "loss": 0.1623, "step": 5311, "teacher_loss": 0.15302368998527527 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.41084691882133484, "learning_rate": 2.3038889692063032e-05, "loss": 0.2453, "step": 5312, "teacher_loss": 0.22686000168323517 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.29898664355278015, "learning_rate": 2.304322683244181e-05, "loss": 0.2638, "step": 5313, "teacher_loss": 0.2598741352558136 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.37229400873184204, "learning_rate": 2.3047563972820587e-05, "loss": 0.293, "step": 5314, "teacher_loss": 0.2841700613498688 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.4559156894683838, "learning_rate": 2.3051901113199365e-05, "loss": 0.3119, "step": 5315, "teacher_loss": 0.2959163784980774 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2551038861274719, "learning_rate": 2.3056238253578143e-05, "loss": 0.3094, "step": 5316, "teacher_loss": 0.3154229521751404 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2521950602531433, "learning_rate": 2.306057539395692e-05, "loss": 0.1843, "step": 5317, "teacher_loss": 0.17671126127243042 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.2045522928237915, "learning_rate": 2.3064912534335694e-05, "loss": 0.1451, "step": 5318, "teacher_loss": 0.138482466340065 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.30991801619529724, "learning_rate": 2.3069249674714472e-05, "loss": 0.1954, "step": 5319, "teacher_loss": 0.1826450526714325 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 1.071541666984558, "learning_rate": 2.307358681509325e-05, "loss": 0.3195, "step": 5320, "teacher_loss": 0.23588864505290985 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.29259878396987915, "learning_rate": 2.3077923955472027e-05, "loss": 0.1871, "step": 5321, "teacher_loss": 0.17534509301185608 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.48258674144744873, "learning_rate": 2.30822610958508e-05, "loss": 0.239, "step": 5322, "teacher_loss": 0.21196448802947998 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.5426325798034668, "learning_rate": 2.308659823622958e-05, "loss": 0.3001, "step": 5323, "teacher_loss": 0.2731361985206604 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.4768981337547302, "learning_rate": 2.3090935376608357e-05, "loss": 0.2253, "step": 5324, "teacher_loss": 0.19734270870685577 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.6536442041397095, "learning_rate": 2.3095272516987134e-05, "loss": 0.3475, "step": 5325, "teacher_loss": 0.3134285807609558 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.7678326368331909, "learning_rate": 2.3099609657365912e-05, "loss": 0.228, "step": 5326, "teacher_loss": 0.16804514825344086 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.21201226115226746, "learning_rate": 2.3103946797744686e-05, "loss": 0.1774, "step": 5327, "teacher_loss": 0.1735735535621643 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.5550143122673035, "learning_rate": 2.3108283938123464e-05, "loss": 0.2766, "step": 5328, "teacher_loss": 0.24567165970802307 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.33735334873199463, "learning_rate": 2.311262107850224e-05, "loss": 0.3393, "step": 5329, "teacher_loss": 0.339538037776947 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.37199753522872925, "learning_rate": 2.311695821888102e-05, "loss": 0.2311, "step": 5330, "teacher_loss": 0.21543939411640167 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.5593907833099365, "learning_rate": 2.3121295359259797e-05, "loss": 0.2656, "step": 5331, "teacher_loss": 0.23299317061901093 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.31990504264831543, "learning_rate": 2.3125632499638574e-05, "loss": 0.234, "step": 5332, "teacher_loss": 0.22443200647830963 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.3720256984233856, "learning_rate": 2.312996964001735e-05, "loss": 0.1982, "step": 5333, "teacher_loss": 0.17883452773094177 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.31332194805145264, "learning_rate": 2.3134306780396126e-05, "loss": 0.2697, "step": 5334, "teacher_loss": 0.2649003863334656 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.6613035202026367, "learning_rate": 2.3138643920774904e-05, "loss": 0.5597, "step": 5335, "teacher_loss": 0.548446536064148 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.23744845390319824, "learning_rate": 2.3142981061153678e-05, "loss": 0.2066, "step": 5336, "teacher_loss": 0.20318594574928284 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.4818035960197449, "learning_rate": 2.3147318201532456e-05, "loss": 0.2323, "step": 5337, "teacher_loss": 0.20456373691558838 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.42452001571655273, "learning_rate": 2.3151655341911233e-05, "loss": 0.2651, "step": 5338, "teacher_loss": 0.24737368524074554 }, { "compression_loss": 0.0, "epoch": 0.96, "label_loss": 0.8728054761886597, "learning_rate": 2.315599248229001e-05, "loss": 0.4228, "step": 5339, "teacher_loss": 0.37277162075042725 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.8660918474197388, "learning_rate": 2.316032962266879e-05, "loss": 0.3203, "step": 5340, "teacher_loss": 0.25968992710113525 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.22465132176876068, "learning_rate": 2.3164666763047566e-05, "loss": 0.2431, "step": 5341, "teacher_loss": 0.24515533447265625 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 1.1361886262893677, "learning_rate": 2.3169003903426344e-05, "loss": 0.4531, "step": 5342, "teacher_loss": 0.37723132967948914 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.25529032945632935, "learning_rate": 2.3173341043805118e-05, "loss": 0.2163, "step": 5343, "teacher_loss": 0.21191942691802979 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2200397253036499, "learning_rate": 2.3177678184183896e-05, "loss": 0.1977, "step": 5344, "teacher_loss": 0.19520539045333862 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.16571789979934692, "learning_rate": 2.318201532456267e-05, "loss": 0.2261, "step": 5345, "teacher_loss": 0.2327772080898285 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2317473441362381, "learning_rate": 2.3186352464941447e-05, "loss": 0.2094, "step": 5346, "teacher_loss": 0.2068655639886856 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.16925732791423798, "learning_rate": 2.3190689605320225e-05, "loss": 0.2512, "step": 5347, "teacher_loss": 0.26027965545654297 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.49995240569114685, "learning_rate": 2.3195026745699003e-05, "loss": 0.2787, "step": 5348, "teacher_loss": 0.2541377544403076 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.6650125980377197, "learning_rate": 2.319936388607778e-05, "loss": 0.5539, "step": 5349, "teacher_loss": 0.5415416955947876 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.49391233921051025, "learning_rate": 2.3203701026456558e-05, "loss": 0.2414, "step": 5350, "teacher_loss": 0.21338112652301788 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.1688418686389923, "learning_rate": 2.3208038166835336e-05, "loss": 0.1694, "step": 5351, "teacher_loss": 0.1694801300764084 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5063471794128418, "learning_rate": 2.3212375307214113e-05, "loss": 0.3839, "step": 5352, "teacher_loss": 0.37028831243515015 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.22846029698848724, "learning_rate": 2.3216712447592887e-05, "loss": 0.2592, "step": 5353, "teacher_loss": 0.26265761256217957 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.4165247678756714, "learning_rate": 2.322104958797166e-05, "loss": 0.1858, "step": 5354, "teacher_loss": 0.16014957427978516 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2282799482345581, "learning_rate": 2.322538672835044e-05, "loss": 0.1884, "step": 5355, "teacher_loss": 0.18399415910243988 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.42540442943573, "learning_rate": 2.3229723868729217e-05, "loss": 0.232, "step": 5356, "teacher_loss": 0.21055945754051208 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5764268040657043, "learning_rate": 2.3234061009107995e-05, "loss": 0.2716, "step": 5357, "teacher_loss": 0.23769541084766388 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.6497641801834106, "learning_rate": 2.3238398149486772e-05, "loss": 0.2971, "step": 5358, "teacher_loss": 0.25793084502220154 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.6197007894515991, "learning_rate": 2.324273528986555e-05, "loss": 0.2876, "step": 5359, "teacher_loss": 0.25074994564056396 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 1.1256108283996582, "learning_rate": 2.3247072430244327e-05, "loss": 0.3872, "step": 5360, "teacher_loss": 0.3051164746284485 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5118353366851807, "learning_rate": 2.3251409570623105e-05, "loss": 0.3214, "step": 5361, "teacher_loss": 0.30025580525398254 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.48860234022140503, "learning_rate": 2.325574671100188e-05, "loss": 0.2487, "step": 5362, "teacher_loss": 0.22203359007835388 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5370458364486694, "learning_rate": 2.3260083851380657e-05, "loss": 0.2413, "step": 5363, "teacher_loss": 0.20849084854125977 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.41701263189315796, "learning_rate": 2.3264420991759434e-05, "loss": 0.3354, "step": 5364, "teacher_loss": 0.32630735635757446 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.36297526955604553, "learning_rate": 2.326875813213821e-05, "loss": 0.2577, "step": 5365, "teacher_loss": 0.245981365442276 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.25561586022377014, "learning_rate": 2.3273095272516986e-05, "loss": 0.1849, "step": 5366, "teacher_loss": 0.17703570425510406 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2053670585155487, "learning_rate": 2.3277432412895764e-05, "loss": 0.2262, "step": 5367, "teacher_loss": 0.22848904132843018 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 1.0243103504180908, "learning_rate": 2.328176955327454e-05, "loss": 0.4653, "step": 5368, "teacher_loss": 0.4031349718570709 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.4429263770580292, "learning_rate": 2.328610669365332e-05, "loss": 0.2364, "step": 5369, "teacher_loss": 0.21349795162677765 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2591411769390106, "learning_rate": 2.3290443834032097e-05, "loss": 0.2628, "step": 5370, "teacher_loss": 0.26318442821502686 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.1070953831076622, "learning_rate": 2.329478097441087e-05, "loss": 0.1423, "step": 5371, "teacher_loss": 0.14622879028320312 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.3947625160217285, "learning_rate": 2.329911811478965e-05, "loss": 0.3047, "step": 5372, "teacher_loss": 0.29472678899765015 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.16454137861728668, "learning_rate": 2.3303455255168426e-05, "loss": 0.2103, "step": 5373, "teacher_loss": 0.21540237963199615 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.37359437346458435, "learning_rate": 2.3307792395547204e-05, "loss": 0.2136, "step": 5374, "teacher_loss": 0.1958184838294983 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.43434181809425354, "learning_rate": 2.331212953592598e-05, "loss": 0.4877, "step": 5375, "teacher_loss": 0.49364525079727173 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.2762134075164795, "learning_rate": 2.3316466676304756e-05, "loss": 0.1919, "step": 5376, "teacher_loss": 0.18257302045822144 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.19992363452911377, "learning_rate": 2.3320803816683533e-05, "loss": 0.1864, "step": 5377, "teacher_loss": 0.1848490685224533 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.33336132764816284, "learning_rate": 2.332514095706231e-05, "loss": 0.3295, "step": 5378, "teacher_loss": 0.32908475399017334 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.3430309295654297, "learning_rate": 2.332947809744109e-05, "loss": 0.3666, "step": 5379, "teacher_loss": 0.369229257106781 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.18287985026836395, "learning_rate": 2.3333815237819863e-05, "loss": 0.2763, "step": 5380, "teacher_loss": 0.28670966625213623 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.49821245670318604, "learning_rate": 2.333815237819864e-05, "loss": 0.2686, "step": 5381, "teacher_loss": 0.24310877919197083 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.3554704189300537, "learning_rate": 2.3342489518577418e-05, "loss": 0.3191, "step": 5382, "teacher_loss": 0.31502699851989746 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.7280396223068237, "learning_rate": 2.3346826658956196e-05, "loss": 0.2718, "step": 5383, "teacher_loss": 0.22114010155200958 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.6777359247207642, "learning_rate": 2.3351163799334973e-05, "loss": 0.4402, "step": 5384, "teacher_loss": 0.41383713483810425 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.38362541794776917, "learning_rate": 2.335550093971375e-05, "loss": 0.2359, "step": 5385, "teacher_loss": 0.2194925844669342 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.42113980650901794, "learning_rate": 2.335983808009253e-05, "loss": 0.2555, "step": 5386, "teacher_loss": 0.2371484637260437 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5297960042953491, "learning_rate": 2.3364175220471303e-05, "loss": 0.3073, "step": 5387, "teacher_loss": 0.28261566162109375 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.876060962677002, "learning_rate": 2.336851236085008e-05, "loss": 0.3757, "step": 5388, "teacher_loss": 0.32011473178863525 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.5322084426879883, "learning_rate": 2.3372849501228855e-05, "loss": 0.2739, "step": 5389, "teacher_loss": 0.24519816040992737 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.3035171926021576, "learning_rate": 2.3377186641607632e-05, "loss": 0.2306, "step": 5390, "teacher_loss": 0.222476065158844 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.4409346580505371, "learning_rate": 2.338152378198641e-05, "loss": 0.2284, "step": 5391, "teacher_loss": 0.204797625541687 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.39762794971466064, "learning_rate": 2.3385860922365188e-05, "loss": 0.2453, "step": 5392, "teacher_loss": 0.22839441895484924 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.4769248366355896, "learning_rate": 2.3390198062743965e-05, "loss": 0.2405, "step": 5393, "teacher_loss": 0.21422025561332703 }, { "compression_loss": 0.0, "epoch": 0.97, "label_loss": 0.3190556764602661, "learning_rate": 2.3394535203122743e-05, "loss": 0.4323, "step": 5394, "teacher_loss": 0.44491666555404663 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4160124361515045, "learning_rate": 2.339887234350152e-05, "loss": 0.2271, "step": 5395, "teacher_loss": 0.20611083507537842 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.8731113076210022, "learning_rate": 2.3403209483880298e-05, "loss": 0.4674, "step": 5396, "teacher_loss": 0.42231184244155884 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 1.1101782321929932, "learning_rate": 2.3407546624259076e-05, "loss": 0.2795, "step": 5397, "teacher_loss": 0.1872110664844513 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.3266570568084717, "learning_rate": 2.3411883764637847e-05, "loss": 0.2312, "step": 5398, "teacher_loss": 0.22059719264507294 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.13921064138412476, "learning_rate": 2.3416220905016624e-05, "loss": 0.1729, "step": 5399, "teacher_loss": 0.1766607165336609 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.25814932584762573, "learning_rate": 2.3420558045395402e-05, "loss": 0.2054, "step": 5400, "teacher_loss": 0.19951960444450378 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4061494469642639, "learning_rate": 2.342489518577418e-05, "loss": 0.2928, "step": 5401, "teacher_loss": 0.2801753580570221 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2980736196041107, "learning_rate": 2.3429232326152957e-05, "loss": 0.2005, "step": 5402, "teacher_loss": 0.1896316260099411 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.5789540410041809, "learning_rate": 2.3433569466531735e-05, "loss": 0.2807, "step": 5403, "teacher_loss": 0.2475496232509613 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2554803192615509, "learning_rate": 2.3437906606910512e-05, "loss": 0.1833, "step": 5404, "teacher_loss": 0.1752779185771942 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.24882067739963531, "learning_rate": 2.344224374728929e-05, "loss": 0.2396, "step": 5405, "teacher_loss": 0.23862558603286743 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4301232695579529, "learning_rate": 2.3446580887668064e-05, "loss": 0.1983, "step": 5406, "teacher_loss": 0.17251065373420715 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.36738380789756775, "learning_rate": 2.3450918028046842e-05, "loss": 0.2176, "step": 5407, "teacher_loss": 0.20091985166072845 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.24191394448280334, "learning_rate": 2.345525516842562e-05, "loss": 0.2015, "step": 5408, "teacher_loss": 0.19699940085411072 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.5004382133483887, "learning_rate": 2.3459592308804394e-05, "loss": 0.2753, "step": 5409, "teacher_loss": 0.2502540647983551 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.47690683603286743, "learning_rate": 2.346392944918317e-05, "loss": 0.318, "step": 5410, "teacher_loss": 0.30029550194740295 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6977181434631348, "learning_rate": 2.346826658956195e-05, "loss": 0.2575, "step": 5411, "teacher_loss": 0.20860819518566132 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.43647173047065735, "learning_rate": 2.3472603729940726e-05, "loss": 0.2786, "step": 5412, "teacher_loss": 0.2610396444797516 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2459789663553238, "learning_rate": 2.3476940870319504e-05, "loss": 0.2944, "step": 5413, "teacher_loss": 0.29983416199684143 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2712249457836151, "learning_rate": 2.3481278010698282e-05, "loss": 0.1827, "step": 5414, "teacher_loss": 0.17285102605819702 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.5777601003646851, "learning_rate": 2.3485615151077056e-05, "loss": 0.3939, "step": 5415, "teacher_loss": 0.37349873781204224 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.302334725856781, "learning_rate": 2.3489952291455834e-05, "loss": 0.1952, "step": 5416, "teacher_loss": 0.18331976234912872 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4470178186893463, "learning_rate": 2.349428943183461e-05, "loss": 0.3042, "step": 5417, "teacher_loss": 0.28837352991104126 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4125959575176239, "learning_rate": 2.349862657221339e-05, "loss": 0.3353, "step": 5418, "teacher_loss": 0.3267241418361664 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.30898982286453247, "learning_rate": 2.3502963712592166e-05, "loss": 0.2344, "step": 5419, "teacher_loss": 0.22606824338436127 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.3123939633369446, "learning_rate": 2.350730085297094e-05, "loss": 0.2267, "step": 5420, "teacher_loss": 0.21720454096794128 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2864669859409332, "learning_rate": 2.3511637993349718e-05, "loss": 0.206, "step": 5421, "teacher_loss": 0.19707578420639038 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.153743714094162, "learning_rate": 2.3515975133728496e-05, "loss": 0.2056, "step": 5422, "teacher_loss": 0.21133500337600708 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.34247249364852905, "learning_rate": 2.3520312274107274e-05, "loss": 0.2294, "step": 5423, "teacher_loss": 0.2168051153421402 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.8783653974533081, "learning_rate": 2.3524649414486048e-05, "loss": 0.3171, "step": 5424, "teacher_loss": 0.2546904683113098 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.3593134880065918, "learning_rate": 2.3528986554864825e-05, "loss": 0.3258, "step": 5425, "teacher_loss": 0.3220583200454712 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4042368531227112, "learning_rate": 2.3533323695243603e-05, "loss": 0.2929, "step": 5426, "teacher_loss": 0.2805687189102173 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.381625771522522, "learning_rate": 2.353766083562238e-05, "loss": 0.2129, "step": 5427, "teacher_loss": 0.19409725069999695 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.7073277235031128, "learning_rate": 2.3541997976001158e-05, "loss": 0.2699, "step": 5428, "teacher_loss": 0.2212734818458557 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2719656229019165, "learning_rate": 2.3546335116379936e-05, "loss": 0.1623, "step": 5429, "teacher_loss": 0.15014883875846863 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6445224285125732, "learning_rate": 2.3550672256758713e-05, "loss": 0.3669, "step": 5430, "teacher_loss": 0.3360990881919861 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4739958941936493, "learning_rate": 2.3555009397137488e-05, "loss": 0.2476, "step": 5431, "teacher_loss": 0.22244848310947418 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 1.4296815395355225, "learning_rate": 2.3559346537516265e-05, "loss": 0.4581, "step": 5432, "teacher_loss": 0.3500993549823761 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.42647045850753784, "learning_rate": 2.356368367789504e-05, "loss": 0.2438, "step": 5433, "teacher_loss": 0.22350388765335083 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.44229501485824585, "learning_rate": 2.3568020818273817e-05, "loss": 0.1999, "step": 5434, "teacher_loss": 0.17298519611358643 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 1.0484583377838135, "learning_rate": 2.3572357958652595e-05, "loss": 0.2517, "step": 5435, "teacher_loss": 0.16312196850776672 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.9945250749588013, "learning_rate": 2.3576695099031372e-05, "loss": 0.3356, "step": 5436, "teacher_loss": 0.26233339309692383 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.5502623915672302, "learning_rate": 2.358103223941015e-05, "loss": 0.2395, "step": 5437, "teacher_loss": 0.20502164959907532 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6483927369117737, "learning_rate": 2.3585369379788928e-05, "loss": 0.2247, "step": 5438, "teacher_loss": 0.1776670217514038 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4235118627548218, "learning_rate": 2.3589706520167705e-05, "loss": 0.311, "step": 5439, "teacher_loss": 0.2985331416130066 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6320832371711731, "learning_rate": 2.3594043660546483e-05, "loss": 0.3292, "step": 5440, "teacher_loss": 0.2955858111381531 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.25183913111686707, "learning_rate": 2.3598380800925257e-05, "loss": 0.2193, "step": 5441, "teacher_loss": 0.2156939059495926 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.3258577883243561, "learning_rate": 2.360271794130403e-05, "loss": 0.2287, "step": 5442, "teacher_loss": 0.21786652505397797 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.32024165987968445, "learning_rate": 2.360705508168281e-05, "loss": 0.232, "step": 5443, "teacher_loss": 0.2222200334072113 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.27019447088241577, "learning_rate": 2.3611392222061587e-05, "loss": 0.2203, "step": 5444, "teacher_loss": 0.2147570550441742 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6908140778541565, "learning_rate": 2.3615729362440364e-05, "loss": 0.3519, "step": 5445, "teacher_loss": 0.31429553031921387 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.24531099200248718, "learning_rate": 2.3620066502819142e-05, "loss": 0.1979, "step": 5446, "teacher_loss": 0.19257938861846924 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.27792447805404663, "learning_rate": 2.362440364319792e-05, "loss": 0.2639, "step": 5447, "teacher_loss": 0.26233696937561035 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.4228118658065796, "learning_rate": 2.3628740783576697e-05, "loss": 0.2643, "step": 5448, "teacher_loss": 0.24667519330978394 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.6106827259063721, "learning_rate": 2.3633077923955475e-05, "loss": 0.2619, "step": 5449, "teacher_loss": 0.22319769859313965 }, { "compression_loss": 0.0, "epoch": 0.98, "label_loss": 0.2263053059577942, "learning_rate": 2.363741506433425e-05, "loss": 0.2186, "step": 5450, "teacher_loss": 0.2177969217300415 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2460690140724182, "learning_rate": 2.3641752204713027e-05, "loss": 0.3006, "step": 5451, "teacher_loss": 0.3066667914390564 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5033208131790161, "learning_rate": 2.36460893450918e-05, "loss": 0.4311, "step": 5452, "teacher_loss": 0.423112154006958 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4537234306335449, "learning_rate": 2.365042648547058e-05, "loss": 0.2061, "step": 5453, "teacher_loss": 0.1785932183265686 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.7310105562210083, "learning_rate": 2.3654763625849356e-05, "loss": 0.2758, "step": 5454, "teacher_loss": 0.2252088189125061 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.8010063171386719, "learning_rate": 2.3659100766228134e-05, "loss": 0.3028, "step": 5455, "teacher_loss": 0.24742259085178375 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4143591821193695, "learning_rate": 2.366343790660691e-05, "loss": 0.24, "step": 5456, "teacher_loss": 0.2206798493862152 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5115957260131836, "learning_rate": 2.366777504698569e-05, "loss": 0.2924, "step": 5457, "teacher_loss": 0.2680544853210449 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3721899092197418, "learning_rate": 2.3672112187364467e-05, "loss": 0.3075, "step": 5458, "teacher_loss": 0.3002851605415344 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5223338603973389, "learning_rate": 2.367644932774324e-05, "loss": 0.2612, "step": 5459, "teacher_loss": 0.2322404533624649 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3888382911682129, "learning_rate": 2.368078646812202e-05, "loss": 0.2387, "step": 5460, "teacher_loss": 0.22202935814857483 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5063039064407349, "learning_rate": 2.3685123608500796e-05, "loss": 0.3863, "step": 5461, "teacher_loss": 0.37298738956451416 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2738635540008545, "learning_rate": 2.3689460748879574e-05, "loss": 0.1768, "step": 5462, "teacher_loss": 0.16601964831352234 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.39991146326065063, "learning_rate": 2.3693797889258348e-05, "loss": 0.2241, "step": 5463, "teacher_loss": 0.20458745956420898 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2586379051208496, "learning_rate": 2.3698135029637126e-05, "loss": 0.2569, "step": 5464, "teacher_loss": 0.25672462582588196 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.7350320219993591, "learning_rate": 2.3702472170015903e-05, "loss": 0.2837, "step": 5465, "teacher_loss": 0.23354575037956238 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4511708617210388, "learning_rate": 2.370680931039468e-05, "loss": 0.3346, "step": 5466, "teacher_loss": 0.321644127368927 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.36172378063201904, "learning_rate": 2.371114645077346e-05, "loss": 0.1721, "step": 5467, "teacher_loss": 0.15097564458847046 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.503262996673584, "learning_rate": 2.3715483591152233e-05, "loss": 0.267, "step": 5468, "teacher_loss": 0.24076221883296967 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.6842743754386902, "learning_rate": 2.371982073153101e-05, "loss": 0.4294, "step": 5469, "teacher_loss": 0.40112870931625366 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.529126763343811, "learning_rate": 2.3724157871909788e-05, "loss": 0.3839, "step": 5470, "teacher_loss": 0.36778998374938965 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.42309868335723877, "learning_rate": 2.3728495012288566e-05, "loss": 0.2199, "step": 5471, "teacher_loss": 0.19730165600776672 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4516448378562927, "learning_rate": 2.3732832152667343e-05, "loss": 0.202, "step": 5472, "teacher_loss": 0.17424491047859192 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.30680304765701294, "learning_rate": 2.373716929304612e-05, "loss": 0.2009, "step": 5473, "teacher_loss": 0.18908946216106415 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4683828353881836, "learning_rate": 2.3741506433424895e-05, "loss": 0.287, "step": 5474, "teacher_loss": 0.26680734753608704 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3786434531211853, "learning_rate": 2.3745843573803673e-05, "loss": 0.2068, "step": 5475, "teacher_loss": 0.18773552775382996 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.8037061095237732, "learning_rate": 2.375018071418245e-05, "loss": 0.3263, "step": 5476, "teacher_loss": 0.27327990531921387 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.27950191497802734, "learning_rate": 2.3754517854561224e-05, "loss": 0.234, "step": 5477, "teacher_loss": 0.22889825701713562 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.17843934893608093, "learning_rate": 2.3758854994940002e-05, "loss": 0.1931, "step": 5478, "teacher_loss": 0.1947167068719864 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.19085225462913513, "learning_rate": 2.376319213531878e-05, "loss": 0.2144, "step": 5479, "teacher_loss": 0.21700209379196167 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.1601770669221878, "learning_rate": 2.3767529275697557e-05, "loss": 0.2008, "step": 5480, "teacher_loss": 0.20528821647167206 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3767884373664856, "learning_rate": 2.3771866416076335e-05, "loss": 0.2949, "step": 5481, "teacher_loss": 0.2858337163925171 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.42538249492645264, "learning_rate": 2.3776203556455113e-05, "loss": 0.2431, "step": 5482, "teacher_loss": 0.22281736135482788 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2810768485069275, "learning_rate": 2.378054069683389e-05, "loss": 0.2638, "step": 5483, "teacher_loss": 0.2618332505226135 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.42660534381866455, "learning_rate": 2.3784877837212668e-05, "loss": 0.3869, "step": 5484, "teacher_loss": 0.3824366629123688 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3473745584487915, "learning_rate": 2.3789214977591442e-05, "loss": 0.3016, "step": 5485, "teacher_loss": 0.2965286374092102 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5838441848754883, "learning_rate": 2.3793552117970216e-05, "loss": 0.4045, "step": 5486, "teacher_loss": 0.3846234083175659 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4836907386779785, "learning_rate": 2.3797889258348994e-05, "loss": 0.2862, "step": 5487, "teacher_loss": 0.2642497420310974 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5806557536125183, "learning_rate": 2.380222639872777e-05, "loss": 0.3269, "step": 5488, "teacher_loss": 0.2986660301685333 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5350724458694458, "learning_rate": 2.380656353910655e-05, "loss": 0.3407, "step": 5489, "teacher_loss": 0.31911808252334595 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.9408642649650574, "learning_rate": 2.3810900679485327e-05, "loss": 0.2952, "step": 5490, "teacher_loss": 0.2234172821044922 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.44192638993263245, "learning_rate": 2.3815237819864104e-05, "loss": 0.2393, "step": 5491, "teacher_loss": 0.21678532660007477 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3596239387989044, "learning_rate": 2.3819574960242882e-05, "loss": 0.2137, "step": 5492, "teacher_loss": 0.19747722148895264 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.3033997416496277, "learning_rate": 2.382391210062166e-05, "loss": 0.318, "step": 5493, "teacher_loss": 0.3195984959602356 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.4174913167953491, "learning_rate": 2.3828249241000434e-05, "loss": 0.2917, "step": 5494, "teacher_loss": 0.2777055501937866 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.22468841075897217, "learning_rate": 2.383258638137921e-05, "loss": 0.2012, "step": 5495, "teacher_loss": 0.19859914481639862 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 1.2779312133789062, "learning_rate": 2.3836923521757986e-05, "loss": 1.0136, "step": 5496, "teacher_loss": 0.9842210412025452 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2496974766254425, "learning_rate": 2.3841260662136763e-05, "loss": 0.304, "step": 5497, "teacher_loss": 0.30998706817626953 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.2141002118587494, "learning_rate": 2.384559780251554e-05, "loss": 0.2359, "step": 5498, "teacher_loss": 0.2382681965827942 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5450341701507568, "learning_rate": 2.384993494289432e-05, "loss": 0.3198, "step": 5499, "teacher_loss": 0.29481691122055054 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5489734411239624, "learning_rate": 2.3854272083273096e-05, "loss": 0.3784, "step": 5500, "teacher_loss": 0.3594951033592224 }, { "epoch": 0.99, "eval_exact_match": 79.73509933774834, "eval_f1": 87.35944602695977, "step": 5500 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5233284831047058, "learning_rate": 2.3858609223651874e-05, "loss": 0.4033, "step": 5501, "teacher_loss": 0.3899722099304199 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5641283988952637, "learning_rate": 2.386294636403065e-05, "loss": 0.2863, "step": 5502, "teacher_loss": 0.2553831934928894 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.7542777061462402, "learning_rate": 2.3867283504409426e-05, "loss": 0.3494, "step": 5503, "teacher_loss": 0.30440258979797363 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.5384633541107178, "learning_rate": 2.3871620644788203e-05, "loss": 0.2834, "step": 5504, "teacher_loss": 0.2550843060016632 }, { "compression_loss": 0.0, "epoch": 0.99, "label_loss": 0.7068926692008972, "learning_rate": 2.387595778516698e-05, "loss": 0.2929, "step": 5505, "teacher_loss": 0.2468506395816803 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5082980394363403, "learning_rate": 2.388029492554576e-05, "loss": 0.2234, "step": 5506, "teacher_loss": 0.1917380839586258 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.12020628154277802, "learning_rate": 2.3884632065924533e-05, "loss": 0.1763, "step": 5507, "teacher_loss": 0.18256735801696777 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4165462851524353, "learning_rate": 2.388896920630331e-05, "loss": 0.2266, "step": 5508, "teacher_loss": 0.20546376705169678 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.42891794443130493, "learning_rate": 2.3893306346682088e-05, "loss": 0.2807, "step": 5509, "teacher_loss": 0.264274001121521 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.9357650876045227, "learning_rate": 2.3897643487060866e-05, "loss": 0.4576, "step": 5510, "teacher_loss": 0.4044947326183319 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3889278471469879, "learning_rate": 2.3901980627439643e-05, "loss": 0.2118, "step": 5511, "teacher_loss": 0.19211365282535553 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.22113007307052612, "learning_rate": 2.3906317767818418e-05, "loss": 0.2511, "step": 5512, "teacher_loss": 0.2543991804122925 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.13394254446029663, "learning_rate": 2.3910654908197195e-05, "loss": 0.1726, "step": 5513, "teacher_loss": 0.1768435835838318 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.32154330611228943, "learning_rate": 2.3914992048575973e-05, "loss": 0.1829, "step": 5514, "teacher_loss": 0.1675400584936142 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.25767266750335693, "learning_rate": 2.391932918895475e-05, "loss": 0.1829, "step": 5515, "teacher_loss": 0.17457157373428345 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.39777910709381104, "learning_rate": 2.3923666329333528e-05, "loss": 0.1922, "step": 5516, "teacher_loss": 0.16930381953716278 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.483955055475235, "learning_rate": 2.3928003469712306e-05, "loss": 0.3311, "step": 5517, "teacher_loss": 0.31411248445510864 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5580171346664429, "learning_rate": 2.393234061009108e-05, "loss": 0.2391, "step": 5518, "teacher_loss": 0.20361566543579102 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2070433348417282, "learning_rate": 2.3936677750469857e-05, "loss": 0.1595, "step": 5519, "teacher_loss": 0.15423351526260376 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3792913556098938, "learning_rate": 2.3941014890848635e-05, "loss": 0.3003, "step": 5520, "teacher_loss": 0.29149261116981506 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.13489359617233276, "learning_rate": 2.394535203122741e-05, "loss": 0.1739, "step": 5521, "teacher_loss": 0.17824864387512207 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5052412152290344, "learning_rate": 2.3949689171606187e-05, "loss": 0.2683, "step": 5522, "teacher_loss": 0.24200588464736938 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2507641613483429, "learning_rate": 2.3954026311984965e-05, "loss": 0.4393, "step": 5523, "teacher_loss": 0.4602043032646179 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.6958397626876831, "learning_rate": 2.3958363452363742e-05, "loss": 0.3561, "step": 5524, "teacher_loss": 0.31834876537323 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2722817361354828, "learning_rate": 2.396270059274252e-05, "loss": 0.2359, "step": 5525, "teacher_loss": 0.23183688521385193 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.21399657428264618, "learning_rate": 2.3967037733121297e-05, "loss": 0.1998, "step": 5526, "teacher_loss": 0.19817332923412323 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2081492841243744, "learning_rate": 2.3971374873500075e-05, "loss": 0.2297, "step": 5527, "teacher_loss": 0.23206466436386108 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2719005048274994, "learning_rate": 2.3975712013878853e-05, "loss": 0.2534, "step": 5528, "teacher_loss": 0.251294881105423 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.348596453666687, "learning_rate": 2.3980049154257627e-05, "loss": 0.2809, "step": 5529, "teacher_loss": 0.2733915448188782 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3902977406978607, "learning_rate": 2.39843862946364e-05, "loss": 0.2965, "step": 5530, "teacher_loss": 0.2860453724861145 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5832538604736328, "learning_rate": 2.398872343501518e-05, "loss": 0.3181, "step": 5531, "teacher_loss": 0.2886642813682556 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4647487998008728, "learning_rate": 2.3993060575393956e-05, "loss": 0.344, "step": 5532, "teacher_loss": 0.330563485622406 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2958689332008362, "learning_rate": 2.3997397715772734e-05, "loss": 0.2127, "step": 5533, "teacher_loss": 0.20342203974723816 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3096369802951813, "learning_rate": 2.400173485615151e-05, "loss": 0.3125, "step": 5534, "teacher_loss": 0.3128219544887543 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3914162218570709, "learning_rate": 2.400607199653029e-05, "loss": 0.3004, "step": 5535, "teacher_loss": 0.2903319001197815 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.1795274317264557, "learning_rate": 2.4010409136909067e-05, "loss": 0.189, "step": 5536, "teacher_loss": 0.19007575511932373 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3075406849384308, "learning_rate": 2.4014746277287845e-05, "loss": 0.2474, "step": 5537, "teacher_loss": 0.24071195721626282 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2852080464363098, "learning_rate": 2.4019083417666622e-05, "loss": 0.2208, "step": 5538, "teacher_loss": 0.21366475522518158 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5133790969848633, "learning_rate": 2.4023420558045396e-05, "loss": 0.279, "step": 5539, "teacher_loss": 0.252974271774292 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.6297202110290527, "learning_rate": 2.402775769842417e-05, "loss": 0.2348, "step": 5540, "teacher_loss": 0.19095048308372498 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4543585777282715, "learning_rate": 2.4032094838802948e-05, "loss": 0.1974, "step": 5541, "teacher_loss": 0.16888773441314697 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3950199484825134, "learning_rate": 2.4036431979181726e-05, "loss": 0.2009, "step": 5542, "teacher_loss": 0.17931383848190308 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4443906247615814, "learning_rate": 2.4040769119560503e-05, "loss": 0.2901, "step": 5543, "teacher_loss": 0.27291643619537354 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.1903933733701706, "learning_rate": 2.404510625993928e-05, "loss": 0.15, "step": 5544, "teacher_loss": 0.14556069672107697 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.45108562707901, "learning_rate": 2.404944340031806e-05, "loss": 0.2391, "step": 5545, "teacher_loss": 0.2155945897102356 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.49527227878570557, "learning_rate": 2.4053780540696836e-05, "loss": 0.3276, "step": 5546, "teacher_loss": 0.30899083614349365 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.7124514579772949, "learning_rate": 2.405811768107561e-05, "loss": 0.4123, "step": 5547, "teacher_loss": 0.37899094820022583 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.6522963643074036, "learning_rate": 2.4062454821454388e-05, "loss": 0.3292, "step": 5548, "teacher_loss": 0.2933151125907898 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.7107315063476562, "learning_rate": 2.4066791961833166e-05, "loss": 0.324, "step": 5549, "teacher_loss": 0.2810158133506775 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3425602316856384, "learning_rate": 2.407112910221194e-05, "loss": 0.2057, "step": 5550, "teacher_loss": 0.1905456930398941 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.2833007872104645, "learning_rate": 2.4075466242590718e-05, "loss": 0.2369, "step": 5551, "teacher_loss": 0.23179934918880463 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.36231696605682373, "learning_rate": 2.4079803382969495e-05, "loss": 0.3214, "step": 5552, "teacher_loss": 0.3168398141860962 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5425717830657959, "learning_rate": 2.4084140523348273e-05, "loss": 0.3686, "step": 5553, "teacher_loss": 0.3492559790611267 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4136574864387512, "learning_rate": 2.408847766372705e-05, "loss": 0.3536, "step": 5554, "teacher_loss": 0.34697774052619934 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3692624866962433, "learning_rate": 2.4092814804105828e-05, "loss": 0.3581, "step": 5555, "teacher_loss": 0.35681891441345215 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5137479901313782, "learning_rate": 2.4097151944484602e-05, "loss": 0.2613, "step": 5556, "teacher_loss": 0.2332562804222107 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.5401643514633179, "learning_rate": 2.410148908486338e-05, "loss": 0.2539, "step": 5557, "teacher_loss": 0.2221173644065857 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.4916079640388489, "learning_rate": 2.4105826225242158e-05, "loss": 0.2995, "step": 5558, "teacher_loss": 0.2781757712364197 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.8625274896621704, "learning_rate": 2.4110163365620935e-05, "loss": 0.5247, "step": 5559, "teacher_loss": 0.4871985912322998 }, { "compression_loss": 0.0, "epoch": 1.0, "label_loss": 0.3087458908557892, "learning_rate": 2.4114500505999713e-05, "loss": 0.2391, "step": 5560, "teacher_loss": 0.2313242256641388 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.35308146476745605, "learning_rate": 2.4118837646378487e-05, "loss": 0.2169, "step": 5561, "teacher_loss": 0.20174546539783478 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.50047367811203, "learning_rate": 2.4123174786757265e-05, "loss": 0.3105, "step": 5562, "teacher_loss": 0.28936219215393066 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.4380897283554077, "learning_rate": 2.4127511927136042e-05, "loss": 0.2635, "step": 5563, "teacher_loss": 0.24415223300457 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.20104242861270905, "learning_rate": 2.413184906751482e-05, "loss": 0.1772, "step": 5564, "teacher_loss": 0.17459365725517273 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5302785038948059, "learning_rate": 2.4136186207893594e-05, "loss": 0.2562, "step": 5565, "teacher_loss": 0.22577491402626038 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5072332620620728, "learning_rate": 2.4140523348272372e-05, "loss": 0.2476, "step": 5566, "teacher_loss": 0.21872252225875854 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.43954703211784363, "learning_rate": 2.414486048865115e-05, "loss": 0.2412, "step": 5567, "teacher_loss": 0.21916130185127258 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.49317100644111633, "learning_rate": 2.4149197629029927e-05, "loss": 0.3291, "step": 5568, "teacher_loss": 0.31091898679733276 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.4020460844039917, "learning_rate": 2.4153534769408705e-05, "loss": 0.2484, "step": 5569, "teacher_loss": 0.23137065768241882 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5230247974395752, "learning_rate": 2.4157871909787482e-05, "loss": 0.2438, "step": 5570, "teacher_loss": 0.2128157615661621 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.7183653116226196, "learning_rate": 2.416220905016626e-05, "loss": 0.3636, "step": 5571, "teacher_loss": 0.3242231011390686 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.19906014204025269, "learning_rate": 2.4166546190545034e-05, "loss": 0.1815, "step": 5572, "teacher_loss": 0.1795075386762619 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.7087011337280273, "learning_rate": 2.4170883330923812e-05, "loss": 0.3784, "step": 5573, "teacher_loss": 0.34170466661453247 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.3014792501926422, "learning_rate": 2.4175220471302586e-05, "loss": 0.2922, "step": 5574, "teacher_loss": 0.29114097356796265 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.49453943967819214, "learning_rate": 2.4179557611681364e-05, "loss": 0.2187, "step": 5575, "teacher_loss": 0.18801945447921753 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.650495171546936, "learning_rate": 2.418389475206014e-05, "loss": 0.2932, "step": 5576, "teacher_loss": 0.2535434067249298 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.44322168827056885, "learning_rate": 2.418823189243892e-05, "loss": 0.285, "step": 5577, "teacher_loss": 0.2674024701118469 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.3634362816810608, "learning_rate": 2.4192569032817697e-05, "loss": 0.2918, "step": 5578, "teacher_loss": 0.28381985425949097 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.4401315152645111, "learning_rate": 2.4196906173196474e-05, "loss": 0.3111, "step": 5579, "teacher_loss": 0.2967928946018219 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 1.008434772491455, "learning_rate": 2.4201243313575252e-05, "loss": 0.4831, "step": 5580, "teacher_loss": 0.4247262179851532 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.47278016805648804, "learning_rate": 2.420558045395403e-05, "loss": 0.2873, "step": 5581, "teacher_loss": 0.26672977209091187 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.18486978113651276, "learning_rate": 2.4209917594332807e-05, "loss": 0.2154, "step": 5582, "teacher_loss": 0.2188088297843933 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.30044764280319214, "learning_rate": 2.4214254734711578e-05, "loss": 0.3138, "step": 5583, "teacher_loss": 0.3152994215488434 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.4646206200122833, "learning_rate": 2.4218591875090355e-05, "loss": 0.2068, "step": 5584, "teacher_loss": 0.17817914485931396 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5127899646759033, "learning_rate": 2.4222929015469133e-05, "loss": 0.2708, "step": 5585, "teacher_loss": 0.24396450817584991 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.94619220495224, "learning_rate": 2.422726615584791e-05, "loss": 0.3049, "step": 5586, "teacher_loss": 0.2336428165435791 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 1.4700956344604492, "learning_rate": 2.423160329622669e-05, "loss": 0.3326, "step": 5587, "teacher_loss": 0.20618806779384613 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.37568119168281555, "learning_rate": 2.4235940436605466e-05, "loss": 0.2003, "step": 5588, "teacher_loss": 0.18084616959095 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.37261492013931274, "learning_rate": 2.4240277576984244e-05, "loss": 0.226, "step": 5589, "teacher_loss": 0.20975112915039062 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.36511141061782837, "learning_rate": 2.424461471736302e-05, "loss": 0.2169, "step": 5590, "teacher_loss": 0.20042762160301208 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.2893666625022888, "learning_rate": 2.4248951857741795e-05, "loss": 0.1946, "step": 5591, "teacher_loss": 0.18404307961463928 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.28168922662734985, "learning_rate": 2.4253288998120573e-05, "loss": 0.2315, "step": 5592, "teacher_loss": 0.22589761018753052 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.24444711208343506, "learning_rate": 2.425762613849935e-05, "loss": 0.2457, "step": 5593, "teacher_loss": 0.24585431814193726 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5088082551956177, "learning_rate": 2.4261963278878125e-05, "loss": 0.3146, "step": 5594, "teacher_loss": 0.2930651605129242 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.8658096790313721, "learning_rate": 2.4266300419256903e-05, "loss": 0.2776, "step": 5595, "teacher_loss": 0.21224218606948853 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.46944358944892883, "learning_rate": 2.427063755963568e-05, "loss": 0.2972, "step": 5596, "teacher_loss": 0.2780410647392273 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.24515071511268616, "learning_rate": 2.4274974700014458e-05, "loss": 0.2115, "step": 5597, "teacher_loss": 0.20781417191028595 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5131568312644958, "learning_rate": 2.4279311840393235e-05, "loss": 0.3161, "step": 5598, "teacher_loss": 0.2942129969596863 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.16224858164787292, "learning_rate": 2.4283648980772013e-05, "loss": 0.2171, "step": 5599, "teacher_loss": 0.22315669059753418 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.5373561978340149, "learning_rate": 2.4287986121150787e-05, "loss": 0.3193, "step": 5600, "teacher_loss": 0.2950502932071686 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.3081502616405487, "learning_rate": 2.4292323261529565e-05, "loss": 0.2329, "step": 5601, "teacher_loss": 0.2245505005121231 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.2721555531024933, "learning_rate": 2.4296660401908343e-05, "loss": 0.2149, "step": 5602, "teacher_loss": 0.20850205421447754 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.15866847336292267, "learning_rate": 2.430099754228712e-05, "loss": 0.1623, "step": 5603, "teacher_loss": 0.16268372535705566 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.497715562582016, "learning_rate": 2.4305334682665898e-05, "loss": 0.2456, "step": 5604, "teacher_loss": 0.21760836243629456 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.2097712755203247, "learning_rate": 2.4309671823044672e-05, "loss": 0.1728, "step": 5605, "teacher_loss": 0.16873791813850403 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.10559982806444168, "learning_rate": 2.431400896342345e-05, "loss": 0.1642, "step": 5606, "teacher_loss": 0.17065785825252533 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 1.039673089981079, "learning_rate": 2.4318346103802227e-05, "loss": 0.3897, "step": 5607, "teacher_loss": 0.3174961805343628 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.39323747158050537, "learning_rate": 2.4322683244181005e-05, "loss": 0.1814, "step": 5608, "teacher_loss": 0.15790067613124847 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.33837801218032837, "learning_rate": 2.432702038455978e-05, "loss": 0.2382, "step": 5609, "teacher_loss": 0.2270343005657196 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.33296412229537964, "learning_rate": 2.4331357524938557e-05, "loss": 0.2252, "step": 5610, "teacher_loss": 0.21317672729492188 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 1.2495427131652832, "learning_rate": 2.4335694665317334e-05, "loss": 0.3054, "step": 5611, "teacher_loss": 0.2005338817834854 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.4443269968032837, "learning_rate": 2.4340031805696112e-05, "loss": 0.2469, "step": 5612, "teacher_loss": 0.22500471770763397 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.7357381582260132, "learning_rate": 2.434436894607489e-05, "loss": 0.8905, "step": 5613, "teacher_loss": 0.9077329635620117 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 1.0446408987045288, "learning_rate": 2.4348706086453667e-05, "loss": 0.366, "step": 5614, "teacher_loss": 0.2905818223953247 }, { "compression_loss": 0.0, "epoch": 1.01, "label_loss": 0.36794453859329224, "learning_rate": 2.4353043226832445e-05, "loss": 0.201, "step": 5615, "teacher_loss": 0.18245071172714233 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.6399587988853455, "learning_rate": 2.435738036721122e-05, "loss": 0.3256, "step": 5616, "teacher_loss": 0.2906665802001953 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.30041027069091797, "learning_rate": 2.4361717507589997e-05, "loss": 0.2231, "step": 5617, "teacher_loss": 0.21447762846946716 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.6319758892059326, "learning_rate": 2.436605464796877e-05, "loss": 0.2338, "step": 5618, "teacher_loss": 0.1895652413368225 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5243546962738037, "learning_rate": 2.437039178834755e-05, "loss": 0.2638, "step": 5619, "teacher_loss": 0.2348591387271881 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.28889650106430054, "learning_rate": 2.4374728928726326e-05, "loss": 0.2258, "step": 5620, "teacher_loss": 0.21874016523361206 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.8411446809768677, "learning_rate": 2.4379066069105104e-05, "loss": 0.4277, "step": 5621, "teacher_loss": 0.38174301385879517 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4184070825576782, "learning_rate": 2.438340320948388e-05, "loss": 0.4005, "step": 5622, "teacher_loss": 0.3984929919242859 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.2530801296234131, "learning_rate": 2.438774034986266e-05, "loss": 0.2229, "step": 5623, "teacher_loss": 0.2195446491241455 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5007804036140442, "learning_rate": 2.4392077490241437e-05, "loss": 0.4297, "step": 5624, "teacher_loss": 0.42185771465301514 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.674731969833374, "learning_rate": 2.4396414630620214e-05, "loss": 0.2834, "step": 5625, "teacher_loss": 0.2399139553308487 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5432819128036499, "learning_rate": 2.4400751770998992e-05, "loss": 0.2931, "step": 5626, "teacher_loss": 0.26524823904037476 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 1.1695358753204346, "learning_rate": 2.4405088911377763e-05, "loss": 0.5252, "step": 5627, "teacher_loss": 0.45355552434921265 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5249483585357666, "learning_rate": 2.440942605175654e-05, "loss": 0.2616, "step": 5628, "teacher_loss": 0.2323458343744278 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.48556044697761536, "learning_rate": 2.4413763192135318e-05, "loss": 0.2638, "step": 5629, "teacher_loss": 0.23915642499923706 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5114362239837646, "learning_rate": 2.4418100332514096e-05, "loss": 0.2799, "step": 5630, "teacher_loss": 0.2541220486164093 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.32199379801750183, "learning_rate": 2.4422437472892873e-05, "loss": 0.1888, "step": 5631, "teacher_loss": 0.1740495264530182 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 1.7694306373596191, "learning_rate": 2.442677461327165e-05, "loss": 0.7707, "step": 5632, "teacher_loss": 0.6597065329551697 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.38596707582473755, "learning_rate": 2.443111175365043e-05, "loss": 0.2196, "step": 5633, "teacher_loss": 0.20110058784484863 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4178635776042938, "learning_rate": 2.4435448894029206e-05, "loss": 0.1697, "step": 5634, "teacher_loss": 0.14209628105163574 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.47275400161743164, "learning_rate": 2.443978603440798e-05, "loss": 0.2192, "step": 5635, "teacher_loss": 0.19101710617542267 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.508428692817688, "learning_rate": 2.4444123174786758e-05, "loss": 0.2112, "step": 5636, "teacher_loss": 0.17816904187202454 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.2387896329164505, "learning_rate": 2.4448460315165536e-05, "loss": 0.1789, "step": 5637, "teacher_loss": 0.17229697108268738 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.2613878846168518, "learning_rate": 2.445279745554431e-05, "loss": 0.2016, "step": 5638, "teacher_loss": 0.19499284029006958 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.47389528155326843, "learning_rate": 2.4457134595923087e-05, "loss": 0.2429, "step": 5639, "teacher_loss": 0.21727406978607178 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.6177856922149658, "learning_rate": 2.4461471736301865e-05, "loss": 0.3746, "step": 5640, "teacher_loss": 0.3475687503814697 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.44690102338790894, "learning_rate": 2.4465808876680643e-05, "loss": 0.298, "step": 5641, "teacher_loss": 0.2814851403236389 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.42904990911483765, "learning_rate": 2.447014601705942e-05, "loss": 0.2846, "step": 5642, "teacher_loss": 0.26853519678115845 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.39792031049728394, "learning_rate": 2.4474483157438198e-05, "loss": 0.238, "step": 5643, "teacher_loss": 0.22022143006324768 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5822258591651917, "learning_rate": 2.4478820297816972e-05, "loss": 0.2443, "step": 5644, "teacher_loss": 0.2067420929670334 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5057730674743652, "learning_rate": 2.448315743819575e-05, "loss": 0.2146, "step": 5645, "teacher_loss": 0.18219396471977234 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4598786234855652, "learning_rate": 2.4487494578574527e-05, "loss": 0.2756, "step": 5646, "teacher_loss": 0.25508224964141846 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.47035300731658936, "learning_rate": 2.4491831718953305e-05, "loss": 0.3042, "step": 5647, "teacher_loss": 0.28577524423599243 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 1.4768092632293701, "learning_rate": 2.449616885933208e-05, "loss": 0.8271, "step": 5648, "teacher_loss": 0.7549407482147217 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5571060180664062, "learning_rate": 2.4500505999710857e-05, "loss": 0.2588, "step": 5649, "teacher_loss": 0.22560936212539673 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.27670279145240784, "learning_rate": 2.4504843140089635e-05, "loss": 0.213, "step": 5650, "teacher_loss": 0.20595984160900116 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.24549859762191772, "learning_rate": 2.4509180280468412e-05, "loss": 0.2625, "step": 5651, "teacher_loss": 0.2643560767173767 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4431856870651245, "learning_rate": 2.451351742084719e-05, "loss": 0.2069, "step": 5652, "teacher_loss": 0.18060001730918884 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.13577072322368622, "learning_rate": 2.4517854561225964e-05, "loss": 0.1649, "step": 5653, "teacher_loss": 0.1681235283613205 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4667758047580719, "learning_rate": 2.452219170160474e-05, "loss": 0.2784, "step": 5654, "teacher_loss": 0.25750458240509033 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.3781157433986664, "learning_rate": 2.452652884198352e-05, "loss": 0.1923, "step": 5655, "teacher_loss": 0.1716112494468689 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.6267328262329102, "learning_rate": 2.4530865982362297e-05, "loss": 0.3008, "step": 5656, "teacher_loss": 0.2645624279975891 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.35940712690353394, "learning_rate": 2.4535203122741074e-05, "loss": 0.3256, "step": 5657, "teacher_loss": 0.32188737392425537 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.2657119631767273, "learning_rate": 2.4539540263119852e-05, "loss": 0.21, "step": 5658, "teacher_loss": 0.20382657647132874 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5109708905220032, "learning_rate": 2.4543877403498626e-05, "loss": 0.2585, "step": 5659, "teacher_loss": 0.2304304838180542 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.3855929970741272, "learning_rate": 2.4548214543877404e-05, "loss": 0.3898, "step": 5660, "teacher_loss": 0.39021727442741394 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.454128235578537, "learning_rate": 2.455255168425618e-05, "loss": 0.2615, "step": 5661, "teacher_loss": 0.24013438820838928 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.41541045904159546, "learning_rate": 2.4556888824634956e-05, "loss": 0.2798, "step": 5662, "teacher_loss": 0.2647482752799988 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.4303972125053406, "learning_rate": 2.4561225965013733e-05, "loss": 0.2202, "step": 5663, "teacher_loss": 0.19681212306022644 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.30703192949295044, "learning_rate": 2.456556310539251e-05, "loss": 0.2182, "step": 5664, "teacher_loss": 0.20830154418945312 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.3043895363807678, "learning_rate": 2.456990024577129e-05, "loss": 0.2124, "step": 5665, "teacher_loss": 0.20221829414367676 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.37489616870880127, "learning_rate": 2.4574237386150066e-05, "loss": 0.2544, "step": 5666, "teacher_loss": 0.24095915257930756 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.5221413969993591, "learning_rate": 2.4578574526528844e-05, "loss": 0.2835, "step": 5667, "teacher_loss": 0.2569735050201416 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 1.3497745990753174, "learning_rate": 2.458291166690762e-05, "loss": 0.408, "step": 5668, "teacher_loss": 0.3033583164215088 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.2913971543312073, "learning_rate": 2.45872488072864e-05, "loss": 0.2414, "step": 5669, "teacher_loss": 0.23583179712295532 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.7599729299545288, "learning_rate": 2.4591585947665173e-05, "loss": 0.2996, "step": 5670, "teacher_loss": 0.2484346181154251 }, { "compression_loss": 0.0, "epoch": 1.02, "label_loss": 0.3934428095817566, "learning_rate": 2.4595923088043948e-05, "loss": 0.1714, "step": 5671, "teacher_loss": 0.1467587947845459 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.3594421446323395, "learning_rate": 2.4600260228422725e-05, "loss": 0.215, "step": 5672, "teacher_loss": 0.1989109218120575 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.36442431807518005, "learning_rate": 2.4604597368801503e-05, "loss": 0.2087, "step": 5673, "teacher_loss": 0.19134564697742462 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.3963119387626648, "learning_rate": 2.460893450918028e-05, "loss": 0.2275, "step": 5674, "teacher_loss": 0.208769753575325 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4182215929031372, "learning_rate": 2.4613271649559058e-05, "loss": 0.3111, "step": 5675, "teacher_loss": 0.2992406189441681 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4566314220428467, "learning_rate": 2.4617608789937836e-05, "loss": 0.2726, "step": 5676, "teacher_loss": 0.2522006630897522 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 1.3192243576049805, "learning_rate": 2.4621945930316613e-05, "loss": 0.6681, "step": 5677, "teacher_loss": 0.5957131385803223 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4689609408378601, "learning_rate": 2.462628307069539e-05, "loss": 0.2346, "step": 5678, "teacher_loss": 0.20852145552635193 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4360184073448181, "learning_rate": 2.4630620211074165e-05, "loss": 0.2814, "step": 5679, "teacher_loss": 0.26425686478614807 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.38606777787208557, "learning_rate": 2.4634957351452943e-05, "loss": 0.216, "step": 5680, "teacher_loss": 0.19708159565925598 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.2852972149848938, "learning_rate": 2.4639294491831717e-05, "loss": 0.2439, "step": 5681, "teacher_loss": 0.23929661512374878 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.2817647457122803, "learning_rate": 2.4643631632210495e-05, "loss": 0.1758, "step": 5682, "teacher_loss": 0.16400158405303955 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.6942086219787598, "learning_rate": 2.4647968772589272e-05, "loss": 0.6155, "step": 5683, "teacher_loss": 0.6067664623260498 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4805641770362854, "learning_rate": 2.465230591296805e-05, "loss": 0.2766, "step": 5684, "teacher_loss": 0.2539036273956299 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.3763279914855957, "learning_rate": 2.4656643053346828e-05, "loss": 0.2064, "step": 5685, "teacher_loss": 0.18757084012031555 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.35183706879615784, "learning_rate": 2.4660980193725605e-05, "loss": 0.1977, "step": 5686, "teacher_loss": 0.1805717945098877 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.1790233552455902, "learning_rate": 2.4665317334104383e-05, "loss": 0.1731, "step": 5687, "teacher_loss": 0.17249321937561035 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4301919937133789, "learning_rate": 2.4669654474483157e-05, "loss": 0.2651, "step": 5688, "teacher_loss": 0.2467479407787323 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.3238109350204468, "learning_rate": 2.4673991614861935e-05, "loss": 0.1968, "step": 5689, "teacher_loss": 0.18274295330047607 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5029712319374084, "learning_rate": 2.4678328755240712e-05, "loss": 0.2557, "step": 5690, "teacher_loss": 0.22824493050575256 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.26752063632011414, "learning_rate": 2.468266589561949e-05, "loss": 0.4595, "step": 5691, "teacher_loss": 0.4807853102684021 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.6211721301078796, "learning_rate": 2.4687003035998264e-05, "loss": 0.2288, "step": 5692, "teacher_loss": 0.1851685643196106 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5095622539520264, "learning_rate": 2.4691340176377042e-05, "loss": 0.2136, "step": 5693, "teacher_loss": 0.1807289570569992 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.15095245838165283, "learning_rate": 2.469567731675582e-05, "loss": 0.1858, "step": 5694, "teacher_loss": 0.1896858662366867 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.3545226752758026, "learning_rate": 2.4700014457134597e-05, "loss": 0.2592, "step": 5695, "teacher_loss": 0.24865896999835968 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.28454339504241943, "learning_rate": 2.4704351597513375e-05, "loss": 0.2664, "step": 5696, "teacher_loss": 0.2644324004650116 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.1250162273645401, "learning_rate": 2.470868873789215e-05, "loss": 0.1747, "step": 5697, "teacher_loss": 0.18018421530723572 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5807621479034424, "learning_rate": 2.4713025878270926e-05, "loss": 0.2632, "step": 5698, "teacher_loss": 0.22794674336910248 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.4761284291744232, "learning_rate": 2.4717363018649704e-05, "loss": 0.2414, "step": 5699, "teacher_loss": 0.21528282761573792 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.6427435278892517, "learning_rate": 2.4721700159028482e-05, "loss": 0.2438, "step": 5700, "teacher_loss": 0.19942112267017365 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.40813300013542175, "learning_rate": 2.472603729940726e-05, "loss": 0.2141, "step": 5701, "teacher_loss": 0.1925884187221527 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.17408955097198486, "learning_rate": 2.4730374439786037e-05, "loss": 0.2154, "step": 5702, "teacher_loss": 0.2200450301170349 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.279409795999527, "learning_rate": 2.473471158016481e-05, "loss": 0.1698, "step": 5703, "teacher_loss": 0.15761703252792358 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5663573145866394, "learning_rate": 2.473904872054359e-05, "loss": 0.2847, "step": 5704, "teacher_loss": 0.2534576654434204 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.39080384373664856, "learning_rate": 2.4743385860922366e-05, "loss": 0.2409, "step": 5705, "teacher_loss": 0.2242712676525116 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.33358481526374817, "learning_rate": 2.474772300130114e-05, "loss": 0.2619, "step": 5706, "teacher_loss": 0.253900408744812 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.8228200674057007, "learning_rate": 2.4752060141679918e-05, "loss": 0.2874, "step": 5707, "teacher_loss": 0.22791078686714172 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.34778741002082825, "learning_rate": 2.4756397282058696e-05, "loss": 0.2924, "step": 5708, "teacher_loss": 0.2862427234649658 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.6013121008872986, "learning_rate": 2.4760734422437474e-05, "loss": 0.3065, "step": 5709, "teacher_loss": 0.27379727363586426 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.8529046773910522, "learning_rate": 2.476507156281625e-05, "loss": 0.4739, "step": 5710, "teacher_loss": 0.4317663908004761 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.37333786487579346, "learning_rate": 2.476940870319503e-05, "loss": 0.263, "step": 5711, "teacher_loss": 0.2507673501968384 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.33566492795944214, "learning_rate": 2.4773745843573806e-05, "loss": 0.198, "step": 5712, "teacher_loss": 0.1826917976140976 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.614032506942749, "learning_rate": 2.4778082983952584e-05, "loss": 0.3004, "step": 5713, "teacher_loss": 0.26554590463638306 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.7869497537612915, "learning_rate": 2.4782420124331358e-05, "loss": 0.3549, "step": 5714, "teacher_loss": 0.3068962097167969 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.19687432050704956, "learning_rate": 2.4786757264710133e-05, "loss": 0.1788, "step": 5715, "teacher_loss": 0.17675325274467468 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5800275802612305, "learning_rate": 2.479109440508891e-05, "loss": 0.3885, "step": 5716, "teacher_loss": 0.36716651916503906 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5742631554603577, "learning_rate": 2.4795431545467688e-05, "loss": 0.315, "step": 5717, "teacher_loss": 0.28618019819259644 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5100072026252747, "learning_rate": 2.4799768685846465e-05, "loss": 0.2694, "step": 5718, "teacher_loss": 0.24268606305122375 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5326964855194092, "learning_rate": 2.4804105826225243e-05, "loss": 0.2802, "step": 5719, "teacher_loss": 0.2521963119506836 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.1128014326095581, "learning_rate": 2.480844296660402e-05, "loss": 0.1637, "step": 5720, "teacher_loss": 0.16936154663562775 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.35093826055526733, "learning_rate": 2.4812780106982798e-05, "loss": 0.3122, "step": 5721, "teacher_loss": 0.30794641375541687 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.21749761700630188, "learning_rate": 2.4817117247361576e-05, "loss": 0.2036, "step": 5722, "teacher_loss": 0.20202505588531494 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.12070222198963165, "learning_rate": 2.4821454387740353e-05, "loss": 0.1463, "step": 5723, "teacher_loss": 0.14916247129440308 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.15647193789482117, "learning_rate": 2.4825791528119128e-05, "loss": 0.1674, "step": 5724, "teacher_loss": 0.1686602532863617 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.5443297624588013, "learning_rate": 2.4830128668497902e-05, "loss": 0.3033, "step": 5725, "teacher_loss": 0.27650344371795654 }, { "compression_loss": 0.0, "epoch": 1.03, "label_loss": 0.31578707695007324, "learning_rate": 2.483446580887668e-05, "loss": 0.1934, "step": 5726, "teacher_loss": 0.1797824501991272 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3873903155326843, "learning_rate": 2.4838802949255457e-05, "loss": 0.2784, "step": 5727, "teacher_loss": 0.2663387954235077 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3121715784072876, "learning_rate": 2.4843140089634235e-05, "loss": 0.2797, "step": 5728, "teacher_loss": 0.27609914541244507 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4188960790634155, "learning_rate": 2.4847477230013012e-05, "loss": 0.3351, "step": 5729, "teacher_loss": 0.3258160352706909 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5170630216598511, "learning_rate": 2.485181437039179e-05, "loss": 0.2877, "step": 5730, "teacher_loss": 0.262221097946167 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4651789367198944, "learning_rate": 2.4856151510770568e-05, "loss": 0.2017, "step": 5731, "teacher_loss": 0.17238225042819977 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.271403968334198, "learning_rate": 2.4860488651149342e-05, "loss": 0.273, "step": 5732, "teacher_loss": 0.2731597125530243 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.44333982467651367, "learning_rate": 2.486482579152812e-05, "loss": 0.274, "step": 5733, "teacher_loss": 0.2551373839378357 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3212038278579712, "learning_rate": 2.4869162931906897e-05, "loss": 0.2446, "step": 5734, "teacher_loss": 0.23614084720611572 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5660072565078735, "learning_rate": 2.4873500072285675e-05, "loss": 0.2539, "step": 5735, "teacher_loss": 0.21925979852676392 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.7995697855949402, "learning_rate": 2.487783721266445e-05, "loss": 0.3267, "step": 5736, "teacher_loss": 0.2742080092430115 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3201525807380676, "learning_rate": 2.4882174353043227e-05, "loss": 0.2086, "step": 5737, "teacher_loss": 0.19622795283794403 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.34195366501808167, "learning_rate": 2.4886511493422004e-05, "loss": 0.2218, "step": 5738, "teacher_loss": 0.20847046375274658 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 1.2613153457641602, "learning_rate": 2.4890848633800782e-05, "loss": 0.335, "step": 5739, "teacher_loss": 0.23206044733524323 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4662017524242401, "learning_rate": 2.489518577417956e-05, "loss": 0.2837, "step": 5740, "teacher_loss": 0.263388067483902 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4382407069206238, "learning_rate": 2.4899522914558334e-05, "loss": 0.2052, "step": 5741, "teacher_loss": 0.1792682707309723 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.6544212102890015, "learning_rate": 2.490386005493711e-05, "loss": 0.4402, "step": 5742, "teacher_loss": 0.41640496253967285 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5321322083473206, "learning_rate": 2.490819719531589e-05, "loss": 0.2177, "step": 5743, "teacher_loss": 0.18279704451560974 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4202308654785156, "learning_rate": 2.4912534335694667e-05, "loss": 0.3667, "step": 5744, "teacher_loss": 0.3607712984085083 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.6600340008735657, "learning_rate": 2.4916871476073444e-05, "loss": 0.3302, "step": 5745, "teacher_loss": 0.29359379410743713 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5137569904327393, "learning_rate": 2.492120861645222e-05, "loss": 0.1858, "step": 5746, "teacher_loss": 0.14932073652744293 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.7411439418792725, "learning_rate": 2.4925545756830996e-05, "loss": 0.2865, "step": 5747, "teacher_loss": 0.23600199818611145 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.32975006103515625, "learning_rate": 2.4929882897209774e-05, "loss": 0.2187, "step": 5748, "teacher_loss": 0.20637497305870056 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.2602580487728119, "learning_rate": 2.493422003758855e-05, "loss": 0.2835, "step": 5749, "teacher_loss": 0.28605425357818604 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.17854523658752441, "learning_rate": 2.4938557177967326e-05, "loss": 0.1978, "step": 5750, "teacher_loss": 0.19992607831954956 }, { "epoch": 1.04, "eval_exact_match": 79.28098391674551, "eval_f1": 86.92138248670703, "step": 5750 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.7560309171676636, "learning_rate": 2.4942894318346103e-05, "loss": 0.3831, "step": 5751, "teacher_loss": 0.34162116050720215 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.386283814907074, "learning_rate": 2.494723145872488e-05, "loss": 0.3137, "step": 5752, "teacher_loss": 0.3056256175041199 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.8294663429260254, "learning_rate": 2.495156859910366e-05, "loss": 0.3401, "step": 5753, "teacher_loss": 0.2857303023338318 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5904160141944885, "learning_rate": 2.4955905739482436e-05, "loss": 0.2127, "step": 5754, "teacher_loss": 0.17068493366241455 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.2206193208694458, "learning_rate": 2.4960242879861214e-05, "loss": 0.2853, "step": 5755, "teacher_loss": 0.29246532917022705 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3550376892089844, "learning_rate": 2.496458002023999e-05, "loss": 0.192, "step": 5756, "teacher_loss": 0.1738990843296051 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.1980118453502655, "learning_rate": 2.4968917160618766e-05, "loss": 0.1599, "step": 5757, "teacher_loss": 0.1556473821401596 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.6552790999412537, "learning_rate": 2.4973254300997543e-05, "loss": 0.2733, "step": 5758, "teacher_loss": 0.23090949654579163 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.39710724353790283, "learning_rate": 2.4977591441376317e-05, "loss": 0.2731, "step": 5759, "teacher_loss": 0.259337842464447 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5608937740325928, "learning_rate": 2.4981928581755095e-05, "loss": 0.3201, "step": 5760, "teacher_loss": 0.29334133863449097 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.36709702014923096, "learning_rate": 2.4986265722133873e-05, "loss": 0.2884, "step": 5761, "teacher_loss": 0.2796553671360016 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.5096365213394165, "learning_rate": 2.499060286251265e-05, "loss": 0.326, "step": 5762, "teacher_loss": 0.3056340515613556 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.6187939643859863, "learning_rate": 2.4994940002891428e-05, "loss": 0.2263, "step": 5763, "teacher_loss": 0.18266212940216064 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.472749799489975, "learning_rate": 2.4999277143270205e-05, "loss": 0.2872, "step": 5764, "teacher_loss": 0.2665541470050812 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.14188289642333984, "learning_rate": 2.5003614283648983e-05, "loss": 0.1839, "step": 5765, "teacher_loss": 0.18857935070991516 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.8677908182144165, "learning_rate": 2.500795142402776e-05, "loss": 0.4434, "step": 5766, "teacher_loss": 0.39629605412483215 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.33384984731674194, "learning_rate": 2.501228856440654e-05, "loss": 0.27, "step": 5767, "teacher_loss": 0.26286375522613525 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3554983139038086, "learning_rate": 2.501662570478531e-05, "loss": 0.1878, "step": 5768, "teacher_loss": 0.1691453605890274 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.46972525119781494, "learning_rate": 2.5020962845164087e-05, "loss": 0.2699, "step": 5769, "teacher_loss": 0.2476898580789566 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.43315064907073975, "learning_rate": 2.5025299985542864e-05, "loss": 0.2426, "step": 5770, "teacher_loss": 0.2214576005935669 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4194457530975342, "learning_rate": 2.5029637125921642e-05, "loss": 0.2931, "step": 5771, "teacher_loss": 0.2790180444717407 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.18961502611637115, "learning_rate": 2.503397426630042e-05, "loss": 0.1546, "step": 5772, "teacher_loss": 0.15073883533477783 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.8729780912399292, "learning_rate": 2.5038311406679197e-05, "loss": 0.3194, "step": 5773, "teacher_loss": 0.2578945755958557 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.45669692754745483, "learning_rate": 2.5042648547057975e-05, "loss": 0.3062, "step": 5774, "teacher_loss": 0.2894551157951355 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.32803791761398315, "learning_rate": 2.5046985687436753e-05, "loss": 0.2683, "step": 5775, "teacher_loss": 0.2616109251976013 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.6847606897354126, "learning_rate": 2.5051322827815527e-05, "loss": 0.3203, "step": 5776, "teacher_loss": 0.2797488570213318 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.7289848327636719, "learning_rate": 2.5055659968194304e-05, "loss": 0.3537, "step": 5777, "teacher_loss": 0.3120438754558563 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.4425666630268097, "learning_rate": 2.5059997108573082e-05, "loss": 0.3525, "step": 5778, "teacher_loss": 0.342507541179657 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.643241822719574, "learning_rate": 2.5064334248951856e-05, "loss": 0.285, "step": 5779, "teacher_loss": 0.24525006115436554 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.47755759954452515, "learning_rate": 2.5068671389330634e-05, "loss": 0.3808, "step": 5780, "teacher_loss": 0.37009796500205994 }, { "compression_loss": 0.0, "epoch": 1.04, "label_loss": 0.3420846462249756, "learning_rate": 2.507300852970941e-05, "loss": 0.1777, "step": 5781, "teacher_loss": 0.15946078300476074 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.20883983373641968, "learning_rate": 2.507734567008819e-05, "loss": 0.2447, "step": 5782, "teacher_loss": 0.24867497384548187 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5636640787124634, "learning_rate": 2.5081682810466967e-05, "loss": 0.2474, "step": 5783, "teacher_loss": 0.21228128671646118 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.3770233988761902, "learning_rate": 2.5086019950845744e-05, "loss": 0.2283, "step": 5784, "teacher_loss": 0.21182557940483093 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4271715581417084, "learning_rate": 2.509035709122452e-05, "loss": 0.2639, "step": 5785, "teacher_loss": 0.24580763280391693 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5708246231079102, "learning_rate": 2.5094694231603296e-05, "loss": 0.3439, "step": 5786, "teacher_loss": 0.31866660714149475 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.2131984531879425, "learning_rate": 2.5099031371982074e-05, "loss": 0.191, "step": 5787, "teacher_loss": 0.18852703273296356 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.7692008018493652, "learning_rate": 2.510336851236085e-05, "loss": 0.2798, "step": 5788, "teacher_loss": 0.22544288635253906 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.7904120683670044, "learning_rate": 2.510770565273963e-05, "loss": 0.4143, "step": 5789, "teacher_loss": 0.37247389554977417 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.35019925236701965, "learning_rate": 2.5112042793118403e-05, "loss": 0.2063, "step": 5790, "teacher_loss": 0.1903364360332489 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.1626065969467163, "learning_rate": 2.511637993349718e-05, "loss": 0.174, "step": 5791, "teacher_loss": 0.175270214676857 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4408935606479645, "learning_rate": 2.512071707387596e-05, "loss": 0.2522, "step": 5792, "teacher_loss": 0.23125328123569489 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.564699113368988, "learning_rate": 2.5125054214254736e-05, "loss": 0.2785, "step": 5793, "teacher_loss": 0.24667063355445862 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.22682073712348938, "learning_rate": 2.512939135463351e-05, "loss": 0.2326, "step": 5794, "teacher_loss": 0.2332293838262558 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.7990626096725464, "learning_rate": 2.5133728495012288e-05, "loss": 0.3527, "step": 5795, "teacher_loss": 0.3031332492828369 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.23610055446624756, "learning_rate": 2.5138065635391066e-05, "loss": 0.21, "step": 5796, "teacher_loss": 0.20705491304397583 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.6262083053588867, "learning_rate": 2.5142402775769843e-05, "loss": 0.4034, "step": 5797, "teacher_loss": 0.3786849081516266 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.32296305894851685, "learning_rate": 2.514673991614862e-05, "loss": 0.2141, "step": 5798, "teacher_loss": 0.2019776850938797 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 1.28407883644104, "learning_rate": 2.51510770565274e-05, "loss": 0.3252, "step": 5799, "teacher_loss": 0.21862280368804932 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.6629778742790222, "learning_rate": 2.5155414196906176e-05, "loss": 0.3525, "step": 5800, "teacher_loss": 0.3180267810821533 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5553765892982483, "learning_rate": 2.515975133728495e-05, "loss": 0.2279, "step": 5801, "teacher_loss": 0.19147717952728271 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.46481117606163025, "learning_rate": 2.5164088477663728e-05, "loss": 0.2298, "step": 5802, "teacher_loss": 0.20370006561279297 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4750494062900543, "learning_rate": 2.5168425618042502e-05, "loss": 0.2226, "step": 5803, "teacher_loss": 0.19451621174812317 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4932330846786499, "learning_rate": 2.517276275842128e-05, "loss": 0.2059, "step": 5804, "teacher_loss": 0.17402797937393188 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.44331634044647217, "learning_rate": 2.5177099898800058e-05, "loss": 0.3422, "step": 5805, "teacher_loss": 0.33091285824775696 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.583695650100708, "learning_rate": 2.5181437039178835e-05, "loss": 0.288, "step": 5806, "teacher_loss": 0.2551591396331787 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5548825263977051, "learning_rate": 2.5185774179557613e-05, "loss": 0.3794, "step": 5807, "teacher_loss": 0.35985425114631653 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.2901937961578369, "learning_rate": 2.519011131993639e-05, "loss": 0.3196, "step": 5808, "teacher_loss": 0.3228548765182495 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.3566325008869171, "learning_rate": 2.5194448460315168e-05, "loss": 0.2287, "step": 5809, "teacher_loss": 0.2145385593175888 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.1724443882703781, "learning_rate": 2.5198785600693946e-05, "loss": 0.1839, "step": 5810, "teacher_loss": 0.18521729111671448 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.2794632911682129, "learning_rate": 2.5203122741072723e-05, "loss": 0.2557, "step": 5811, "teacher_loss": 0.25300610065460205 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.3794358968734741, "learning_rate": 2.5207459881451494e-05, "loss": 0.2963, "step": 5812, "teacher_loss": 0.2870127558708191 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.442825049161911, "learning_rate": 2.5211797021830272e-05, "loss": 0.2058, "step": 5813, "teacher_loss": 0.17951908707618713 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.27354946732521057, "learning_rate": 2.521613416220905e-05, "loss": 0.2554, "step": 5814, "teacher_loss": 0.2534312903881073 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.44062986969947815, "learning_rate": 2.5220471302587827e-05, "loss": 0.2418, "step": 5815, "teacher_loss": 0.21971175074577332 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.6296927332878113, "learning_rate": 2.5224808442966605e-05, "loss": 0.5582, "step": 5816, "teacher_loss": 0.5502831935882568 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.29670774936676025, "learning_rate": 2.5229145583345382e-05, "loss": 0.2544, "step": 5817, "teacher_loss": 0.24974079430103302 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4668574333190918, "learning_rate": 2.523348272372416e-05, "loss": 0.2763, "step": 5818, "teacher_loss": 0.25517359375953674 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.28109365701675415, "learning_rate": 2.5237819864102937e-05, "loss": 0.2226, "step": 5819, "teacher_loss": 0.21614226698875427 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.6486839056015015, "learning_rate": 2.524215700448171e-05, "loss": 0.2723, "step": 5820, "teacher_loss": 0.2304602712392807 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.3750072121620178, "learning_rate": 2.524649414486049e-05, "loss": 0.2947, "step": 5821, "teacher_loss": 0.28574198484420776 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.41910210251808167, "learning_rate": 2.5250831285239267e-05, "loss": 0.2904, "step": 5822, "teacher_loss": 0.2760849893093109 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.7422313690185547, "learning_rate": 2.525516842561804e-05, "loss": 0.3744, "step": 5823, "teacher_loss": 0.3335673213005066 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5846238732337952, "learning_rate": 2.525950556599682e-05, "loss": 0.2464, "step": 5824, "teacher_loss": 0.20882129669189453 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5884277820587158, "learning_rate": 2.5263842706375596e-05, "loss": 0.2111, "step": 5825, "teacher_loss": 0.1692180186510086 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.6069173812866211, "learning_rate": 2.5268179846754374e-05, "loss": 0.2826, "step": 5826, "teacher_loss": 0.2466059923171997 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.3188580870628357, "learning_rate": 2.527251698713315e-05, "loss": 0.2104, "step": 5827, "teacher_loss": 0.1984032392501831 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5554224252700806, "learning_rate": 2.527685412751193e-05, "loss": 0.4018, "step": 5828, "teacher_loss": 0.3847217559814453 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.702427864074707, "learning_rate": 2.5281191267890703e-05, "loss": 0.2083, "step": 5829, "teacher_loss": 0.15334858000278473 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.31164172291755676, "learning_rate": 2.528552840826948e-05, "loss": 0.2783, "step": 5830, "teacher_loss": 0.27454712986946106 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.4595423936843872, "learning_rate": 2.528986554864826e-05, "loss": 0.2429, "step": 5831, "teacher_loss": 0.21886922419071198 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.8257927298545837, "learning_rate": 2.5294202689027036e-05, "loss": 0.2968, "step": 5832, "teacher_loss": 0.23804402351379395 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.5439439415931702, "learning_rate": 2.5298539829405814e-05, "loss": 0.3352, "step": 5833, "teacher_loss": 0.31195998191833496 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.35350432991981506, "learning_rate": 2.5302876969784588e-05, "loss": 0.2859, "step": 5834, "teacher_loss": 0.2783682346343994 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.422508180141449, "learning_rate": 2.5307214110163366e-05, "loss": 0.2366, "step": 5835, "teacher_loss": 0.21598462760448456 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.641747236251831, "learning_rate": 2.5311551250542143e-05, "loss": 0.4979, "step": 5836, "teacher_loss": 0.4819698929786682 }, { "compression_loss": 0.0, "epoch": 1.05, "label_loss": 0.805057168006897, "learning_rate": 2.531588839092092e-05, "loss": 0.4214, "step": 5837, "teacher_loss": 0.378801167011261 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.34252700209617615, "learning_rate": 2.5320225531299695e-05, "loss": 0.2755, "step": 5838, "teacher_loss": 0.2680216431617737 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.205826535820961, "learning_rate": 2.5324562671678473e-05, "loss": 0.1454, "step": 5839, "teacher_loss": 0.13867482542991638 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5726264119148254, "learning_rate": 2.532889981205725e-05, "loss": 0.3103, "step": 5840, "teacher_loss": 0.2811729907989502 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.2583940625190735, "learning_rate": 2.5333236952436028e-05, "loss": 0.1791, "step": 5841, "teacher_loss": 0.1702657788991928 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5586618185043335, "learning_rate": 2.5337574092814806e-05, "loss": 0.3026, "step": 5842, "teacher_loss": 0.2741256654262543 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.471606582403183, "learning_rate": 2.5341911233193583e-05, "loss": 0.2436, "step": 5843, "teacher_loss": 0.21825124323368073 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.30049288272857666, "learning_rate": 2.534624837357236e-05, "loss": 0.249, "step": 5844, "teacher_loss": 0.24324896931648254 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.3186829686164856, "learning_rate": 2.5350585513951135e-05, "loss": 0.2055, "step": 5845, "teacher_loss": 0.19293229281902313 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.7967687845230103, "learning_rate": 2.5354922654329913e-05, "loss": 0.3799, "step": 5846, "teacher_loss": 0.3335472643375397 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.7419020533561707, "learning_rate": 2.5359259794708687e-05, "loss": 0.3289, "step": 5847, "teacher_loss": 0.2830204665660858 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.36382153630256653, "learning_rate": 2.5363596935087465e-05, "loss": 0.2382, "step": 5848, "teacher_loss": 0.2242489457130432 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.32528695464134216, "learning_rate": 2.5367934075466242e-05, "loss": 0.4005, "step": 5849, "teacher_loss": 0.40881454944610596 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.19113288819789886, "learning_rate": 2.537227121584502e-05, "loss": 0.2025, "step": 5850, "teacher_loss": 0.20375216007232666 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 1.136780023574829, "learning_rate": 2.5376608356223798e-05, "loss": 0.323, "step": 5851, "teacher_loss": 0.23253172636032104 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.4320363998413086, "learning_rate": 2.5380945496602575e-05, "loss": 0.278, "step": 5852, "teacher_loss": 0.2608439326286316 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.42010578513145447, "learning_rate": 2.5385282636981353e-05, "loss": 0.2139, "step": 5853, "teacher_loss": 0.19101954996585846 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.1719742715358734, "learning_rate": 2.538961977736013e-05, "loss": 0.1616, "step": 5854, "teacher_loss": 0.16040191054344177 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.3774372637271881, "learning_rate": 2.5393956917738905e-05, "loss": 0.2081, "step": 5855, "teacher_loss": 0.18931914865970612 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.6080065369606018, "learning_rate": 2.539829405811768e-05, "loss": 0.2938, "step": 5856, "teacher_loss": 0.2588987648487091 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.41710320115089417, "learning_rate": 2.5402631198496457e-05, "loss": 0.2777, "step": 5857, "teacher_loss": 0.2621798813343048 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.9423816800117493, "learning_rate": 2.5406968338875234e-05, "loss": 0.524, "step": 5858, "teacher_loss": 0.4775207042694092 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.3307334780693054, "learning_rate": 2.5411305479254012e-05, "loss": 0.3065, "step": 5859, "teacher_loss": 0.3037795424461365 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.48244673013687134, "learning_rate": 2.541564261963279e-05, "loss": 0.2829, "step": 5860, "teacher_loss": 0.26077666878700256 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.49170243740081787, "learning_rate": 2.5419979760011567e-05, "loss": 0.3322, "step": 5861, "teacher_loss": 0.31453025341033936 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.6568790674209595, "learning_rate": 2.5424316900390345e-05, "loss": 0.264, "step": 5862, "teacher_loss": 0.2203400433063507 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.41657501459121704, "learning_rate": 2.5428654040769122e-05, "loss": 0.2149, "step": 5863, "teacher_loss": 0.19251692295074463 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.7967163324356079, "learning_rate": 2.54329911811479e-05, "loss": 0.3457, "step": 5864, "teacher_loss": 0.29555270075798035 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.19436734914779663, "learning_rate": 2.5437328321526674e-05, "loss": 0.2112, "step": 5865, "teacher_loss": 0.21310940384864807 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.6193044781684875, "learning_rate": 2.544166546190545e-05, "loss": 0.236, "step": 5866, "teacher_loss": 0.19336827099323273 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.6635338068008423, "learning_rate": 2.5446002602284226e-05, "loss": 0.3074, "step": 5867, "teacher_loss": 0.26778626441955566 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.4893947243690491, "learning_rate": 2.5450339742663004e-05, "loss": 0.209, "step": 5868, "teacher_loss": 0.17782796919345856 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.317904531955719, "learning_rate": 2.545467688304178e-05, "loss": 0.2147, "step": 5869, "teacher_loss": 0.20328830182552338 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.30255621671676636, "learning_rate": 2.545901402342056e-05, "loss": 0.2919, "step": 5870, "teacher_loss": 0.2906605899333954 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5360873937606812, "learning_rate": 2.5463351163799337e-05, "loss": 0.4032, "step": 5871, "teacher_loss": 0.3884846270084381 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.4596640467643738, "learning_rate": 2.5467688304178114e-05, "loss": 0.1922, "step": 5872, "teacher_loss": 0.16249053180217743 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.7049658298492432, "learning_rate": 2.547202544455689e-05, "loss": 0.3276, "step": 5873, "teacher_loss": 0.2856695055961609 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.12569168210029602, "learning_rate": 2.5476362584935666e-05, "loss": 0.2239, "step": 5874, "teacher_loss": 0.23479053378105164 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.36887308955192566, "learning_rate": 2.5480699725314444e-05, "loss": 0.2689, "step": 5875, "teacher_loss": 0.257793128490448 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5283715128898621, "learning_rate": 2.548503686569322e-05, "loss": 0.3153, "step": 5876, "teacher_loss": 0.2916357219219208 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.7359654903411865, "learning_rate": 2.5489374006071995e-05, "loss": 0.2732, "step": 5877, "teacher_loss": 0.2217286378145218 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 1.105780005455017, "learning_rate": 2.5493711146450773e-05, "loss": 0.3918, "step": 5878, "teacher_loss": 0.3124650716781616 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.280916303396225, "learning_rate": 2.549804828682955e-05, "loss": 0.2337, "step": 5879, "teacher_loss": 0.22843077778816223 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.15347811579704285, "learning_rate": 2.550238542720833e-05, "loss": 0.1708, "step": 5880, "teacher_loss": 0.172685444355011 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.637710690498352, "learning_rate": 2.5506722567587106e-05, "loss": 0.2786, "step": 5881, "teacher_loss": 0.23868566751480103 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 1.009932041168213, "learning_rate": 2.551105970796588e-05, "loss": 0.3413, "step": 5882, "teacher_loss": 0.26702213287353516 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.42496317625045776, "learning_rate": 2.5515396848344658e-05, "loss": 0.2825, "step": 5883, "teacher_loss": 0.2666654586791992 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5348008275032043, "learning_rate": 2.5519733988723435e-05, "loss": 0.3021, "step": 5884, "teacher_loss": 0.27626293897628784 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.2823844850063324, "learning_rate": 2.5524071129102213e-05, "loss": 0.2508, "step": 5885, "teacher_loss": 0.2472800314426422 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 1.1398496627807617, "learning_rate": 2.552840826948099e-05, "loss": 0.3568, "step": 5886, "teacher_loss": 0.2697896957397461 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.27828001976013184, "learning_rate": 2.5532745409859768e-05, "loss": 0.2207, "step": 5887, "teacher_loss": 0.21430817246437073 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.28940826654434204, "learning_rate": 2.5537082550238543e-05, "loss": 0.2113, "step": 5888, "teacher_loss": 0.20263460278511047 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.31656792759895325, "learning_rate": 2.554141969061732e-05, "loss": 0.2262, "step": 5889, "teacher_loss": 0.2161426842212677 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.5206784605979919, "learning_rate": 2.5545756830996098e-05, "loss": 0.3178, "step": 5890, "teacher_loss": 0.2952684760093689 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.48077425360679626, "learning_rate": 2.5550093971374872e-05, "loss": 0.2698, "step": 5891, "teacher_loss": 0.246351957321167 }, { "compression_loss": 0.0, "epoch": 1.06, "label_loss": 0.27060964703559875, "learning_rate": 2.555443111175365e-05, "loss": 0.193, "step": 5892, "teacher_loss": 0.18432685732841492 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4760136604309082, "learning_rate": 2.5558768252132427e-05, "loss": 0.2941, "step": 5893, "teacher_loss": 0.27389171719551086 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.3370354175567627, "learning_rate": 2.5563105392511205e-05, "loss": 0.3231, "step": 5894, "teacher_loss": 0.32158249616622925 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.22372743487358093, "learning_rate": 2.5567442532889983e-05, "loss": 0.2038, "step": 5895, "teacher_loss": 0.20161354541778564 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.570643424987793, "learning_rate": 2.557177967326876e-05, "loss": 0.2991, "step": 5896, "teacher_loss": 0.26888638734817505 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6290431022644043, "learning_rate": 2.5576116813647538e-05, "loss": 0.2893, "step": 5897, "teacher_loss": 0.25152814388275146 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.31510812044143677, "learning_rate": 2.5580453954026315e-05, "loss": 0.2134, "step": 5898, "teacher_loss": 0.20211388170719147 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.12859070301055908, "learning_rate": 2.558479109440509e-05, "loss": 0.2, "step": 5899, "teacher_loss": 0.20790529251098633 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.3454161584377289, "learning_rate": 2.5589128234783864e-05, "loss": 0.2044, "step": 5900, "teacher_loss": 0.18876144289970398 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4632948040962219, "learning_rate": 2.559346537516264e-05, "loss": 0.2802, "step": 5901, "teacher_loss": 0.25980833172798157 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.5297257304191589, "learning_rate": 2.559780251554142e-05, "loss": 0.2383, "step": 5902, "teacher_loss": 0.20586737990379333 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.7255809307098389, "learning_rate": 2.5602139655920197e-05, "loss": 0.3785, "step": 5903, "teacher_loss": 0.3399866223335266 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4395931363105774, "learning_rate": 2.5606476796298974e-05, "loss": 0.4322, "step": 5904, "teacher_loss": 0.4313408136367798 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4237062633037567, "learning_rate": 2.5610813936677752e-05, "loss": 0.2627, "step": 5905, "teacher_loss": 0.24483072757720947 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.27004578709602356, "learning_rate": 2.561515107705653e-05, "loss": 0.2218, "step": 5906, "teacher_loss": 0.21646207571029663 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.7203580141067505, "learning_rate": 2.5619488217435307e-05, "loss": 0.5771, "step": 5907, "teacher_loss": 0.5611433982849121 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.34088563919067383, "learning_rate": 2.5623825357814085e-05, "loss": 0.2054, "step": 5908, "teacher_loss": 0.19036734104156494 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6013332605361938, "learning_rate": 2.562816249819286e-05, "loss": 0.3419, "step": 5909, "teacher_loss": 0.3130248486995697 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.5472279787063599, "learning_rate": 2.5632499638571633e-05, "loss": 0.3055, "step": 5910, "teacher_loss": 0.2786266803741455 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4141944646835327, "learning_rate": 2.563683677895041e-05, "loss": 0.2094, "step": 5911, "teacher_loss": 0.18663941323757172 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.3748759329319, "learning_rate": 2.564117391932919e-05, "loss": 0.1788, "step": 5912, "teacher_loss": 0.15697060525417328 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.3228701949119568, "learning_rate": 2.5645511059707966e-05, "loss": 0.2886, "step": 5913, "teacher_loss": 0.2848265469074249 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6506280899047852, "learning_rate": 2.5649848200086744e-05, "loss": 0.4176, "step": 5914, "teacher_loss": 0.3917348086833954 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.456601083278656, "learning_rate": 2.565418534046552e-05, "loss": 0.2415, "step": 5915, "teacher_loss": 0.21759629249572754 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.21757668256759644, "learning_rate": 2.56585224808443e-05, "loss": 0.2023, "step": 5916, "teacher_loss": 0.2006537914276123 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.48809731006622314, "learning_rate": 2.5662859621223073e-05, "loss": 0.3167, "step": 5917, "teacher_loss": 0.29768240451812744 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.5608499050140381, "learning_rate": 2.566719676160185e-05, "loss": 0.2747, "step": 5918, "teacher_loss": 0.24287322163581848 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.3928006887435913, "learning_rate": 2.567153390198063e-05, "loss": 0.2987, "step": 5919, "teacher_loss": 0.28828299045562744 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.32989317178726196, "learning_rate": 2.5675871042359406e-05, "loss": 0.2181, "step": 5920, "teacher_loss": 0.20568659901618958 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6060162782669067, "learning_rate": 2.568020818273818e-05, "loss": 0.3946, "step": 5921, "teacher_loss": 0.37109583616256714 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.5648013353347778, "learning_rate": 2.5684545323116958e-05, "loss": 0.2508, "step": 5922, "teacher_loss": 0.21590884029865265 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.468291699886322, "learning_rate": 2.5688882463495736e-05, "loss": 0.2162, "step": 5923, "teacher_loss": 0.1881471574306488 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 1.1098542213439941, "learning_rate": 2.5693219603874513e-05, "loss": 0.4604, "step": 5924, "teacher_loss": 0.3882291913032532 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.2893470227718353, "learning_rate": 2.569755674425329e-05, "loss": 0.2013, "step": 5925, "teacher_loss": 0.19151625037193298 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6373051404953003, "learning_rate": 2.5701893884632065e-05, "loss": 0.265, "step": 5926, "teacher_loss": 0.22358834743499756 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.25862058997154236, "learning_rate": 2.5706231025010843e-05, "loss": 0.2437, "step": 5927, "teacher_loss": 0.24203769862651825 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.33675605058670044, "learning_rate": 2.571056816538962e-05, "loss": 0.1645, "step": 5928, "teacher_loss": 0.1453983187675476 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4254949390888214, "learning_rate": 2.5714905305768398e-05, "loss": 0.3028, "step": 5929, "teacher_loss": 0.2891344130039215 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.9150857925415039, "learning_rate": 2.5719242446147176e-05, "loss": 0.2211, "step": 5930, "teacher_loss": 0.14401228725910187 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4399152994155884, "learning_rate": 2.5723579586525953e-05, "loss": 0.3819, "step": 5931, "teacher_loss": 0.3754241168498993 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6309801340103149, "learning_rate": 2.5727916726904727e-05, "loss": 0.2407, "step": 5932, "teacher_loss": 0.19732235372066498 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.7707975506782532, "learning_rate": 2.5732253867283505e-05, "loss": 0.6893, "step": 5933, "teacher_loss": 0.6802951097488403 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.29874110221862793, "learning_rate": 2.5736591007662283e-05, "loss": 0.2685, "step": 5934, "teacher_loss": 0.2651068866252899 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.8050134778022766, "learning_rate": 2.5740928148041057e-05, "loss": 0.327, "step": 5935, "teacher_loss": 0.2739187777042389 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.4073670506477356, "learning_rate": 2.5745265288419835e-05, "loss": 0.2497, "step": 5936, "teacher_loss": 0.23214513063430786 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.38365501165390015, "learning_rate": 2.5749602428798612e-05, "loss": 0.1825, "step": 5937, "teacher_loss": 0.16010083258152008 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.303866982460022, "learning_rate": 2.575393956917739e-05, "loss": 0.2223, "step": 5938, "teacher_loss": 0.21328914165496826 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.2488536536693573, "learning_rate": 2.5758276709556167e-05, "loss": 0.1934, "step": 5939, "teacher_loss": 0.18718531727790833 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.2682528793811798, "learning_rate": 2.5762613849934945e-05, "loss": 0.1822, "step": 5940, "teacher_loss": 0.17265480756759644 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.22913682460784912, "learning_rate": 2.5766950990313723e-05, "loss": 0.2926, "step": 5941, "teacher_loss": 0.29960906505584717 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.7226784229278564, "learning_rate": 2.57712881306925e-05, "loss": 0.3916, "step": 5942, "teacher_loss": 0.35483598709106445 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6695114374160767, "learning_rate": 2.5775625271071274e-05, "loss": 0.2975, "step": 5943, "teacher_loss": 0.25612255930900574 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.5774508714675903, "learning_rate": 2.577996241145005e-05, "loss": 0.2427, "step": 5944, "teacher_loss": 0.20555315911769867 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.36685413122177124, "learning_rate": 2.5784299551828826e-05, "loss": 0.2987, "step": 5945, "teacher_loss": 0.29113471508026123 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.8600008487701416, "learning_rate": 2.5788636692207604e-05, "loss": 0.7383, "step": 5946, "teacher_loss": 0.7247945666313171 }, { "compression_loss": 0.0, "epoch": 1.07, "label_loss": 0.6512706279754639, "learning_rate": 2.579297383258638e-05, "loss": 0.3005, "step": 5947, "teacher_loss": 0.2615642845630646 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.38632798194885254, "learning_rate": 2.579731097296516e-05, "loss": 0.186, "step": 5948, "teacher_loss": 0.16370287537574768 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.18378019332885742, "learning_rate": 2.5801648113343937e-05, "loss": 0.2362, "step": 5949, "teacher_loss": 0.241979718208313 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.2505667209625244, "learning_rate": 2.5805985253722714e-05, "loss": 0.227, "step": 5950, "teacher_loss": 0.22441637516021729 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5794561505317688, "learning_rate": 2.5810322394101492e-05, "loss": 0.268, "step": 5951, "teacher_loss": 0.23337849974632263 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.21044307947158813, "learning_rate": 2.581465953448027e-05, "loss": 0.261, "step": 5952, "teacher_loss": 0.2665625512599945 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.2843470275402069, "learning_rate": 2.5818996674859044e-05, "loss": 0.2034, "step": 5953, "teacher_loss": 0.19435307383537292 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3444425165653229, "learning_rate": 2.5823333815237818e-05, "loss": 0.2487, "step": 5954, "teacher_loss": 0.23807649314403534 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6940368413925171, "learning_rate": 2.5827670955616596e-05, "loss": 0.3408, "step": 5955, "teacher_loss": 0.30152422189712524 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.47406646609306335, "learning_rate": 2.5832008095995373e-05, "loss": 0.2891, "step": 5956, "teacher_loss": 0.2685551643371582 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.39342159032821655, "learning_rate": 2.583634523637415e-05, "loss": 0.2497, "step": 5957, "teacher_loss": 0.23367643356323242 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 1.0082088708877563, "learning_rate": 2.584068237675293e-05, "loss": 0.3077, "step": 5958, "teacher_loss": 0.2298777997493744 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.2065315544605255, "learning_rate": 2.5845019517131706e-05, "loss": 0.2376, "step": 5959, "teacher_loss": 0.24100585281848907 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.15253156423568726, "learning_rate": 2.5849356657510484e-05, "loss": 0.1968, "step": 5960, "teacher_loss": 0.20173221826553345 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.1254030168056488, "learning_rate": 2.5853693797889258e-05, "loss": 0.2073, "step": 5961, "teacher_loss": 0.21641525626182556 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5679243206977844, "learning_rate": 2.5858030938268036e-05, "loss": 0.2454, "step": 5962, "teacher_loss": 0.2095583826303482 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.46123939752578735, "learning_rate": 2.5862368078646813e-05, "loss": 0.298, "step": 5963, "teacher_loss": 0.2798651158809662 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.28733736276626587, "learning_rate": 2.5866705219025588e-05, "loss": 0.2206, "step": 5964, "teacher_loss": 0.21321985125541687 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3101140856742859, "learning_rate": 2.5871042359404365e-05, "loss": 0.163, "step": 5965, "teacher_loss": 0.14664681255817413 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.2252921462059021, "learning_rate": 2.5875379499783143e-05, "loss": 0.2115, "step": 5966, "teacher_loss": 0.2099440097808838 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6086270809173584, "learning_rate": 2.587971664016192e-05, "loss": 0.1917, "step": 5967, "teacher_loss": 0.14541780948638916 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3337209224700928, "learning_rate": 2.5884053780540698e-05, "loss": 0.345, "step": 5968, "teacher_loss": 0.34627565741539 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5281961560249329, "learning_rate": 2.5888390920919476e-05, "loss": 0.2711, "step": 5969, "teacher_loss": 0.2424831986427307 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6757916212081909, "learning_rate": 2.589272806129825e-05, "loss": 0.3229, "step": 5970, "teacher_loss": 0.2836615741252899 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5589495301246643, "learning_rate": 2.5897065201677028e-05, "loss": 0.2566, "step": 5971, "teacher_loss": 0.2230542153120041 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 1.0035693645477295, "learning_rate": 2.5901402342055805e-05, "loss": 0.3863, "step": 5972, "teacher_loss": 0.31774044036865234 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.21249422430992126, "learning_rate": 2.5905739482434583e-05, "loss": 0.1916, "step": 5973, "teacher_loss": 0.1892242431640625 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.16979198157787323, "learning_rate": 2.591007662281336e-05, "loss": 0.1955, "step": 5974, "teacher_loss": 0.19830450415611267 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.39572158455848694, "learning_rate": 2.5914413763192135e-05, "loss": 0.3735, "step": 5975, "teacher_loss": 0.37099185585975647 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5492358207702637, "learning_rate": 2.5918750903570912e-05, "loss": 0.2311, "step": 5976, "teacher_loss": 0.1957527995109558 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3307785093784332, "learning_rate": 2.592308804394969e-05, "loss": 0.2133, "step": 5977, "teacher_loss": 0.20025604963302612 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6876586675643921, "learning_rate": 2.5927425184328468e-05, "loss": 0.2919, "step": 5978, "teacher_loss": 0.2479683756828308 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.18187719583511353, "learning_rate": 2.5931762324707242e-05, "loss": 0.206, "step": 5979, "teacher_loss": 0.20870481431484222 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.4458603858947754, "learning_rate": 2.593609946508602e-05, "loss": 0.2449, "step": 5980, "teacher_loss": 0.2225208282470703 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6239303946495056, "learning_rate": 2.5940436605464797e-05, "loss": 0.2862, "step": 5981, "teacher_loss": 0.24867065250873566 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.24168676137924194, "learning_rate": 2.5944773745843575e-05, "loss": 0.1827, "step": 5982, "teacher_loss": 0.17615142464637756 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5222713947296143, "learning_rate": 2.5949110886222352e-05, "loss": 0.2483, "step": 5983, "teacher_loss": 0.2178906500339508 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.1576201617717743, "learning_rate": 2.595344802660113e-05, "loss": 0.2353, "step": 5984, "teacher_loss": 0.2439640462398529 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.666489839553833, "learning_rate": 2.5957785166979908e-05, "loss": 0.3303, "step": 5985, "teacher_loss": 0.29294466972351074 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.35782110691070557, "learning_rate": 2.5962122307358682e-05, "loss": 0.2485, "step": 5986, "teacher_loss": 0.23636741936206818 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3073885440826416, "learning_rate": 2.596645944773746e-05, "loss": 0.3342, "step": 5987, "teacher_loss": 0.3371885120868683 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5758130550384521, "learning_rate": 2.5970796588116234e-05, "loss": 0.3179, "step": 5988, "teacher_loss": 0.2892420291900635 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.4360622763633728, "learning_rate": 2.597513372849501e-05, "loss": 0.2939, "step": 5989, "teacher_loss": 0.27813878655433655 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.47281211614608765, "learning_rate": 2.597947086887379e-05, "loss": 0.2242, "step": 5990, "teacher_loss": 0.19659289717674255 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6105009317398071, "learning_rate": 2.5983808009252566e-05, "loss": 0.2288, "step": 5991, "teacher_loss": 0.18637344241142273 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.45067843794822693, "learning_rate": 2.5988145149631344e-05, "loss": 0.2379, "step": 5992, "teacher_loss": 0.2142692506313324 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.32397690415382385, "learning_rate": 2.5992482290010122e-05, "loss": 0.2699, "step": 5993, "teacher_loss": 0.26385828852653503 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.9340322613716125, "learning_rate": 2.59968194303889e-05, "loss": 0.3418, "step": 5994, "teacher_loss": 0.276030033826828 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.14384032785892487, "learning_rate": 2.6001156570767677e-05, "loss": 0.1514, "step": 5995, "teacher_loss": 0.1522909551858902 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.4057866632938385, "learning_rate": 2.6005493711146455e-05, "loss": 0.2251, "step": 5996, "teacher_loss": 0.20502130687236786 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3707001805305481, "learning_rate": 2.6009830851525225e-05, "loss": 0.231, "step": 5997, "teacher_loss": 0.21544378995895386 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.3754444718360901, "learning_rate": 2.6014167991904003e-05, "loss": 0.2654, "step": 5998, "teacher_loss": 0.2531641721725464 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.6523469686508179, "learning_rate": 2.601850513228278e-05, "loss": 0.3695, "step": 5999, "teacher_loss": 0.3380582928657532 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.33952778577804565, "learning_rate": 2.6022842272661558e-05, "loss": 0.2098, "step": 6000, "teacher_loss": 0.19543160498142242 }, { "epoch": 1.08, "eval_exact_match": 79.5364238410596, "eval_f1": 87.05001005077841, "step": 6000 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.5937745571136475, "learning_rate": 2.6027179413040336e-05, "loss": 0.3033, "step": 6001, "teacher_loss": 0.2710226774215698 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.299640417098999, "learning_rate": 2.6031516553419114e-05, "loss": 0.2471, "step": 6002, "teacher_loss": 0.24126553535461426 }, { "compression_loss": 0.0, "epoch": 1.08, "label_loss": 0.44425278902053833, "learning_rate": 2.603585369379789e-05, "loss": 0.2404, "step": 6003, "teacher_loss": 0.21772953867912292 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4244877099990845, "learning_rate": 2.604019083417667e-05, "loss": 0.2056, "step": 6004, "teacher_loss": 0.18127241730690002 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.19513829052448273, "learning_rate": 2.6044527974555446e-05, "loss": 0.2713, "step": 6005, "teacher_loss": 0.27981239557266235 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.40419185161590576, "learning_rate": 2.604886511493422e-05, "loss": 0.3355, "step": 6006, "teacher_loss": 0.3278812766075134 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.11089880764484406, "learning_rate": 2.6053202255312998e-05, "loss": 0.1926, "step": 6007, "teacher_loss": 0.20165523886680603 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.7942944169044495, "learning_rate": 2.6057539395691772e-05, "loss": 0.2654, "step": 6008, "teacher_loss": 0.20668178796768188 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.19885137677192688, "learning_rate": 2.606187653607055e-05, "loss": 0.2044, "step": 6009, "teacher_loss": 0.20500102639198303 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.32747045159339905, "learning_rate": 2.6066213676449328e-05, "loss": 0.2267, "step": 6010, "teacher_loss": 0.2154829502105713 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.2849360704421997, "learning_rate": 2.6070550816828105e-05, "loss": 0.1988, "step": 6011, "teacher_loss": 0.1891832947731018 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.35029563307762146, "learning_rate": 2.6074887957206883e-05, "loss": 0.2398, "step": 6012, "teacher_loss": 0.22751376032829285 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.8549168109893799, "learning_rate": 2.607922509758566e-05, "loss": 0.371, "step": 6013, "teacher_loss": 0.31726354360580444 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.5811904668807983, "learning_rate": 2.6083562237964435e-05, "loss": 0.3952, "step": 6014, "teacher_loss": 0.37448573112487793 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.20279422402381897, "learning_rate": 2.6087899378343212e-05, "loss": 0.1965, "step": 6015, "teacher_loss": 0.1957501471042633 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.27620694041252136, "learning_rate": 2.609223651872199e-05, "loss": 0.1722, "step": 6016, "teacher_loss": 0.16064776480197906 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.29156967997550964, "learning_rate": 2.6096573659100768e-05, "loss": 0.1766, "step": 6017, "teacher_loss": 0.1637905389070511 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3402637839317322, "learning_rate": 2.6100910799479545e-05, "loss": 0.1432, "step": 6018, "teacher_loss": 0.12128326296806335 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3765139579772949, "learning_rate": 2.610524793985832e-05, "loss": 0.2826, "step": 6019, "teacher_loss": 0.27220267057418823 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.37916266918182373, "learning_rate": 2.6109585080237097e-05, "loss": 0.2153, "step": 6020, "teacher_loss": 0.19704505801200867 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.12241204082965851, "learning_rate": 2.6113922220615875e-05, "loss": 0.1483, "step": 6021, "teacher_loss": 0.15118181705474854 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.29885610938072205, "learning_rate": 2.6118259360994652e-05, "loss": 0.2191, "step": 6022, "teacher_loss": 0.21021530032157898 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.5026082396507263, "learning_rate": 2.6122596501373427e-05, "loss": 0.2831, "step": 6023, "teacher_loss": 0.2587442994117737 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.9825842380523682, "learning_rate": 2.6126933641752204e-05, "loss": 0.3187, "step": 6024, "teacher_loss": 0.2449318766593933 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.5616557002067566, "learning_rate": 2.6131270782130982e-05, "loss": 0.2479, "step": 6025, "teacher_loss": 0.2130769044160843 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.21342787146568298, "learning_rate": 2.613560792250976e-05, "loss": 0.2465, "step": 6026, "teacher_loss": 0.25022411346435547 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.7556260824203491, "learning_rate": 2.6139945062888537e-05, "loss": 0.2636, "step": 6027, "teacher_loss": 0.2089563012123108 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3174939751625061, "learning_rate": 2.6144282203267315e-05, "loss": 0.3547, "step": 6028, "teacher_loss": 0.35886937379837036 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.20685821771621704, "learning_rate": 2.6148619343646092e-05, "loss": 0.1798, "step": 6029, "teacher_loss": 0.1768152415752411 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.7881063222885132, "learning_rate": 2.6152956484024867e-05, "loss": 0.3484, "step": 6030, "teacher_loss": 0.29956334829330444 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.21901699900627136, "learning_rate": 2.6157293624403644e-05, "loss": 0.2243, "step": 6031, "teacher_loss": 0.224918931722641 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3141971230506897, "learning_rate": 2.616163076478242e-05, "loss": 0.2688, "step": 6032, "teacher_loss": 0.26375895738601685 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.25679054856300354, "learning_rate": 2.6165967905161196e-05, "loss": 0.1841, "step": 6033, "teacher_loss": 0.17600971460342407 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.34429165720939636, "learning_rate": 2.6170305045539974e-05, "loss": 0.2677, "step": 6034, "teacher_loss": 0.25921428203582764 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4749927818775177, "learning_rate": 2.617464218591875e-05, "loss": 0.2446, "step": 6035, "teacher_loss": 0.21900954842567444 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.26815804839134216, "learning_rate": 2.617897932629753e-05, "loss": 0.1884, "step": 6036, "teacher_loss": 0.17958931624889374 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6675317287445068, "learning_rate": 2.6183316466676307e-05, "loss": 0.2503, "step": 6037, "teacher_loss": 0.20397210121154785 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3510726988315582, "learning_rate": 2.6187653607055084e-05, "loss": 0.3506, "step": 6038, "teacher_loss": 0.3505968451499939 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6238422393798828, "learning_rate": 2.6191990747433862e-05, "loss": 0.2448, "step": 6039, "teacher_loss": 0.20266927778720856 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3425091803073883, "learning_rate": 2.619632788781264e-05, "loss": 0.3397, "step": 6040, "teacher_loss": 0.3393399119377136 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6078685522079468, "learning_rate": 2.620066502819141e-05, "loss": 0.3867, "step": 6041, "teacher_loss": 0.362124502658844 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.15485689043998718, "learning_rate": 2.6205002168570188e-05, "loss": 0.1364, "step": 6042, "teacher_loss": 0.1343672275543213 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.33681994676589966, "learning_rate": 2.6209339308948966e-05, "loss": 0.2945, "step": 6043, "teacher_loss": 0.2898402512073517 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6960360407829285, "learning_rate": 2.6213676449327743e-05, "loss": 0.2795, "step": 6044, "teacher_loss": 0.23319706320762634 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4189578890800476, "learning_rate": 2.621801358970652e-05, "loss": 0.2388, "step": 6045, "teacher_loss": 0.21874213218688965 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.3852342367172241, "learning_rate": 2.62223507300853e-05, "loss": 0.2275, "step": 6046, "teacher_loss": 0.20999422669410706 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4208112955093384, "learning_rate": 2.6226687870464076e-05, "loss": 0.2763, "step": 6047, "teacher_loss": 0.26026690006256104 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 1.6586482524871826, "learning_rate": 2.6231025010842854e-05, "loss": 0.3192, "step": 6048, "teacher_loss": 0.17038945853710175 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6716893911361694, "learning_rate": 2.623536215122163e-05, "loss": 0.3835, "step": 6049, "teacher_loss": 0.3514992594718933 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.2577231824398041, "learning_rate": 2.6239699291600406e-05, "loss": 0.1738, "step": 6050, "teacher_loss": 0.1644240915775299 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6482865810394287, "learning_rate": 2.6244036431979183e-05, "loss": 0.3334, "step": 6051, "teacher_loss": 0.2984405755996704 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.6949761509895325, "learning_rate": 2.6248373572357957e-05, "loss": 0.2554, "step": 6052, "teacher_loss": 0.20660439133644104 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4513072967529297, "learning_rate": 2.6252710712736735e-05, "loss": 0.2422, "step": 6053, "teacher_loss": 0.21891817450523376 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.22213289141654968, "learning_rate": 2.6257047853115513e-05, "loss": 0.1995, "step": 6054, "teacher_loss": 0.19702666997909546 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.702265739440918, "learning_rate": 2.626138499349429e-05, "loss": 0.2779, "step": 6055, "teacher_loss": 0.2307310849428177 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.5064534544944763, "learning_rate": 2.6265722133873068e-05, "loss": 0.3491, "step": 6056, "teacher_loss": 0.33158668875694275 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.2842394709587097, "learning_rate": 2.6270059274251845e-05, "loss": 0.1983, "step": 6057, "teacher_loss": 0.18871907889842987 }, { "compression_loss": 0.0, "epoch": 1.09, "label_loss": 0.4116808772087097, "learning_rate": 2.627439641463062e-05, "loss": 0.1906, "step": 6058, "teacher_loss": 0.1659819781780243 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.21029409766197205, "learning_rate": 2.6278733555009397e-05, "loss": 0.1923, "step": 6059, "teacher_loss": 0.1902952492237091 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.46655088663101196, "learning_rate": 2.6283070695388175e-05, "loss": 0.2749, "step": 6060, "teacher_loss": 0.2536253333091736 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.9944543838500977, "learning_rate": 2.6287407835766953e-05, "loss": 0.2972, "step": 6061, "teacher_loss": 0.2197316586971283 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.4365714192390442, "learning_rate": 2.6291744976145727e-05, "loss": 0.2747, "step": 6062, "teacher_loss": 0.25671032071113586 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5145626664161682, "learning_rate": 2.6296082116524504e-05, "loss": 0.2659, "step": 6063, "teacher_loss": 0.23826278746128082 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.6378750205039978, "learning_rate": 2.6300419256903282e-05, "loss": 0.3121, "step": 6064, "teacher_loss": 0.2758827209472656 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.6705365180969238, "learning_rate": 2.630475639728206e-05, "loss": 0.3703, "step": 6065, "teacher_loss": 0.33698129653930664 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3675013482570648, "learning_rate": 2.6309093537660837e-05, "loss": 0.24, "step": 6066, "teacher_loss": 0.22585347294807434 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.29251083731651306, "learning_rate": 2.631343067803961e-05, "loss": 0.3122, "step": 6067, "teacher_loss": 0.3143903315067291 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3683716654777527, "learning_rate": 2.631776781841839e-05, "loss": 0.2358, "step": 6068, "teacher_loss": 0.22107170522212982 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.6156793832778931, "learning_rate": 2.6322104958797167e-05, "loss": 0.2279, "step": 6069, "teacher_loss": 0.1848054975271225 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3482230305671692, "learning_rate": 2.6326442099175944e-05, "loss": 0.2395, "step": 6070, "teacher_loss": 0.22738529741764069 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3504316806793213, "learning_rate": 2.6330779239554722e-05, "loss": 0.2083, "step": 6071, "teacher_loss": 0.19246095418930054 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.12203864753246307, "learning_rate": 2.63351163799335e-05, "loss": 0.2279, "step": 6072, "teacher_loss": 0.23962397873401642 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.46075522899627686, "learning_rate": 2.6339453520312274e-05, "loss": 0.2758, "step": 6073, "teacher_loss": 0.2552947700023651 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.7439577579498291, "learning_rate": 2.634379066069105e-05, "loss": 0.3458, "step": 6074, "teacher_loss": 0.30151820182800293 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3551526665687561, "learning_rate": 2.634812780106983e-05, "loss": 0.254, "step": 6075, "teacher_loss": 0.24275073409080505 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.9436540603637695, "learning_rate": 2.6352464941448603e-05, "loss": 0.3511, "step": 6076, "teacher_loss": 0.2852747440338135 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.19243931770324707, "learning_rate": 2.635680208182738e-05, "loss": 0.2469, "step": 6077, "teacher_loss": 0.2529582977294922 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.47655391693115234, "learning_rate": 2.636113922220616e-05, "loss": 0.178, "step": 6078, "teacher_loss": 0.14487934112548828 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.7089213728904724, "learning_rate": 2.6365476362584936e-05, "loss": 0.3093, "step": 6079, "teacher_loss": 0.2648907005786896 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.2391085922718048, "learning_rate": 2.6369813502963714e-05, "loss": 0.1817, "step": 6080, "teacher_loss": 0.1752692013978958 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.7359232902526855, "learning_rate": 2.637415064334249e-05, "loss": 0.2313, "step": 6081, "teacher_loss": 0.17523014545440674 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5613267421722412, "learning_rate": 2.637848778372127e-05, "loss": 0.232, "step": 6082, "teacher_loss": 0.19544321298599243 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.4572908282279968, "learning_rate": 2.6382824924100047e-05, "loss": 0.2261, "step": 6083, "teacher_loss": 0.20043939352035522 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.45113933086395264, "learning_rate": 2.638716206447882e-05, "loss": 0.432, "step": 6084, "teacher_loss": 0.4298262894153595 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5387110114097595, "learning_rate": 2.6391499204857595e-05, "loss": 0.2139, "step": 6085, "teacher_loss": 0.17777490615844727 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5614681243896484, "learning_rate": 2.6395836345236373e-05, "loss": 0.2342, "step": 6086, "teacher_loss": 0.19789093732833862 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.40348750352859497, "learning_rate": 2.640017348561515e-05, "loss": 0.2325, "step": 6087, "teacher_loss": 0.21351078152656555 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.2748577296733856, "learning_rate": 2.6404510625993928e-05, "loss": 0.1867, "step": 6088, "teacher_loss": 0.17685964703559875 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.2772558033466339, "learning_rate": 2.6408847766372706e-05, "loss": 0.2171, "step": 6089, "teacher_loss": 0.21038465201854706 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.43373793363571167, "learning_rate": 2.6413184906751483e-05, "loss": 0.2217, "step": 6090, "teacher_loss": 0.1981760859489441 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5810621976852417, "learning_rate": 2.641752204713026e-05, "loss": 0.2851, "step": 6091, "teacher_loss": 0.2522633671760559 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.25044485926628113, "learning_rate": 2.642185918750904e-05, "loss": 0.2063, "step": 6092, "teacher_loss": 0.20138826966285706 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.32210400700569153, "learning_rate": 2.6426196327887816e-05, "loss": 0.2219, "step": 6093, "teacher_loss": 0.2108059823513031 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.42475032806396484, "learning_rate": 2.643053346826659e-05, "loss": 0.2774, "step": 6094, "teacher_loss": 0.26100534200668335 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3819844722747803, "learning_rate": 2.6434870608645365e-05, "loss": 0.2082, "step": 6095, "teacher_loss": 0.18890786170959473 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.40380099415779114, "learning_rate": 2.6439207749024142e-05, "loss": 0.3836, "step": 6096, "teacher_loss": 0.3813990652561188 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.8675791025161743, "learning_rate": 2.644354488940292e-05, "loss": 0.5083, "step": 6097, "teacher_loss": 0.46836966276168823 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5458240509033203, "learning_rate": 2.6447882029781697e-05, "loss": 0.2763, "step": 6098, "teacher_loss": 0.24630212783813477 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.7106964588165283, "learning_rate": 2.6452219170160475e-05, "loss": 0.3054, "step": 6099, "teacher_loss": 0.2604144811630249 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 1.066803216934204, "learning_rate": 2.6456556310539253e-05, "loss": 0.3294, "step": 6100, "teacher_loss": 0.24748049676418304 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5097984671592712, "learning_rate": 2.646089345091803e-05, "loss": 0.2988, "step": 6101, "teacher_loss": 0.275393009185791 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5041478872299194, "learning_rate": 2.6465230591296805e-05, "loss": 0.2165, "step": 6102, "teacher_loss": 0.1845349371433258 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.6240526437759399, "learning_rate": 2.6469567731675582e-05, "loss": 0.2784, "step": 6103, "teacher_loss": 0.23996350169181824 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.2629654109477997, "learning_rate": 2.647390487205436e-05, "loss": 0.2696, "step": 6104, "teacher_loss": 0.27039217948913574 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.2751466631889343, "learning_rate": 2.6478242012433137e-05, "loss": 0.1959, "step": 6105, "teacher_loss": 0.1871006041765213 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.19068299233913422, "learning_rate": 2.648257915281191e-05, "loss": 0.139, "step": 6106, "teacher_loss": 0.13321253657341003 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.42008715867996216, "learning_rate": 2.648691629319069e-05, "loss": 0.1957, "step": 6107, "teacher_loss": 0.1707797646522522 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.18562796711921692, "learning_rate": 2.6491253433569467e-05, "loss": 0.208, "step": 6108, "teacher_loss": 0.21046766638755798 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.49823611974716187, "learning_rate": 2.6495590573948245e-05, "loss": 0.4236, "step": 6109, "teacher_loss": 0.41528481245040894 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.22772350907325745, "learning_rate": 2.6499927714327022e-05, "loss": 0.2634, "step": 6110, "teacher_loss": 0.267348974943161 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.23748721182346344, "learning_rate": 2.6504264854705796e-05, "loss": 0.2111, "step": 6111, "teacher_loss": 0.20818842947483063 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.3436656594276428, "learning_rate": 2.6508601995084574e-05, "loss": 0.2044, "step": 6112, "teacher_loss": 0.1889735460281372 }, { "compression_loss": 0.0, "epoch": 1.1, "label_loss": 0.5322515368461609, "learning_rate": 2.651293913546335e-05, "loss": 0.2163, "step": 6113, "teacher_loss": 0.18124409019947052 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.6623352766036987, "learning_rate": 2.651727627584213e-05, "loss": 0.444, "step": 6114, "teacher_loss": 0.4197794795036316 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.24485337734222412, "learning_rate": 2.6521613416220907e-05, "loss": 0.1683, "step": 6115, "teacher_loss": 0.1597769856452942 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.515722393989563, "learning_rate": 2.6525950556599685e-05, "loss": 0.2977, "step": 6116, "teacher_loss": 0.2734895944595337 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.2177710235118866, "learning_rate": 2.653028769697846e-05, "loss": 0.2703, "step": 6117, "teacher_loss": 0.27613985538482666 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.2102414071559906, "learning_rate": 2.6534624837357236e-05, "loss": 0.1416, "step": 6118, "teacher_loss": 0.13402248919010162 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3124125897884369, "learning_rate": 2.6538961977736014e-05, "loss": 0.1724, "step": 6119, "teacher_loss": 0.15681597590446472 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.5765262842178345, "learning_rate": 2.6543299118114788e-05, "loss": 0.2686, "step": 6120, "teacher_loss": 0.2343907356262207 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.35706061124801636, "learning_rate": 2.6547636258493566e-05, "loss": 0.3485, "step": 6121, "teacher_loss": 0.3475823998451233 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.6210277080535889, "learning_rate": 2.6551973398872343e-05, "loss": 0.3031, "step": 6122, "teacher_loss": 0.26776760816574097 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.17806726694107056, "learning_rate": 2.655631053925112e-05, "loss": 0.303, "step": 6123, "teacher_loss": 0.31689774990081787 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.37102100253105164, "learning_rate": 2.65606476796299e-05, "loss": 0.2713, "step": 6124, "teacher_loss": 0.26021063327789307 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.567483127117157, "learning_rate": 2.6564984820008676e-05, "loss": 0.2305, "step": 6125, "teacher_loss": 0.1930255889892578 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.6561061143875122, "learning_rate": 2.6569321960387454e-05, "loss": 0.348, "step": 6126, "teacher_loss": 0.31376904249191284 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.048316821455955505, "learning_rate": 2.657365910076623e-05, "loss": 0.1274, "step": 6127, "teacher_loss": 0.13622258603572845 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.700487494468689, "learning_rate": 2.6577996241145006e-05, "loss": 0.5277, "step": 6128, "teacher_loss": 0.5085031986236572 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.8071776628494263, "learning_rate": 2.658233338152378e-05, "loss": 0.3102, "step": 6129, "teacher_loss": 0.25495368242263794 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.7379237413406372, "learning_rate": 2.6586670521902558e-05, "loss": 0.2368, "step": 6130, "teacher_loss": 0.18112680315971375 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.11972502619028091, "learning_rate": 2.6591007662281335e-05, "loss": 0.1386, "step": 6131, "teacher_loss": 0.14070644974708557 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.245040625333786, "learning_rate": 2.6595344802660113e-05, "loss": 0.2793, "step": 6132, "teacher_loss": 0.28315672278404236 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.8436753749847412, "learning_rate": 2.659968194303889e-05, "loss": 0.3104, "step": 6133, "teacher_loss": 0.251151442527771 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.5027743577957153, "learning_rate": 2.6604019083417668e-05, "loss": 0.2507, "step": 6134, "teacher_loss": 0.22271570563316345 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.49747928977012634, "learning_rate": 2.6608356223796446e-05, "loss": 0.4045, "step": 6135, "teacher_loss": 0.3942154049873352 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.635773777961731, "learning_rate": 2.6612693364175223e-05, "loss": 0.2378, "step": 6136, "teacher_loss": 0.19354480504989624 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.40331166982650757, "learning_rate": 2.6617030504554e-05, "loss": 0.2102, "step": 6137, "teacher_loss": 0.18878497183322906 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.21453385055065155, "learning_rate": 2.6621367644932775e-05, "loss": 0.1995, "step": 6138, "teacher_loss": 0.19777438044548035 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.8680122494697571, "learning_rate": 2.662570478531155e-05, "loss": 0.3962, "step": 6139, "teacher_loss": 0.34378787875175476 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.48129940032958984, "learning_rate": 2.6630041925690327e-05, "loss": 0.2093, "step": 6140, "teacher_loss": 0.1790616810321808 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.736588716506958, "learning_rate": 2.6634379066069105e-05, "loss": 0.3939, "step": 6141, "teacher_loss": 0.35582658648490906 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.2747771143913269, "learning_rate": 2.6638716206447882e-05, "loss": 0.2271, "step": 6142, "teacher_loss": 0.22183355689048767 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.4167356491088867, "learning_rate": 2.664305334682666e-05, "loss": 0.2534, "step": 6143, "teacher_loss": 0.23520107567310333 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.2032022476196289, "learning_rate": 2.6647390487205438e-05, "loss": 0.2951, "step": 6144, "teacher_loss": 0.30530422925949097 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.7460829615592957, "learning_rate": 2.6651727627584215e-05, "loss": 0.3492, "step": 6145, "teacher_loss": 0.30509495735168457 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3391437232494354, "learning_rate": 2.665606476796299e-05, "loss": 0.313, "step": 6146, "teacher_loss": 0.31009337306022644 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3658826947212219, "learning_rate": 2.6660401908341767e-05, "loss": 0.1954, "step": 6147, "teacher_loss": 0.1764914095401764 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.6718495488166809, "learning_rate": 2.6664739048720545e-05, "loss": 0.3276, "step": 6148, "teacher_loss": 0.28934958577156067 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.2850288450717926, "learning_rate": 2.6669076189099322e-05, "loss": 0.3539, "step": 6149, "teacher_loss": 0.36151546239852905 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.30485427379608154, "learning_rate": 2.6673413329478097e-05, "loss": 0.1925, "step": 6150, "teacher_loss": 0.18000689148902893 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.43627816438674927, "learning_rate": 2.6677750469856874e-05, "loss": 0.2133, "step": 6151, "teacher_loss": 0.18855538964271545 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.4119974970817566, "learning_rate": 2.6682087610235652e-05, "loss": 0.1896, "step": 6152, "teacher_loss": 0.16485372185707092 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3464997410774231, "learning_rate": 2.668642475061443e-05, "loss": 0.2839, "step": 6153, "teacher_loss": 0.2769213318824768 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.29194486141204834, "learning_rate": 2.6690761890993207e-05, "loss": 0.1984, "step": 6154, "teacher_loss": 0.1880495250225067 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.34926748275756836, "learning_rate": 2.669509903137198e-05, "loss": 0.2332, "step": 6155, "teacher_loss": 0.22029638290405273 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.30133694410324097, "learning_rate": 2.669943617175076e-05, "loss": 0.1693, "step": 6156, "teacher_loss": 0.15467047691345215 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.5656158924102783, "learning_rate": 2.6703773312129537e-05, "loss": 0.3686, "step": 6157, "teacher_loss": 0.3467395007610321 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.32877397537231445, "learning_rate": 2.6708110452508314e-05, "loss": 0.184, "step": 6158, "teacher_loss": 0.16793489456176758 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.724859356880188, "learning_rate": 2.6712447592887092e-05, "loss": 0.2803, "step": 6159, "teacher_loss": 0.23089691996574402 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3149811029434204, "learning_rate": 2.6716784733265866e-05, "loss": 0.2919, "step": 6160, "teacher_loss": 0.2893643081188202 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.5111450552940369, "learning_rate": 2.6721121873644644e-05, "loss": 0.2102, "step": 6161, "teacher_loss": 0.17675435543060303 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.9811246991157532, "learning_rate": 2.672545901402342e-05, "loss": 0.2637, "step": 6162, "teacher_loss": 0.18399053812026978 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.6269147396087646, "learning_rate": 2.67297961544022e-05, "loss": 0.2869, "step": 6163, "teacher_loss": 0.24911335110664368 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.42970097064971924, "learning_rate": 2.6734133294780973e-05, "loss": 0.2521, "step": 6164, "teacher_loss": 0.23242150247097015 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.5245120525360107, "learning_rate": 2.673847043515975e-05, "loss": 0.2382, "step": 6165, "teacher_loss": 0.20643754303455353 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.45311814546585083, "learning_rate": 2.674280757553853e-05, "loss": 0.2968, "step": 6166, "teacher_loss": 0.2793947458267212 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.25458672642707825, "learning_rate": 2.6747144715917306e-05, "loss": 0.2347, "step": 6167, "teacher_loss": 0.23254278302192688 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.8695564270019531, "learning_rate": 2.6751481856296084e-05, "loss": 0.4433, "step": 6168, "teacher_loss": 0.39592310786247253 }, { "compression_loss": 0.0, "epoch": 1.11, "label_loss": 0.3525991439819336, "learning_rate": 2.675581899667486e-05, "loss": 0.2139, "step": 6169, "teacher_loss": 0.19845744967460632 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.22229371964931488, "learning_rate": 2.676015613705364e-05, "loss": 0.2624, "step": 6170, "teacher_loss": 0.26684755086898804 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3580290973186493, "learning_rate": 2.6764493277432413e-05, "loss": 0.2192, "step": 6171, "teacher_loss": 0.20379358530044556 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5995192527770996, "learning_rate": 2.676883041781119e-05, "loss": 0.2555, "step": 6172, "teacher_loss": 0.2172919362783432 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.34453830122947693, "learning_rate": 2.6773167558189965e-05, "loss": 0.2841, "step": 6173, "teacher_loss": 0.2774102985858917 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3946101665496826, "learning_rate": 2.6777504698568743e-05, "loss": 0.2388, "step": 6174, "teacher_loss": 0.22143924236297607 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.49136483669281006, "learning_rate": 2.678184183894752e-05, "loss": 0.235, "step": 6175, "teacher_loss": 0.2064749002456665 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.31167325377464294, "learning_rate": 2.6786178979326298e-05, "loss": 0.2133, "step": 6176, "teacher_loss": 0.20236200094223022 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.41216546297073364, "learning_rate": 2.6790516119705075e-05, "loss": 0.2172, "step": 6177, "teacher_loss": 0.19551342725753784 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3572585880756378, "learning_rate": 2.6794853260083853e-05, "loss": 0.4886, "step": 6178, "teacher_loss": 0.5031484961509705 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4012985825538635, "learning_rate": 2.679919040046263e-05, "loss": 0.2282, "step": 6179, "teacher_loss": 0.2089385837316513 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3170543909072876, "learning_rate": 2.6803527540841408e-05, "loss": 0.3056, "step": 6180, "teacher_loss": 0.30435580015182495 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4239477515220642, "learning_rate": 2.6807864681220186e-05, "loss": 0.1943, "step": 6181, "teacher_loss": 0.16879016160964966 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3119276165962219, "learning_rate": 2.6812201821598957e-05, "loss": 0.2595, "step": 6182, "teacher_loss": 0.25371548533439636 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.26219457387924194, "learning_rate": 2.6816538961977734e-05, "loss": 0.2188, "step": 6183, "teacher_loss": 0.21398219466209412 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.21266594529151917, "learning_rate": 2.6820876102356512e-05, "loss": 0.2158, "step": 6184, "teacher_loss": 0.21617019176483154 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4246112108230591, "learning_rate": 2.682521324273529e-05, "loss": 0.4111, "step": 6185, "teacher_loss": 0.40964359045028687 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.38567304611206055, "learning_rate": 2.6829550383114067e-05, "loss": 0.3568, "step": 6186, "teacher_loss": 0.3535376787185669 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.43796199560165405, "learning_rate": 2.6833887523492845e-05, "loss": 0.2584, "step": 6187, "teacher_loss": 0.23841264843940735 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.47674989700317383, "learning_rate": 2.6838224663871622e-05, "loss": 0.2596, "step": 6188, "teacher_loss": 0.23544053733348846 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.26218241453170776, "learning_rate": 2.68425618042504e-05, "loss": 0.3698, "step": 6189, "teacher_loss": 0.38178274035453796 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3214132487773895, "learning_rate": 2.6846898944629178e-05, "loss": 0.2144, "step": 6190, "teacher_loss": 0.20247536897659302 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.17942091822624207, "learning_rate": 2.6851236085007952e-05, "loss": 0.2534, "step": 6191, "teacher_loss": 0.2615981698036194 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3345814347267151, "learning_rate": 2.685557322538673e-05, "loss": 0.2987, "step": 6192, "teacher_loss": 0.294662743806839 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.7157434821128845, "learning_rate": 2.6859910365765504e-05, "loss": 0.2595, "step": 6193, "teacher_loss": 0.2088591307401657 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3723817467689514, "learning_rate": 2.686424750614428e-05, "loss": 0.2833, "step": 6194, "teacher_loss": 0.273406445980072 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3967821002006531, "learning_rate": 2.686858464652306e-05, "loss": 0.1766, "step": 6195, "teacher_loss": 0.15211889147758484 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.37578219175338745, "learning_rate": 2.6872921786901837e-05, "loss": 0.2036, "step": 6196, "teacher_loss": 0.18442532420158386 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3875725567340851, "learning_rate": 2.6877258927280614e-05, "loss": 0.301, "step": 6197, "teacher_loss": 0.29136911034584045 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.31419432163238525, "learning_rate": 2.6881596067659392e-05, "loss": 0.3176, "step": 6198, "teacher_loss": 0.317990779876709 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5231434106826782, "learning_rate": 2.6885933208038166e-05, "loss": 0.2533, "step": 6199, "teacher_loss": 0.22333839535713196 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.07056882977485657, "learning_rate": 2.6890270348416944e-05, "loss": 0.1496, "step": 6200, "teacher_loss": 0.15836943686008453 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.40262824296951294, "learning_rate": 2.689460748879572e-05, "loss": 0.2244, "step": 6201, "teacher_loss": 0.20458002388477325 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.22769679129123688, "learning_rate": 2.68989446291745e-05, "loss": 0.2256, "step": 6202, "teacher_loss": 0.2253933697938919 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3718760013580322, "learning_rate": 2.6903281769553277e-05, "loss": 0.2892, "step": 6203, "teacher_loss": 0.2800065577030182 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4534824788570404, "learning_rate": 2.690761890993205e-05, "loss": 0.2105, "step": 6204, "teacher_loss": 0.18346816301345825 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5914174318313599, "learning_rate": 2.691195605031083e-05, "loss": 0.312, "step": 6205, "teacher_loss": 0.2809494137763977 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.21935322880744934, "learning_rate": 2.6916293190689606e-05, "loss": 0.3454, "step": 6206, "teacher_loss": 0.3594011068344116 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.38528817892074585, "learning_rate": 2.6920630331068384e-05, "loss": 0.2552, "step": 6207, "teacher_loss": 0.24073925614356995 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.6815232038497925, "learning_rate": 2.6924967471447158e-05, "loss": 0.494, "step": 6208, "teacher_loss": 0.4731563925743103 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.682334303855896, "learning_rate": 2.6929304611825936e-05, "loss": 0.8132, "step": 6209, "teacher_loss": 0.827795147895813 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.8604118227958679, "learning_rate": 2.6933641752204713e-05, "loss": 0.492, "step": 6210, "teacher_loss": 0.4510447680950165 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.6672353744506836, "learning_rate": 2.693797889258349e-05, "loss": 0.3776, "step": 6211, "teacher_loss": 0.34536808729171753 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.691842794418335, "learning_rate": 2.694231603296227e-05, "loss": 0.3237, "step": 6212, "teacher_loss": 0.2827497124671936 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.3167247772216797, "learning_rate": 2.6946653173341046e-05, "loss": 0.265, "step": 6213, "teacher_loss": 0.25926733016967773 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.6608999967575073, "learning_rate": 2.6950990313719824e-05, "loss": 0.4023, "step": 6214, "teacher_loss": 0.37359610199928284 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.29286032915115356, "learning_rate": 2.6955327454098598e-05, "loss": 0.2129, "step": 6215, "teacher_loss": 0.20399153232574463 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.44547098875045776, "learning_rate": 2.6959664594477376e-05, "loss": 0.2856, "step": 6216, "teacher_loss": 0.26780247688293457 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4220615029335022, "learning_rate": 2.696400173485615e-05, "loss": 0.3394, "step": 6217, "teacher_loss": 0.3301962912082672 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4473878741264343, "learning_rate": 2.6968338875234927e-05, "loss": 0.2497, "step": 6218, "teacher_loss": 0.2277584969997406 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.7074957489967346, "learning_rate": 2.6972676015613705e-05, "loss": 0.302, "step": 6219, "teacher_loss": 0.2569655179977417 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5834007263183594, "learning_rate": 2.6977013155992483e-05, "loss": 0.2672, "step": 6220, "teacher_loss": 0.23203304409980774 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.6585375070571899, "learning_rate": 2.698135029637126e-05, "loss": 0.2291, "step": 6221, "teacher_loss": 0.18136140704154968 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.4529242515563965, "learning_rate": 2.6985687436750038e-05, "loss": 0.257, "step": 6222, "teacher_loss": 0.23521792888641357 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5263648629188538, "learning_rate": 2.6990024577128816e-05, "loss": 0.2906, "step": 6223, "teacher_loss": 0.26438677310943604 }, { "compression_loss": 0.0, "epoch": 1.12, "label_loss": 0.5613182783126831, "learning_rate": 2.6994361717507593e-05, "loss": 0.306, "step": 6224, "teacher_loss": 0.2776665687561035 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.35175371170043945, "learning_rate": 2.699869885788637e-05, "loss": 0.1934, "step": 6225, "teacher_loss": 0.1758594810962677 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.17961886525154114, "learning_rate": 2.700303599826514e-05, "loss": 0.1958, "step": 6226, "teacher_loss": 0.1976485252380371 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5187227129936218, "learning_rate": 2.700737313864392e-05, "loss": 0.2682, "step": 6227, "teacher_loss": 0.24039678275585175 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.13387733697891235, "learning_rate": 2.7011710279022697e-05, "loss": 0.2003, "step": 6228, "teacher_loss": 0.20765471458435059 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.6254796385765076, "learning_rate": 2.7016047419401474e-05, "loss": 0.3468, "step": 6229, "teacher_loss": 0.3158687651157379 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.41606438159942627, "learning_rate": 2.7020384559780252e-05, "loss": 0.2887, "step": 6230, "teacher_loss": 0.27454304695129395 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5500345230102539, "learning_rate": 2.702472170015903e-05, "loss": 0.2887, "step": 6231, "teacher_loss": 0.2596123218536377 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 1.114802360534668, "learning_rate": 2.7029058840537807e-05, "loss": 0.3248, "step": 6232, "teacher_loss": 0.23704317212104797 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.2791847884654999, "learning_rate": 2.7033395980916585e-05, "loss": 0.1817, "step": 6233, "teacher_loss": 0.1709081083536148 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.32286444306373596, "learning_rate": 2.7037733121295363e-05, "loss": 0.2264, "step": 6234, "teacher_loss": 0.21571099758148193 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.406974196434021, "learning_rate": 2.7042070261674137e-05, "loss": 0.254, "step": 6235, "teacher_loss": 0.23705804347991943 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.22894413769245148, "learning_rate": 2.7046407402052914e-05, "loss": 0.196, "step": 6236, "teacher_loss": 0.19238464534282684 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.8907871246337891, "learning_rate": 2.705074454243169e-05, "loss": 0.575, "step": 6237, "teacher_loss": 0.539874792098999 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.18466398119926453, "learning_rate": 2.7055081682810466e-05, "loss": 0.1928, "step": 6238, "teacher_loss": 0.1937258094549179 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.42910873889923096, "learning_rate": 2.7059418823189244e-05, "loss": 0.2672, "step": 6239, "teacher_loss": 0.2492278814315796 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.19823455810546875, "learning_rate": 2.706375596356802e-05, "loss": 0.2276, "step": 6240, "teacher_loss": 0.23089349269866943 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.2920618951320648, "learning_rate": 2.70680931039468e-05, "loss": 0.2358, "step": 6241, "teacher_loss": 0.22954139113426208 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5309002995491028, "learning_rate": 2.7072430244325577e-05, "loss": 0.2497, "step": 6242, "teacher_loss": 0.2184501737356186 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.7090290188789368, "learning_rate": 2.707676738470435e-05, "loss": 0.4171, "step": 6243, "teacher_loss": 0.384652704000473 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.8452855348587036, "learning_rate": 2.708110452508313e-05, "loss": 0.323, "step": 6244, "teacher_loss": 0.2649414837360382 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.3568814992904663, "learning_rate": 2.7085441665461906e-05, "loss": 0.3271, "step": 6245, "teacher_loss": 0.323817640542984 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.41398942470550537, "learning_rate": 2.7089778805840684e-05, "loss": 0.2544, "step": 6246, "teacher_loss": 0.23665517568588257 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.4368191957473755, "learning_rate": 2.709411594621946e-05, "loss": 0.256, "step": 6247, "teacher_loss": 0.23593543469905853 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.452435702085495, "learning_rate": 2.7098453086598236e-05, "loss": 0.267, "step": 6248, "teacher_loss": 0.2463577389717102 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.35763415694236755, "learning_rate": 2.7102790226977013e-05, "loss": 0.3126, "step": 6249, "teacher_loss": 0.3075858950614929 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.32362717390060425, "learning_rate": 2.710712736735579e-05, "loss": 0.244, "step": 6250, "teacher_loss": 0.23513546586036682 }, { "epoch": 1.13, "eval_exact_match": 79.64049195837275, "eval_f1": 87.22254960916146, "step": 6250 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.830796480178833, "learning_rate": 2.711146450773457e-05, "loss": 0.2792, "step": 6251, "teacher_loss": 0.2179451882839203 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.13678866624832153, "learning_rate": 2.7115801648113343e-05, "loss": 0.2053, "step": 6252, "teacher_loss": 0.21286973357200623 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.20709894597530365, "learning_rate": 2.712013878849212e-05, "loss": 0.231, "step": 6253, "teacher_loss": 0.2336689829826355 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.18088707327842712, "learning_rate": 2.7124475928870898e-05, "loss": 0.1969, "step": 6254, "teacher_loss": 0.19870467483997345 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.43912485241889954, "learning_rate": 2.7128813069249676e-05, "loss": 0.3575, "step": 6255, "teacher_loss": 0.3484174311161041 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.2224666178226471, "learning_rate": 2.7133150209628453e-05, "loss": 0.2235, "step": 6256, "teacher_loss": 0.2236527055501938 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.4598965644836426, "learning_rate": 2.713748735000723e-05, "loss": 0.3054, "step": 6257, "teacher_loss": 0.28820982575416565 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.1729808896780014, "learning_rate": 2.7141824490386005e-05, "loss": 0.2324, "step": 6258, "teacher_loss": 0.23902226984500885 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.4296117424964905, "learning_rate": 2.7146161630764783e-05, "loss": 0.2808, "step": 6259, "teacher_loss": 0.2642960548400879 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5139473080635071, "learning_rate": 2.715049877114356e-05, "loss": 0.3018, "step": 6260, "teacher_loss": 0.2782681882381439 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.29237133264541626, "learning_rate": 2.7154835911522335e-05, "loss": 0.2341, "step": 6261, "teacher_loss": 0.22762879729270935 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.46264809370040894, "learning_rate": 2.7159173051901112e-05, "loss": 0.2588, "step": 6262, "teacher_loss": 0.23618757724761963 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.6601729393005371, "learning_rate": 2.716351019227989e-05, "loss": 0.2651, "step": 6263, "teacher_loss": 0.22122298181056976 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.44145405292510986, "learning_rate": 2.7167847332658668e-05, "loss": 0.2789, "step": 6264, "teacher_loss": 0.2608572840690613 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5389107465744019, "learning_rate": 2.7172184473037445e-05, "loss": 0.2824, "step": 6265, "teacher_loss": 0.25390520691871643 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.22644034028053284, "learning_rate": 2.7176521613416223e-05, "loss": 0.2139, "step": 6266, "teacher_loss": 0.2124623954296112 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.46022841334342957, "learning_rate": 2.7180858753795e-05, "loss": 0.2103, "step": 6267, "teacher_loss": 0.18256577849388123 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.555817723274231, "learning_rate": 2.7185195894173778e-05, "loss": 0.2953, "step": 6268, "teacher_loss": 0.2663660943508148 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.7040103673934937, "learning_rate": 2.7189533034552552e-05, "loss": 0.6986, "step": 6269, "teacher_loss": 0.6980404853820801 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.3168404698371887, "learning_rate": 2.7193870174931327e-05, "loss": 0.2468, "step": 6270, "teacher_loss": 0.23901307582855225 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.42855581641197205, "learning_rate": 2.7198207315310104e-05, "loss": 0.2102, "step": 6271, "teacher_loss": 0.18594574928283691 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.5309900641441345, "learning_rate": 2.7202544455688882e-05, "loss": 0.2689, "step": 6272, "teacher_loss": 0.23974266648292542 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.25031739473342896, "learning_rate": 2.720688159606766e-05, "loss": 0.2974, "step": 6273, "teacher_loss": 0.30264705419540405 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.7180373668670654, "learning_rate": 2.7211218736446437e-05, "loss": 0.2472, "step": 6274, "teacher_loss": 0.19484871625900269 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.3045549988746643, "learning_rate": 2.7215555876825215e-05, "loss": 0.2494, "step": 6275, "teacher_loss": 0.24322381615638733 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.42799344658851624, "learning_rate": 2.7219893017203992e-05, "loss": 0.2419, "step": 6276, "teacher_loss": 0.22117312252521515 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.8396463990211487, "learning_rate": 2.722423015758277e-05, "loss": 0.3573, "step": 6277, "teacher_loss": 0.3036838173866272 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.9512264728546143, "learning_rate": 2.7228567297961547e-05, "loss": 0.2852, "step": 6278, "teacher_loss": 0.2112230509519577 }, { "compression_loss": 0.0, "epoch": 1.13, "label_loss": 0.46625006198883057, "learning_rate": 2.7232904438340322e-05, "loss": 0.2114, "step": 6279, "teacher_loss": 0.18309065699577332 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.47699829936027527, "learning_rate": 2.7237241578719096e-05, "loss": 0.3059, "step": 6280, "teacher_loss": 0.2868354320526123 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3526540696620941, "learning_rate": 2.7241578719097874e-05, "loss": 0.1733, "step": 6281, "teacher_loss": 0.15342223644256592 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.340457558631897, "learning_rate": 2.724591585947665e-05, "loss": 0.3057, "step": 6282, "teacher_loss": 0.3018187880516052 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.18202698230743408, "learning_rate": 2.725025299985543e-05, "loss": 0.2616, "step": 6283, "teacher_loss": 0.2704620361328125 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.29284486174583435, "learning_rate": 2.7254590140234206e-05, "loss": 0.2263, "step": 6284, "teacher_loss": 0.21886911988258362 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.5829716920852661, "learning_rate": 2.7258927280612984e-05, "loss": 0.283, "step": 6285, "teacher_loss": 0.2496703565120697 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.8197914361953735, "learning_rate": 2.726326442099176e-05, "loss": 0.3171, "step": 6286, "teacher_loss": 0.26123958826065063 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6244129538536072, "learning_rate": 2.7267601561370536e-05, "loss": 0.2476, "step": 6287, "teacher_loss": 0.2057790756225586 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.8621256351470947, "learning_rate": 2.7271938701749314e-05, "loss": 0.3386, "step": 6288, "teacher_loss": 0.2804277241230011 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3177306056022644, "learning_rate": 2.727627584212809e-05, "loss": 0.2355, "step": 6289, "teacher_loss": 0.22641147673130035 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4798209071159363, "learning_rate": 2.728061298250687e-05, "loss": 0.2302, "step": 6290, "teacher_loss": 0.2024591863155365 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4372042417526245, "learning_rate": 2.7284950122885643e-05, "loss": 0.2285, "step": 6291, "teacher_loss": 0.20527349412441254 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.34870606660842896, "learning_rate": 2.728928726326442e-05, "loss": 0.2252, "step": 6292, "teacher_loss": 0.21147626638412476 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.27960658073425293, "learning_rate": 2.7293624403643198e-05, "loss": 0.1821, "step": 6293, "teacher_loss": 0.17124134302139282 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6935462951660156, "learning_rate": 2.7297961544021976e-05, "loss": 0.406, "step": 6294, "teacher_loss": 0.3740018606185913 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4411775469779968, "learning_rate": 2.7302298684400754e-05, "loss": 0.1909, "step": 6295, "teacher_loss": 0.1630869358778 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.23863573372364044, "learning_rate": 2.7306635824779528e-05, "loss": 0.265, "step": 6296, "teacher_loss": 0.267932653427124 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4179011583328247, "learning_rate": 2.7310972965158305e-05, "loss": 0.5044, "step": 6297, "teacher_loss": 0.5139556527137756 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3631821572780609, "learning_rate": 2.7315310105537083e-05, "loss": 0.2275, "step": 6298, "teacher_loss": 0.21240012347698212 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4780872166156769, "learning_rate": 2.731964724591586e-05, "loss": 0.2341, "step": 6299, "teacher_loss": 0.2069939374923706 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.528892993927002, "learning_rate": 2.7323984386294638e-05, "loss": 0.288, "step": 6300, "teacher_loss": 0.2612582743167877 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4992755651473999, "learning_rate": 2.7328321526673416e-05, "loss": 0.2545, "step": 6301, "teacher_loss": 0.22726961970329285 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.24839726090431213, "learning_rate": 2.733265866705219e-05, "loss": 0.1976, "step": 6302, "teacher_loss": 0.19200240075588226 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6737362146377563, "learning_rate": 2.7336995807430968e-05, "loss": 0.359, "step": 6303, "teacher_loss": 0.3240630626678467 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.1802452802658081, "learning_rate": 2.7341332947809745e-05, "loss": 0.2328, "step": 6304, "teacher_loss": 0.23865258693695068 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3792036175727844, "learning_rate": 2.734567008818852e-05, "loss": 0.2496, "step": 6305, "teacher_loss": 0.2351485937833786 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.7047542333602905, "learning_rate": 2.7350007228567297e-05, "loss": 0.2478, "step": 6306, "teacher_loss": 0.19703811407089233 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.38650083541870117, "learning_rate": 2.7354344368946075e-05, "loss": 0.2128, "step": 6307, "teacher_loss": 0.19349591434001923 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.2799943685531616, "learning_rate": 2.7358681509324852e-05, "loss": 0.2799, "step": 6308, "teacher_loss": 0.2798810601234436 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.5141726732254028, "learning_rate": 2.736301864970363e-05, "loss": 0.3894, "step": 6309, "teacher_loss": 0.3755737841129303 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.5539348721504211, "learning_rate": 2.7367355790082408e-05, "loss": 0.3395, "step": 6310, "teacher_loss": 0.3156867027282715 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4410494565963745, "learning_rate": 2.7371692930461185e-05, "loss": 0.2387, "step": 6311, "teacher_loss": 0.21624797582626343 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 1.3120110034942627, "learning_rate": 2.7376030070839963e-05, "loss": 0.4003, "step": 6312, "teacher_loss": 0.298954039812088 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.7790467143058777, "learning_rate": 2.7380367211218737e-05, "loss": 0.2833, "step": 6313, "teacher_loss": 0.22821597754955292 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.42238277196884155, "learning_rate": 2.738470435159751e-05, "loss": 0.2331, "step": 6314, "teacher_loss": 0.2121065855026245 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4535025954246521, "learning_rate": 2.738904149197629e-05, "loss": 0.2444, "step": 6315, "teacher_loss": 0.22115977108478546 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.15294498205184937, "learning_rate": 2.7393378632355067e-05, "loss": 0.1875, "step": 6316, "teacher_loss": 0.19134274125099182 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.37593281269073486, "learning_rate": 2.7397715772733844e-05, "loss": 0.2059, "step": 6317, "teacher_loss": 0.1870015561580658 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.30167967081069946, "learning_rate": 2.7402052913112622e-05, "loss": 0.2312, "step": 6318, "teacher_loss": 0.22331474721431732 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.39041733741760254, "learning_rate": 2.74063900534914e-05, "loss": 0.2816, "step": 6319, "teacher_loss": 0.26952141523361206 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.9085977077484131, "learning_rate": 2.7410727193870177e-05, "loss": 0.3891, "step": 6320, "teacher_loss": 0.33136987686157227 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3323662281036377, "learning_rate": 2.7415064334248955e-05, "loss": 0.2464, "step": 6321, "teacher_loss": 0.2368084341287613 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.34673595428466797, "learning_rate": 2.7419401474627732e-05, "loss": 0.2072, "step": 6322, "teacher_loss": 0.19164225459098816 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.8913214206695557, "learning_rate": 2.7423738615006507e-05, "loss": 0.3504, "step": 6323, "teacher_loss": 0.2902667820453644 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.3220446705818176, "learning_rate": 2.742807575538528e-05, "loss": 0.2573, "step": 6324, "teacher_loss": 0.2501053810119629 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6296714544296265, "learning_rate": 2.743241289576406e-05, "loss": 0.2786, "step": 6325, "teacher_loss": 0.23959791660308838 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.7199380397796631, "learning_rate": 2.7436750036142836e-05, "loss": 0.3585, "step": 6326, "teacher_loss": 0.3183591961860657 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.36631840467453003, "learning_rate": 2.7441087176521614e-05, "loss": 0.353, "step": 6327, "teacher_loss": 0.3515172004699707 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4167264699935913, "learning_rate": 2.744542431690039e-05, "loss": 0.2074, "step": 6328, "teacher_loss": 0.18413621187210083 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.7122987508773804, "learning_rate": 2.744976145727917e-05, "loss": 0.2447, "step": 6329, "teacher_loss": 0.19270175695419312 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.31083378195762634, "learning_rate": 2.7454098597657947e-05, "loss": 0.28, "step": 6330, "teacher_loss": 0.27654772996902466 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6642519235610962, "learning_rate": 2.7458435738036724e-05, "loss": 0.3909, "step": 6331, "teacher_loss": 0.3605444133281708 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.6344103813171387, "learning_rate": 2.74627728784155e-05, "loss": 0.2841, "step": 6332, "teacher_loss": 0.24523112177848816 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4857063293457031, "learning_rate": 2.7467110018794276e-05, "loss": 0.3079, "step": 6333, "teacher_loss": 0.2880992293357849 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.7089545726776123, "learning_rate": 2.7471447159173054e-05, "loss": 0.2574, "step": 6334, "teacher_loss": 0.20726659893989563 }, { "compression_loss": 0.0, "epoch": 1.14, "label_loss": 0.4617321193218231, "learning_rate": 2.7475784299551828e-05, "loss": 0.2639, "step": 6335, "teacher_loss": 0.2419673055410385 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.4766450822353363, "learning_rate": 2.7480121439930606e-05, "loss": 0.2375, "step": 6336, "teacher_loss": 0.21091046929359436 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5361030697822571, "learning_rate": 2.7484458580309383e-05, "loss": 0.3004, "step": 6337, "teacher_loss": 0.2742132544517517 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5607529282569885, "learning_rate": 2.748879572068816e-05, "loss": 0.2962, "step": 6338, "teacher_loss": 0.2667674720287323 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.4431915581226349, "learning_rate": 2.749313286106694e-05, "loss": 0.2619, "step": 6339, "teacher_loss": 0.24172131717205048 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.08878196775913239, "learning_rate": 2.7497470001445713e-05, "loss": 0.1881, "step": 6340, "teacher_loss": 0.19911187887191772 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.3350834548473358, "learning_rate": 2.750180714182449e-05, "loss": 0.2095, "step": 6341, "teacher_loss": 0.1955356001853943 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5581668615341187, "learning_rate": 2.7506144282203268e-05, "loss": 0.2504, "step": 6342, "teacher_loss": 0.21615996956825256 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.22815459966659546, "learning_rate": 2.7510481422582045e-05, "loss": 0.1572, "step": 6343, "teacher_loss": 0.14934581518173218 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.27737295627593994, "learning_rate": 2.7514818562960823e-05, "loss": 0.1542, "step": 6344, "teacher_loss": 0.1405356526374817 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.3505832254886627, "learning_rate": 2.75191557033396e-05, "loss": 0.1634, "step": 6345, "teacher_loss": 0.142632395029068 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.45454078912734985, "learning_rate": 2.7523492843718375e-05, "loss": 0.2362, "step": 6346, "teacher_loss": 0.21190527081489563 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5701397657394409, "learning_rate": 2.7527829984097153e-05, "loss": 0.2919, "step": 6347, "teacher_loss": 0.2609695792198181 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.6707603335380554, "learning_rate": 2.753216712447593e-05, "loss": 0.2165, "step": 6348, "teacher_loss": 0.16599249839782715 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.4907478094100952, "learning_rate": 2.7536504264854704e-05, "loss": 0.2503, "step": 6349, "teacher_loss": 0.22357916831970215 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.4619969129562378, "learning_rate": 2.7540841405233482e-05, "loss": 0.2638, "step": 6350, "teacher_loss": 0.24181979894638062 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.37157875299453735, "learning_rate": 2.754517854561226e-05, "loss": 0.1963, "step": 6351, "teacher_loss": 0.17678387463092804 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.6931254267692566, "learning_rate": 2.7549515685991037e-05, "loss": 0.3052, "step": 6352, "teacher_loss": 0.26204460859298706 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.2829945385456085, "learning_rate": 2.7553852826369815e-05, "loss": 0.1856, "step": 6353, "teacher_loss": 0.17473077774047852 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.2657064199447632, "learning_rate": 2.7558189966748593e-05, "loss": 0.2268, "step": 6354, "teacher_loss": 0.2224724441766739 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.24362745881080627, "learning_rate": 2.756252710712737e-05, "loss": 0.2257, "step": 6355, "teacher_loss": 0.223673477768898 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.74949049949646, "learning_rate": 2.7566864247506144e-05, "loss": 0.2682, "step": 6356, "teacher_loss": 0.21476420760154724 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.924878716468811, "learning_rate": 2.7571201387884922e-05, "loss": 0.2894, "step": 6357, "teacher_loss": 0.21879440546035767 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.46477210521698, "learning_rate": 2.7575538528263696e-05, "loss": 0.2589, "step": 6358, "teacher_loss": 0.2360476553440094 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.37256282567977905, "learning_rate": 2.7579875668642474e-05, "loss": 0.2644, "step": 6359, "teacher_loss": 0.2523536682128906 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.30953651666641235, "learning_rate": 2.758421280902125e-05, "loss": 0.2095, "step": 6360, "teacher_loss": 0.19843554496765137 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.41758018732070923, "learning_rate": 2.758854994940003e-05, "loss": 0.244, "step": 6361, "teacher_loss": 0.22469252347946167 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.48725467920303345, "learning_rate": 2.7592887089778807e-05, "loss": 0.2627, "step": 6362, "teacher_loss": 0.2377915382385254 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5599921345710754, "learning_rate": 2.7597224230157584e-05, "loss": 0.2788, "step": 6363, "teacher_loss": 0.24754825234413147 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.30126145482063293, "learning_rate": 2.7601561370536362e-05, "loss": 0.166, "step": 6364, "teacher_loss": 0.15102456510066986 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.41831398010253906, "learning_rate": 2.760589851091514e-05, "loss": 0.2129, "step": 6365, "teacher_loss": 0.19006478786468506 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.34805721044540405, "learning_rate": 2.7610235651293917e-05, "loss": 0.2628, "step": 6366, "teacher_loss": 0.2533630430698395 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.6780663728713989, "learning_rate": 2.7614572791672688e-05, "loss": 0.2985, "step": 6367, "teacher_loss": 0.2562969923019409 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.31010255217552185, "learning_rate": 2.7618909932051466e-05, "loss": 0.2545, "step": 6368, "teacher_loss": 0.24829965829849243 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.11529463529586792, "learning_rate": 2.7623247072430243e-05, "loss": 0.274, "step": 6369, "teacher_loss": 0.29159271717071533 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.33310267329216003, "learning_rate": 2.762758421280902e-05, "loss": 0.1801, "step": 6370, "teacher_loss": 0.16311690211296082 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.21791435778141022, "learning_rate": 2.76319213531878e-05, "loss": 0.2104, "step": 6371, "teacher_loss": 0.20957419276237488 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.7558392286300659, "learning_rate": 2.7636258493566576e-05, "loss": 0.5143, "step": 6372, "teacher_loss": 0.48749488592147827 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.4173157513141632, "learning_rate": 2.7640595633945354e-05, "loss": 0.3341, "step": 6373, "teacher_loss": 0.32487431168556213 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.40799587965011597, "learning_rate": 2.764493277432413e-05, "loss": 0.3826, "step": 6374, "teacher_loss": 0.3797566592693329 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.44847017526626587, "learning_rate": 2.764926991470291e-05, "loss": 0.2812, "step": 6375, "teacher_loss": 0.26265496015548706 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.10728070884943008, "learning_rate": 2.7653607055081683e-05, "loss": 0.1657, "step": 6376, "teacher_loss": 0.17218562960624695 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.31788110733032227, "learning_rate": 2.765794419546046e-05, "loss": 0.1942, "step": 6377, "teacher_loss": 0.18046870827674866 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.7578850984573364, "learning_rate": 2.7662281335839235e-05, "loss": 0.2984, "step": 6378, "teacher_loss": 0.24736475944519043 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.5061662793159485, "learning_rate": 2.7666618476218013e-05, "loss": 0.3042, "step": 6379, "teacher_loss": 0.2817625105381012 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.38620129227638245, "learning_rate": 2.767095561659679e-05, "loss": 0.2618, "step": 6380, "teacher_loss": 0.24792803823947906 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.31748366355895996, "learning_rate": 2.7675292756975568e-05, "loss": 0.24, "step": 6381, "teacher_loss": 0.23133675754070282 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.40637660026550293, "learning_rate": 2.7679629897354346e-05, "loss": 0.3156, "step": 6382, "teacher_loss": 0.3055616319179535 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.2733974754810333, "learning_rate": 2.7683967037733123e-05, "loss": 0.2232, "step": 6383, "teacher_loss": 0.21762242913246155 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.3107895255088806, "learning_rate": 2.7688304178111897e-05, "loss": 0.207, "step": 6384, "teacher_loss": 0.19541393220424652 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.6396842002868652, "learning_rate": 2.7692641318490675e-05, "loss": 0.5034, "step": 6385, "teacher_loss": 0.48821765184402466 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.3159215450286865, "learning_rate": 2.7696978458869453e-05, "loss": 0.2052, "step": 6386, "teacher_loss": 0.19284874200820923 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.25341832637786865, "learning_rate": 2.770131559924823e-05, "loss": 0.2182, "step": 6387, "teacher_loss": 0.21430335938930511 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.869004487991333, "learning_rate": 2.7705652739627008e-05, "loss": 0.4049, "step": 6388, "teacher_loss": 0.3533214330673218 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.23382866382598877, "learning_rate": 2.7709989880005782e-05, "loss": 0.1245, "step": 6389, "teacher_loss": 0.11231796443462372 }, { "compression_loss": 0.0, "epoch": 1.15, "label_loss": 0.3859778046607971, "learning_rate": 2.771432702038456e-05, "loss": 0.2162, "step": 6390, "teacher_loss": 0.19735205173492432 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.652147114276886, "learning_rate": 2.7718664160763337e-05, "loss": 0.3079, "step": 6391, "teacher_loss": 0.26961907744407654 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 1.125064730644226, "learning_rate": 2.7723001301142115e-05, "loss": 0.2868, "step": 6392, "teacher_loss": 0.19364196062088013 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.8060420751571655, "learning_rate": 2.772733844152089e-05, "loss": 0.3543, "step": 6393, "teacher_loss": 0.3040893077850342 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.47238391637802124, "learning_rate": 2.7731675581899667e-05, "loss": 0.2335, "step": 6394, "teacher_loss": 0.20696324110031128 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.3843284249305725, "learning_rate": 2.7736012722278445e-05, "loss": 0.3142, "step": 6395, "teacher_loss": 0.30645751953125 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.42373013496398926, "learning_rate": 2.7740349862657222e-05, "loss": 0.3323, "step": 6396, "teacher_loss": 0.32218581438064575 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.7030600309371948, "learning_rate": 2.7744687003036e-05, "loss": 0.3194, "step": 6397, "teacher_loss": 0.27676939964294434 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5010223388671875, "learning_rate": 2.7749024143414777e-05, "loss": 0.2565, "step": 6398, "teacher_loss": 0.22930167615413666 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.7198001146316528, "learning_rate": 2.7753361283793555e-05, "loss": 0.2998, "step": 6399, "teacher_loss": 0.2531326115131378 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.41181817650794983, "learning_rate": 2.775769842417233e-05, "loss": 0.2249, "step": 6400, "teacher_loss": 0.20407897233963013 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.7670482397079468, "learning_rate": 2.7762035564551107e-05, "loss": 0.276, "step": 6401, "teacher_loss": 0.2214670479297638 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5739279389381409, "learning_rate": 2.776637270492988e-05, "loss": 0.2865, "step": 6402, "teacher_loss": 0.2546163499355316 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.9249714612960815, "learning_rate": 2.777070984530866e-05, "loss": 0.804, "step": 6403, "teacher_loss": 0.7905073165893555 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.589565634727478, "learning_rate": 2.7775046985687436e-05, "loss": 0.3068, "step": 6404, "teacher_loss": 0.27539947628974915 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5522127151489258, "learning_rate": 2.7779384126066214e-05, "loss": 0.3152, "step": 6405, "teacher_loss": 0.2889009118080139 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.42046046257019043, "learning_rate": 2.778372126644499e-05, "loss": 0.2388, "step": 6406, "teacher_loss": 0.21865659952163696 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.35354235768318176, "learning_rate": 2.778805840682377e-05, "loss": 0.2875, "step": 6407, "teacher_loss": 0.28016796708106995 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4919174313545227, "learning_rate": 2.7792395547202547e-05, "loss": 0.2095, "step": 6408, "teacher_loss": 0.17809349298477173 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.25068795680999756, "learning_rate": 2.7796732687581324e-05, "loss": 0.2386, "step": 6409, "teacher_loss": 0.23725396394729614 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 1.0543603897094727, "learning_rate": 2.7801069827960102e-05, "loss": 0.4795, "step": 6410, "teacher_loss": 0.41560447216033936 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4111000895500183, "learning_rate": 2.7805406968338873e-05, "loss": 0.2818, "step": 6411, "teacher_loss": 0.2674700915813446 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.35354477167129517, "learning_rate": 2.780974410871765e-05, "loss": 0.2229, "step": 6412, "teacher_loss": 0.2083551287651062 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.34267279505729675, "learning_rate": 2.7814081249096428e-05, "loss": 0.2741, "step": 6413, "teacher_loss": 0.26642781496047974 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.3548782467842102, "learning_rate": 2.7818418389475206e-05, "loss": 0.2437, "step": 6414, "teacher_loss": 0.23138342797756195 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.2918523848056793, "learning_rate": 2.7822755529853983e-05, "loss": 0.2586, "step": 6415, "teacher_loss": 0.25490593910217285 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4233061969280243, "learning_rate": 2.782709267023276e-05, "loss": 0.3156, "step": 6416, "teacher_loss": 0.3036794066429138 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4841456413269043, "learning_rate": 2.783142981061154e-05, "loss": 0.2942, "step": 6417, "teacher_loss": 0.273131400346756 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4677685499191284, "learning_rate": 2.7835766950990316e-05, "loss": 0.2407, "step": 6418, "teacher_loss": 0.2155158817768097 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.8999346494674683, "learning_rate": 2.7840104091369094e-05, "loss": 0.2721, "step": 6419, "teacher_loss": 0.2022942304611206 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.44143545627593994, "learning_rate": 2.7844441231747868e-05, "loss": 0.2192, "step": 6420, "teacher_loss": 0.19446223974227905 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.2656756043434143, "learning_rate": 2.7848778372126646e-05, "loss": 0.1776, "step": 6421, "teacher_loss": 0.16781297326087952 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.3032439053058624, "learning_rate": 2.785311551250542e-05, "loss": 0.2425, "step": 6422, "teacher_loss": 0.23575055599212646 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.6015926003456116, "learning_rate": 2.7857452652884198e-05, "loss": 0.3123, "step": 6423, "teacher_loss": 0.2801334261894226 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5455918908119202, "learning_rate": 2.7861789793262975e-05, "loss": 0.4244, "step": 6424, "teacher_loss": 0.4109860062599182 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.12163172662258148, "learning_rate": 2.7866126933641753e-05, "loss": 0.2045, "step": 6425, "teacher_loss": 0.21372094750404358 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.36771732568740845, "learning_rate": 2.787046407402053e-05, "loss": 0.2344, "step": 6426, "teacher_loss": 0.2195802927017212 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.45391595363616943, "learning_rate": 2.7874801214399308e-05, "loss": 0.2415, "step": 6427, "teacher_loss": 0.2179490029811859 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4787466824054718, "learning_rate": 2.7879138354778082e-05, "loss": 0.203, "step": 6428, "teacher_loss": 0.17241042852401733 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.45165571570396423, "learning_rate": 2.788347549515686e-05, "loss": 0.2606, "step": 6429, "teacher_loss": 0.2393563687801361 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4721669852733612, "learning_rate": 2.7887812635535638e-05, "loss": 0.2889, "step": 6430, "teacher_loss": 0.2685520648956299 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.6080912351608276, "learning_rate": 2.7892149775914415e-05, "loss": 0.2955, "step": 6431, "teacher_loss": 0.26077860593795776 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.38433215022087097, "learning_rate": 2.7896486916293193e-05, "loss": 0.2592, "step": 6432, "teacher_loss": 0.24534134566783905 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.30968979001045227, "learning_rate": 2.7900824056671967e-05, "loss": 0.1914, "step": 6433, "teacher_loss": 0.17823836207389832 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.40799680352211, "learning_rate": 2.7905161197050745e-05, "loss": 0.2744, "step": 6434, "teacher_loss": 0.25960564613342285 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.825791597366333, "learning_rate": 2.7909498337429522e-05, "loss": 0.2314, "step": 6435, "teacher_loss": 0.16538411378860474 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.7094988822937012, "learning_rate": 2.79138354778083e-05, "loss": 0.4116, "step": 6436, "teacher_loss": 0.3785497546195984 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4178553521633148, "learning_rate": 2.7918172618187074e-05, "loss": 0.3332, "step": 6437, "teacher_loss": 0.3238462805747986 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.528026819229126, "learning_rate": 2.7922509758565852e-05, "loss": 0.3393, "step": 6438, "teacher_loss": 0.31837016344070435 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5983232855796814, "learning_rate": 2.792684689894463e-05, "loss": 0.426, "step": 6439, "teacher_loss": 0.40688955783843994 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.303390771150589, "learning_rate": 2.7931184039323407e-05, "loss": 0.3675, "step": 6440, "teacher_loss": 0.3746604919433594 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.4176335632801056, "learning_rate": 2.7935521179702185e-05, "loss": 0.3742, "step": 6441, "teacher_loss": 0.3694085478782654 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5791257619857788, "learning_rate": 2.7939858320080962e-05, "loss": 0.1743, "step": 6442, "teacher_loss": 0.1293712556362152 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.34995460510253906, "learning_rate": 2.794419546045974e-05, "loss": 0.2539, "step": 6443, "teacher_loss": 0.24321651458740234 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.5420399904251099, "learning_rate": 2.7948532600838514e-05, "loss": 0.2735, "step": 6444, "teacher_loss": 0.2436596304178238 }, { "compression_loss": 0.0, "epoch": 1.16, "label_loss": 0.2640339732170105, "learning_rate": 2.7952869741217292e-05, "loss": 0.2675, "step": 6445, "teacher_loss": 0.26785755157470703 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.34202006459236145, "learning_rate": 2.7957206881596066e-05, "loss": 0.2812, "step": 6446, "teacher_loss": 0.2744370102882385 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.37827402353286743, "learning_rate": 2.7961544021974844e-05, "loss": 0.2138, "step": 6447, "teacher_loss": 0.19549629092216492 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4497312307357788, "learning_rate": 2.796588116235362e-05, "loss": 0.1989, "step": 6448, "teacher_loss": 0.17098847031593323 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4654259979724884, "learning_rate": 2.79702183027324e-05, "loss": 0.223, "step": 6449, "teacher_loss": 0.19611376523971558 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.41498297452926636, "learning_rate": 2.7974555443111177e-05, "loss": 0.3033, "step": 6450, "teacher_loss": 0.2908935248851776 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3507779538631439, "learning_rate": 2.7978892583489954e-05, "loss": 0.2721, "step": 6451, "teacher_loss": 0.26335179805755615 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.7619916796684265, "learning_rate": 2.7983229723868732e-05, "loss": 0.2607, "step": 6452, "teacher_loss": 0.20505085587501526 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.49838903546333313, "learning_rate": 2.798756686424751e-05, "loss": 0.2618, "step": 6453, "teacher_loss": 0.2355421483516693 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3548928499221802, "learning_rate": 2.7991904004626284e-05, "loss": 0.2126, "step": 6454, "teacher_loss": 0.19674894213676453 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3909909725189209, "learning_rate": 2.7996241145005058e-05, "loss": 0.1766, "step": 6455, "teacher_loss": 0.15283125638961792 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6628071665763855, "learning_rate": 2.8000578285383835e-05, "loss": 0.3085, "step": 6456, "teacher_loss": 0.2690792381763458 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.43428581953048706, "learning_rate": 2.8004915425762613e-05, "loss": 0.2508, "step": 6457, "teacher_loss": 0.23045343160629272 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3233993649482727, "learning_rate": 2.800925256614139e-05, "loss": 0.2795, "step": 6458, "teacher_loss": 0.2745901644229889 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.2688933312892914, "learning_rate": 2.801358970652017e-05, "loss": 0.2246, "step": 6459, "teacher_loss": 0.21971148252487183 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5207000374794006, "learning_rate": 2.8017926846898946e-05, "loss": 0.2268, "step": 6460, "teacher_loss": 0.19416257739067078 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.16202065348625183, "learning_rate": 2.8022263987277724e-05, "loss": 0.2715, "step": 6461, "teacher_loss": 0.28370770812034607 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 1.190521001815796, "learning_rate": 2.80266011276565e-05, "loss": 0.3236, "step": 6462, "teacher_loss": 0.22726945579051971 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.35429704189300537, "learning_rate": 2.803093826803528e-05, "loss": 0.2654, "step": 6463, "teacher_loss": 0.25551217794418335 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4185071587562561, "learning_rate": 2.8035275408414053e-05, "loss": 0.2358, "step": 6464, "teacher_loss": 0.21551606059074402 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.37365108728408813, "learning_rate": 2.8039612548792827e-05, "loss": 0.2347, "step": 6465, "teacher_loss": 0.21926668286323547 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4165058434009552, "learning_rate": 2.8043949689171605e-05, "loss": 0.2196, "step": 6466, "teacher_loss": 0.1976912021636963 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4885011613368988, "learning_rate": 2.8048286829550383e-05, "loss": 0.22, "step": 6467, "teacher_loss": 0.19020432233810425 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.7185476422309875, "learning_rate": 2.805262396992916e-05, "loss": 0.2972, "step": 6468, "teacher_loss": 0.25042128562927246 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.861733078956604, "learning_rate": 2.8056961110307938e-05, "loss": 0.3242, "step": 6469, "teacher_loss": 0.26442667841911316 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6002152562141418, "learning_rate": 2.8061298250686715e-05, "loss": 0.2609, "step": 6470, "teacher_loss": 0.22321179509162903 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5005057454109192, "learning_rate": 2.8065635391065493e-05, "loss": 0.3142, "step": 6471, "teacher_loss": 0.2934834957122803 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3781481683254242, "learning_rate": 2.806997253144427e-05, "loss": 0.3115, "step": 6472, "teacher_loss": 0.30409160256385803 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6160641312599182, "learning_rate": 2.8074309671823045e-05, "loss": 0.2426, "step": 6473, "teacher_loss": 0.20109286904335022 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.72745680809021, "learning_rate": 2.8078646812201822e-05, "loss": 0.2808, "step": 6474, "teacher_loss": 0.23116064071655273 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6917792558670044, "learning_rate": 2.80829839525806e-05, "loss": 0.3515, "step": 6475, "teacher_loss": 0.31372568011283875 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.565274715423584, "learning_rate": 2.8087321092959374e-05, "loss": 0.313, "step": 6476, "teacher_loss": 0.2849982976913452 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.19094763696193695, "learning_rate": 2.8091658233338152e-05, "loss": 0.1602, "step": 6477, "teacher_loss": 0.15672853589057922 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4896821975708008, "learning_rate": 2.809599537371693e-05, "loss": 0.3127, "step": 6478, "teacher_loss": 0.29306167364120483 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.46105965971946716, "learning_rate": 2.8100332514095707e-05, "loss": 0.2, "step": 6479, "teacher_loss": 0.17096011340618134 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.33770179748535156, "learning_rate": 2.8104669654474485e-05, "loss": 0.2038, "step": 6480, "teacher_loss": 0.18888059258460999 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.3461548686027527, "learning_rate": 2.810900679485326e-05, "loss": 0.2147, "step": 6481, "teacher_loss": 0.20009593665599823 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.1822448968887329, "learning_rate": 2.8113343935232037e-05, "loss": 0.3037, "step": 6482, "teacher_loss": 0.3172001838684082 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4877607822418213, "learning_rate": 2.8117681075610814e-05, "loss": 0.2831, "step": 6483, "teacher_loss": 0.26035308837890625 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5004500150680542, "learning_rate": 2.8122018215989592e-05, "loss": 0.2864, "step": 6484, "teacher_loss": 0.2626700699329376 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5033208131790161, "learning_rate": 2.812635535636837e-05, "loss": 0.2337, "step": 6485, "teacher_loss": 0.20375798642635345 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6915962100028992, "learning_rate": 2.8130692496747147e-05, "loss": 0.4521, "step": 6486, "teacher_loss": 0.4255412518978119 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.655550479888916, "learning_rate": 2.813502963712592e-05, "loss": 0.4921, "step": 6487, "teacher_loss": 0.4739099144935608 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.23684318363666534, "learning_rate": 2.81393667775047e-05, "loss": 0.196, "step": 6488, "teacher_loss": 0.1914912760257721 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.7285459637641907, "learning_rate": 2.8143703917883477e-05, "loss": 0.4511, "step": 6489, "teacher_loss": 0.42024916410446167 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.2557194232940674, "learning_rate": 2.814804105826225e-05, "loss": 0.2404, "step": 6490, "teacher_loss": 0.23871949315071106 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.48193615674972534, "learning_rate": 2.815237819864103e-05, "loss": 0.2937, "step": 6491, "teacher_loss": 0.2728140950202942 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.36556559801101685, "learning_rate": 2.8156715339019806e-05, "loss": 0.2347, "step": 6492, "teacher_loss": 0.22018060088157654 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5264362692832947, "learning_rate": 2.8161052479398584e-05, "loss": 0.2476, "step": 6493, "teacher_loss": 0.21665555238723755 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.5201779007911682, "learning_rate": 2.816538961977736e-05, "loss": 0.2926, "step": 6494, "teacher_loss": 0.26726996898651123 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.6681616306304932, "learning_rate": 2.816972676015614e-05, "loss": 0.2924, "step": 6495, "teacher_loss": 0.2507016658782959 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.49516937136650085, "learning_rate": 2.8174063900534917e-05, "loss": 0.3344, "step": 6496, "teacher_loss": 0.31656545400619507 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.17480704188346863, "learning_rate": 2.8178401040913694e-05, "loss": 0.2306, "step": 6497, "teacher_loss": 0.23676884174346924 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.428730845451355, "learning_rate": 2.818273818129247e-05, "loss": 0.2939, "step": 6498, "teacher_loss": 0.2789068818092346 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.17639249563217163, "learning_rate": 2.8187075321671243e-05, "loss": 0.1406, "step": 6499, "teacher_loss": 0.13656772673130035 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.42274466156959534, "learning_rate": 2.819141246205002e-05, "loss": 0.2263, "step": 6500, "teacher_loss": 0.204467311501503 }, { "epoch": 1.17, "eval_exact_match": 79.63103122043519, "eval_f1": 87.0633606447321, "step": 6500 }, { "compression_loss": 0.0, "epoch": 1.17, "label_loss": 0.4441624879837036, "learning_rate": 2.8195749602428798e-05, "loss": 0.3149, "step": 6501, "teacher_loss": 0.30049407482147217 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3862485885620117, "learning_rate": 2.8200086742807576e-05, "loss": 0.222, "step": 6502, "teacher_loss": 0.2037688046693802 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.27982306480407715, "learning_rate": 2.8204423883186353e-05, "loss": 0.2054, "step": 6503, "teacher_loss": 0.19711607694625854 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4871073365211487, "learning_rate": 2.820876102356513e-05, "loss": 0.2411, "step": 6504, "teacher_loss": 0.21380865573883057 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.36022287607192993, "learning_rate": 2.821309816394391e-05, "loss": 0.3466, "step": 6505, "teacher_loss": 0.3450416326522827 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4648471474647522, "learning_rate": 2.8217435304322686e-05, "loss": 0.304, "step": 6506, "teacher_loss": 0.28616178035736084 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.22943875193595886, "learning_rate": 2.8221772444701464e-05, "loss": 0.2092, "step": 6507, "teacher_loss": 0.20695193111896515 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.617201030254364, "learning_rate": 2.8226109585080238e-05, "loss": 0.2288, "step": 6508, "teacher_loss": 0.1856650412082672 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.8473166227340698, "learning_rate": 2.8230446725459012e-05, "loss": 0.3197, "step": 6509, "teacher_loss": 0.2610397934913635 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.38283032178878784, "learning_rate": 2.823478386583779e-05, "loss": 0.163, "step": 6510, "teacher_loss": 0.13860836625099182 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5371262431144714, "learning_rate": 2.8239121006216567e-05, "loss": 0.2377, "step": 6511, "teacher_loss": 0.20439651608467102 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4067457914352417, "learning_rate": 2.8243458146595345e-05, "loss": 0.2463, "step": 6512, "teacher_loss": 0.22847366333007812 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3021693825721741, "learning_rate": 2.8247795286974123e-05, "loss": 0.2207, "step": 6513, "teacher_loss": 0.2116512954235077 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.11785378307104111, "learning_rate": 2.82521324273529e-05, "loss": 0.1479, "step": 6514, "teacher_loss": 0.1512056589126587 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.18052011728286743, "learning_rate": 2.8256469567731678e-05, "loss": 0.1785, "step": 6515, "teacher_loss": 0.178291916847229 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5591962337493896, "learning_rate": 2.8260806708110456e-05, "loss": 0.2511, "step": 6516, "teacher_loss": 0.21683087944984436 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.7162905931472778, "learning_rate": 2.826514384848923e-05, "loss": 0.2837, "step": 6517, "teacher_loss": 0.23564772307872772 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3718409538269043, "learning_rate": 2.8269480988868007e-05, "loss": 0.2447, "step": 6518, "teacher_loss": 0.23061034083366394 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.7232087850570679, "learning_rate": 2.8273818129246785e-05, "loss": 0.2946, "step": 6519, "teacher_loss": 0.24693775177001953 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5598447918891907, "learning_rate": 2.827815526962556e-05, "loss": 0.2183, "step": 6520, "teacher_loss": 0.18029527366161346 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5116032958030701, "learning_rate": 2.8282492410004337e-05, "loss": 0.3776, "step": 6521, "teacher_loss": 0.36270803213119507 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.6097493171691895, "learning_rate": 2.8286829550383114e-05, "loss": 0.2895, "step": 6522, "teacher_loss": 0.25388604402542114 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.48980289697647095, "learning_rate": 2.8291166690761892e-05, "loss": 0.3296, "step": 6523, "teacher_loss": 0.3118417263031006 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3335012197494507, "learning_rate": 2.829550383114067e-05, "loss": 0.2726, "step": 6524, "teacher_loss": 0.26579126715660095 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.25868645310401917, "learning_rate": 2.8299840971519444e-05, "loss": 0.2219, "step": 6525, "teacher_loss": 0.21784977614879608 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 1.0642547607421875, "learning_rate": 2.830417811189822e-05, "loss": 0.3549, "step": 6526, "teacher_loss": 0.27602994441986084 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.30355343222618103, "learning_rate": 2.8308515252277e-05, "loss": 0.1674, "step": 6527, "teacher_loss": 0.15225505828857422 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.6845999360084534, "learning_rate": 2.8312852392655777e-05, "loss": 0.2762, "step": 6528, "teacher_loss": 0.2308567762374878 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.27771762013435364, "learning_rate": 2.8317189533034554e-05, "loss": 0.2462, "step": 6529, "teacher_loss": 0.24269534647464752 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.21253381669521332, "learning_rate": 2.8321526673413332e-05, "loss": 0.2491, "step": 6530, "teacher_loss": 0.2531481683254242 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.16174480319023132, "learning_rate": 2.8325863813792106e-05, "loss": 0.1774, "step": 6531, "teacher_loss": 0.17912143468856812 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.32329800724983215, "learning_rate": 2.8330200954170884e-05, "loss": 0.4324, "step": 6532, "teacher_loss": 0.4445135295391083 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3365422785282135, "learning_rate": 2.833453809454966e-05, "loss": 0.3073, "step": 6533, "teacher_loss": 0.3040579855442047 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.46901994943618774, "learning_rate": 2.8338875234928436e-05, "loss": 0.3771, "step": 6534, "teacher_loss": 0.3668709993362427 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.2946434020996094, "learning_rate": 2.8343212375307213e-05, "loss": 0.3345, "step": 6535, "teacher_loss": 0.3389154076576233 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3871268033981323, "learning_rate": 2.834754951568599e-05, "loss": 0.1954, "step": 6536, "teacher_loss": 0.17414361238479614 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4594939351081848, "learning_rate": 2.835188665606477e-05, "loss": 0.2725, "step": 6537, "teacher_loss": 0.2517177164554596 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.2386641502380371, "learning_rate": 2.8356223796443546e-05, "loss": 0.2476, "step": 6538, "teacher_loss": 0.24860814213752747 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.330252468585968, "learning_rate": 2.8360560936822324e-05, "loss": 0.2066, "step": 6539, "teacher_loss": 0.192832350730896 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.47183674573898315, "learning_rate": 2.83648980772011e-05, "loss": 0.2883, "step": 6540, "teacher_loss": 0.26787739992141724 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.37910300493240356, "learning_rate": 2.836923521757988e-05, "loss": 0.2663, "step": 6541, "teacher_loss": 0.2537977695465088 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.9918789863586426, "learning_rate": 2.8373572357958653e-05, "loss": 0.597, "step": 6542, "teacher_loss": 0.5531030893325806 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.6531774997711182, "learning_rate": 2.8377909498337428e-05, "loss": 0.2253, "step": 6543, "teacher_loss": 0.17774729430675507 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4606553912162781, "learning_rate": 2.8382246638716205e-05, "loss": 0.2498, "step": 6544, "teacher_loss": 0.22633501887321472 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 1.1842460632324219, "learning_rate": 2.8386583779094983e-05, "loss": 0.5024, "step": 6545, "teacher_loss": 0.42660731077194214 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 1.2034978866577148, "learning_rate": 2.839092091947376e-05, "loss": 0.3708, "step": 6546, "teacher_loss": 0.27823951840400696 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.4330669045448303, "learning_rate": 2.8395258059852538e-05, "loss": 0.3111, "step": 6547, "teacher_loss": 0.2975241243839264 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5385148525238037, "learning_rate": 2.8399595200231316e-05, "loss": 0.2926, "step": 6548, "teacher_loss": 0.26525411009788513 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5660387873649597, "learning_rate": 2.8403932340610093e-05, "loss": 0.2853, "step": 6549, "teacher_loss": 0.2540532946586609 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.20650511980056763, "learning_rate": 2.840826948098887e-05, "loss": 0.1709, "step": 6550, "teacher_loss": 0.1669941395521164 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.7499240636825562, "learning_rate": 2.841260662136765e-05, "loss": 0.3219, "step": 6551, "teacher_loss": 0.27432459592819214 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.616345226764679, "learning_rate": 2.8416943761746423e-05, "loss": 0.3452, "step": 6552, "teacher_loss": 0.31503725051879883 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.5716365575790405, "learning_rate": 2.8421280902125197e-05, "loss": 0.6612, "step": 6553, "teacher_loss": 0.6711024045944214 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.39616528153419495, "learning_rate": 2.8425618042503975e-05, "loss": 0.2395, "step": 6554, "teacher_loss": 0.22213412821292877 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.580011248588562, "learning_rate": 2.8429955182882752e-05, "loss": 0.4085, "step": 6555, "teacher_loss": 0.38944506645202637 }, { "compression_loss": 0.0, "epoch": 1.18, "label_loss": 0.3728428781032562, "learning_rate": 2.843429232326153e-05, "loss": 0.2272, "step": 6556, "teacher_loss": 0.21099919080734253 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.2690393030643463, "learning_rate": 2.8438629463640308e-05, "loss": 0.3013, "step": 6557, "teacher_loss": 0.3049301505088806 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4846295416355133, "learning_rate": 2.8442966604019085e-05, "loss": 0.3562, "step": 6558, "teacher_loss": 0.3419593572616577 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4713246822357178, "learning_rate": 2.8447303744397863e-05, "loss": 0.2438, "step": 6559, "teacher_loss": 0.21851110458374023 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 1.3691496849060059, "learning_rate": 2.845164088477664e-05, "loss": 0.6125, "step": 6560, "teacher_loss": 0.5284755229949951 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.514237642288208, "learning_rate": 2.8455978025155415e-05, "loss": 0.4164, "step": 6561, "teacher_loss": 0.4055802822113037 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.2566221356391907, "learning_rate": 2.8460315165534192e-05, "loss": 0.2939, "step": 6562, "teacher_loss": 0.2980666756629944 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.32291653752326965, "learning_rate": 2.846465230591297e-05, "loss": 0.2083, "step": 6563, "teacher_loss": 0.1955586075782776 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.8036931753158569, "learning_rate": 2.8468989446291744e-05, "loss": 0.3654, "step": 6564, "teacher_loss": 0.31672126054763794 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4677231013774872, "learning_rate": 2.8473326586670522e-05, "loss": 0.3455, "step": 6565, "teacher_loss": 0.3319653868675232 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5142677426338196, "learning_rate": 2.84776637270493e-05, "loss": 0.3033, "step": 6566, "teacher_loss": 0.2798454463481903 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.2735411524772644, "learning_rate": 2.8482000867428077e-05, "loss": 0.1833, "step": 6567, "teacher_loss": 0.17324435710906982 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.31275707483291626, "learning_rate": 2.8486338007806855e-05, "loss": 0.2251, "step": 6568, "teacher_loss": 0.21538996696472168 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4707309305667877, "learning_rate": 2.849067514818563e-05, "loss": 0.3363, "step": 6569, "teacher_loss": 0.3213208019733429 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.7752500772476196, "learning_rate": 2.8495012288564406e-05, "loss": 0.304, "step": 6570, "teacher_loss": 0.2516320049762726 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.2664434015750885, "learning_rate": 2.8499349428943184e-05, "loss": 0.2395, "step": 6571, "teacher_loss": 0.23646846413612366 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.41426560282707214, "learning_rate": 2.8503686569321962e-05, "loss": 0.4227, "step": 6572, "teacher_loss": 0.42368924617767334 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5273882746696472, "learning_rate": 2.850802370970074e-05, "loss": 0.2277, "step": 6573, "teacher_loss": 0.19439566135406494 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5038734674453735, "learning_rate": 2.8512360850079514e-05, "loss": 0.2748, "step": 6574, "teacher_loss": 0.24934571981430054 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.729423463344574, "learning_rate": 2.851669799045829e-05, "loss": 0.3781, "step": 6575, "teacher_loss": 0.3391048312187195 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5298543572425842, "learning_rate": 2.852103513083707e-05, "loss": 0.3803, "step": 6576, "teacher_loss": 0.3637186288833618 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3247554898262024, "learning_rate": 2.8525372271215846e-05, "loss": 0.2286, "step": 6577, "teacher_loss": 0.21786251664161682 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5020084381103516, "learning_rate": 2.852970941159462e-05, "loss": 0.2289, "step": 6578, "teacher_loss": 0.19850805401802063 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.7192646861076355, "learning_rate": 2.8534046551973398e-05, "loss": 0.3949, "step": 6579, "teacher_loss": 0.35884955525398254 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.18281190097332, "learning_rate": 2.8538383692352176e-05, "loss": 0.1863, "step": 6580, "teacher_loss": 0.1866425722837448 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.251709908246994, "learning_rate": 2.8542720832730954e-05, "loss": 0.1922, "step": 6581, "teacher_loss": 0.18557637929916382 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5090420842170715, "learning_rate": 2.854705797310973e-05, "loss": 0.2757, "step": 6582, "teacher_loss": 0.2497730851173401 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3213367462158203, "learning_rate": 2.855139511348851e-05, "loss": 0.2323, "step": 6583, "teacher_loss": 0.22244617342948914 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.34558022022247314, "learning_rate": 2.8555732253867286e-05, "loss": 0.2649, "step": 6584, "teacher_loss": 0.2559163570404053 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.21683089435100555, "learning_rate": 2.856006939424606e-05, "loss": 0.2107, "step": 6585, "teacher_loss": 0.21000029146671295 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.6775074005126953, "learning_rate": 2.8564406534624838e-05, "loss": 0.2808, "step": 6586, "teacher_loss": 0.2367245852947235 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.319302499294281, "learning_rate": 2.8568743675003612e-05, "loss": 0.3563, "step": 6587, "teacher_loss": 0.3603971302509308 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4299470782279968, "learning_rate": 2.857308081538239e-05, "loss": 0.2441, "step": 6588, "teacher_loss": 0.2234463095664978 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.28330114483833313, "learning_rate": 2.8577417955761168e-05, "loss": 0.2017, "step": 6589, "teacher_loss": 0.1926369071006775 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.2400030493736267, "learning_rate": 2.8581755096139945e-05, "loss": 0.2206, "step": 6590, "teacher_loss": 0.21847641468048096 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.28033968806266785, "learning_rate": 2.8586092236518723e-05, "loss": 0.1746, "step": 6591, "teacher_loss": 0.16286763548851013 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.546729326248169, "learning_rate": 2.85904293768975e-05, "loss": 0.1642, "step": 6592, "teacher_loss": 0.12164296209812164 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.36625936627388, "learning_rate": 2.8594766517276278e-05, "loss": 0.268, "step": 6593, "teacher_loss": 0.257112979888916 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4279859960079193, "learning_rate": 2.8599103657655056e-05, "loss": 0.2416, "step": 6594, "teacher_loss": 0.22083792090415955 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5370991826057434, "learning_rate": 2.8603440798033833e-05, "loss": 0.2155, "step": 6595, "teacher_loss": 0.17976446449756622 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4296658933162689, "learning_rate": 2.8607777938412604e-05, "loss": 0.213, "step": 6596, "teacher_loss": 0.18888802826404572 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3599996566772461, "learning_rate": 2.8612115078791382e-05, "loss": 0.3225, "step": 6597, "teacher_loss": 0.3183194398880005 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.7844080328941345, "learning_rate": 2.861645221917016e-05, "loss": 0.3243, "step": 6598, "teacher_loss": 0.27320629358291626 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.36250829696655273, "learning_rate": 2.8620789359548937e-05, "loss": 0.2214, "step": 6599, "teacher_loss": 0.20575733482837677 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.26487189531326294, "learning_rate": 2.8625126499927715e-05, "loss": 0.2644, "step": 6600, "teacher_loss": 0.26434916257858276 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.7904253005981445, "learning_rate": 2.8629463640306492e-05, "loss": 0.3388, "step": 6601, "teacher_loss": 0.2886185050010681 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.6863570213317871, "learning_rate": 2.863380078068527e-05, "loss": 0.3363, "step": 6602, "teacher_loss": 0.29735904932022095 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3948451578617096, "learning_rate": 2.8638137921064048e-05, "loss": 0.2276, "step": 6603, "teacher_loss": 0.2090112417936325 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.6098463535308838, "learning_rate": 2.8642475061442825e-05, "loss": 0.3182, "step": 6604, "teacher_loss": 0.28574201464653015 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.5835431218147278, "learning_rate": 2.86468122018216e-05, "loss": 0.2352, "step": 6605, "teacher_loss": 0.19644707441329956 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.4386568069458008, "learning_rate": 2.8651149342200377e-05, "loss": 0.2379, "step": 6606, "teacher_loss": 0.21559371054172516 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3548958897590637, "learning_rate": 2.865548648257915e-05, "loss": 0.3312, "step": 6607, "teacher_loss": 0.32857823371887207 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.6731530427932739, "learning_rate": 2.865982362295793e-05, "loss": 0.2753, "step": 6608, "teacher_loss": 0.23104353249073029 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.10890144854784012, "learning_rate": 2.8664160763336707e-05, "loss": 0.158, "step": 6609, "teacher_loss": 0.16347917914390564 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.8006898164749146, "learning_rate": 2.8668497903715484e-05, "loss": 0.4598, "step": 6610, "teacher_loss": 0.42188167572021484 }, { "compression_loss": 0.0, "epoch": 1.19, "label_loss": 0.3550529479980469, "learning_rate": 2.8672835044094262e-05, "loss": 0.2205, "step": 6611, "teacher_loss": 0.20557433366775513 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5019738674163818, "learning_rate": 2.867717218447304e-05, "loss": 0.4294, "step": 6612, "teacher_loss": 0.42134717106819153 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3378683924674988, "learning_rate": 2.8681509324851817e-05, "loss": 0.2179, "step": 6613, "teacher_loss": 0.2046101838350296 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3226926922798157, "learning_rate": 2.868584646523059e-05, "loss": 0.2946, "step": 6614, "teacher_loss": 0.2915023863315582 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5214836001396179, "learning_rate": 2.869018360560937e-05, "loss": 0.2337, "step": 6615, "teacher_loss": 0.20174537599086761 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 1.2825591564178467, "learning_rate": 2.8694520745988147e-05, "loss": 0.4112, "step": 6616, "teacher_loss": 0.3143288493156433 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5607950091362, "learning_rate": 2.8698857886366924e-05, "loss": 0.2505, "step": 6617, "teacher_loss": 0.21606168150901794 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.6755258440971375, "learning_rate": 2.87031950267457e-05, "loss": 0.3026, "step": 6618, "teacher_loss": 0.2611140310764313 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3434546887874603, "learning_rate": 2.8707532167124476e-05, "loss": 0.2245, "step": 6619, "teacher_loss": 0.21127015352249146 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3250124454498291, "learning_rate": 2.8711869307503254e-05, "loss": 0.1893, "step": 6620, "teacher_loss": 0.1742463856935501 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.33393198251724243, "learning_rate": 2.871620644788203e-05, "loss": 0.2183, "step": 6621, "teacher_loss": 0.2055039405822754 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.35815316438674927, "learning_rate": 2.8720543588260806e-05, "loss": 0.2118, "step": 6622, "teacher_loss": 0.19549237191677094 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.47120630741119385, "learning_rate": 2.8724880728639583e-05, "loss": 0.2138, "step": 6623, "teacher_loss": 0.18520459532737732 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.30158618092536926, "learning_rate": 2.872921786901836e-05, "loss": 0.2376, "step": 6624, "teacher_loss": 0.23050986230373383 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3577457070350647, "learning_rate": 2.873355500939714e-05, "loss": 0.366, "step": 6625, "teacher_loss": 0.36687615513801575 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5216197967529297, "learning_rate": 2.8737892149775916e-05, "loss": 0.2291, "step": 6626, "teacher_loss": 0.1965874582529068 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4354352355003357, "learning_rate": 2.8742229290154694e-05, "loss": 0.2185, "step": 6627, "teacher_loss": 0.19436612725257874 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4702063202857971, "learning_rate": 2.874656643053347e-05, "loss": 0.2932, "step": 6628, "teacher_loss": 0.2735503911972046 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.398676335811615, "learning_rate": 2.8750903570912246e-05, "loss": 0.2501, "step": 6629, "teacher_loss": 0.2335902750492096 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3202178478240967, "learning_rate": 2.8755240711291023e-05, "loss": 0.3217, "step": 6630, "teacher_loss": 0.3218112587928772 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.2990373969078064, "learning_rate": 2.8759577851669797e-05, "loss": 0.2986, "step": 6631, "teacher_loss": 0.29856571555137634 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 1.0931466817855835, "learning_rate": 2.8763914992048575e-05, "loss": 0.4435, "step": 6632, "teacher_loss": 0.3712702989578247 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5398867726325989, "learning_rate": 2.8768252132427353e-05, "loss": 0.2397, "step": 6633, "teacher_loss": 0.20639733970165253 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4573324918746948, "learning_rate": 2.877258927280613e-05, "loss": 0.2388, "step": 6634, "teacher_loss": 0.2145230621099472 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.28099197149276733, "learning_rate": 2.8776926413184908e-05, "loss": 0.2576, "step": 6635, "teacher_loss": 0.25499945878982544 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3202160596847534, "learning_rate": 2.8781263553563685e-05, "loss": 0.3592, "step": 6636, "teacher_loss": 0.3635649085044861 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.26653724908828735, "learning_rate": 2.8785600693942463e-05, "loss": 0.1894, "step": 6637, "teacher_loss": 0.18082335591316223 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3379601836204529, "learning_rate": 2.878993783432124e-05, "loss": 0.2285, "step": 6638, "teacher_loss": 0.21635481715202332 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.577174186706543, "learning_rate": 2.879427497470002e-05, "loss": 0.2514, "step": 6639, "teacher_loss": 0.21521402895450592 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4185717701911926, "learning_rate": 2.879861211507879e-05, "loss": 0.2294, "step": 6640, "teacher_loss": 0.20843356847763062 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.6753969788551331, "learning_rate": 2.8802949255457567e-05, "loss": 0.2656, "step": 6641, "teacher_loss": 0.2200138121843338 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.7188202738761902, "learning_rate": 2.8807286395836344e-05, "loss": 0.4033, "step": 6642, "teacher_loss": 0.36827534437179565 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.49699872732162476, "learning_rate": 2.8811623536215122e-05, "loss": 0.3033, "step": 6643, "teacher_loss": 0.28176286816596985 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.45720675587654114, "learning_rate": 2.88159606765939e-05, "loss": 0.2886, "step": 6644, "teacher_loss": 0.2698439359664917 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.6596108675003052, "learning_rate": 2.8820297816972677e-05, "loss": 0.216, "step": 6645, "teacher_loss": 0.1666649580001831 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.6086090207099915, "learning_rate": 2.8824634957351455e-05, "loss": 0.3046, "step": 6646, "teacher_loss": 0.27079203724861145 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4501751661300659, "learning_rate": 2.8828972097730233e-05, "loss": 0.2966, "step": 6647, "teacher_loss": 0.27955523133277893 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.06954265385866165, "learning_rate": 2.883330923810901e-05, "loss": 0.3078, "step": 6648, "teacher_loss": 0.33429253101348877 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.40952762961387634, "learning_rate": 2.8837646378487784e-05, "loss": 0.2356, "step": 6649, "teacher_loss": 0.216274693608284 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.6217672824859619, "learning_rate": 2.8841983518866562e-05, "loss": 0.2936, "step": 6650, "teacher_loss": 0.25718235969543457 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5222299098968506, "learning_rate": 2.8846320659245336e-05, "loss": 0.3137, "step": 6651, "teacher_loss": 0.29052168130874634 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.3392089009284973, "learning_rate": 2.8850657799624114e-05, "loss": 0.2378, "step": 6652, "teacher_loss": 0.2265438735485077 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.687258780002594, "learning_rate": 2.885499494000289e-05, "loss": 0.4006, "step": 6653, "teacher_loss": 0.3688029646873474 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.520557165145874, "learning_rate": 2.885933208038167e-05, "loss": 0.2989, "step": 6654, "teacher_loss": 0.27431195974349976 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.29005491733551025, "learning_rate": 2.8863669220760447e-05, "loss": 0.2166, "step": 6655, "teacher_loss": 0.20847874879837036 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.256401389837265, "learning_rate": 2.8868006361139224e-05, "loss": 0.2782, "step": 6656, "teacher_loss": 0.2806296944618225 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.20678222179412842, "learning_rate": 2.8872343501518002e-05, "loss": 0.3778, "step": 6657, "teacher_loss": 0.39678436517715454 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4107311964035034, "learning_rate": 2.8876680641896776e-05, "loss": 0.2398, "step": 6658, "teacher_loss": 0.22079822421073914 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.8440202474594116, "learning_rate": 2.8881017782275554e-05, "loss": 0.2901, "step": 6659, "teacher_loss": 0.22854480147361755 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.19675973057746887, "learning_rate": 2.888535492265433e-05, "loss": 0.1634, "step": 6660, "teacher_loss": 0.15974470973014832 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.60176020860672, "learning_rate": 2.888969206303311e-05, "loss": 0.3025, "step": 6661, "teacher_loss": 0.26924076676368713 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.5388343930244446, "learning_rate": 2.8894029203411883e-05, "loss": 0.2981, "step": 6662, "teacher_loss": 0.27132290601730347 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.23070412874221802, "learning_rate": 2.889836634379066e-05, "loss": 0.2998, "step": 6663, "teacher_loss": 0.3074338734149933 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.4498476982116699, "learning_rate": 2.890270348416944e-05, "loss": 0.2971, "step": 6664, "teacher_loss": 0.28008365631103516 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.7022789120674133, "learning_rate": 2.8907040624548216e-05, "loss": 0.2903, "step": 6665, "teacher_loss": 0.24454087018966675 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.2166617214679718, "learning_rate": 2.891137776492699e-05, "loss": 0.1539, "step": 6666, "teacher_loss": 0.1469482183456421 }, { "compression_loss": 0.0, "epoch": 1.2, "label_loss": 0.26323434710502625, "learning_rate": 2.8915714905305768e-05, "loss": 0.2308, "step": 6667, "teacher_loss": 0.22717049717903137 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.46240225434303284, "learning_rate": 2.8920052045684546e-05, "loss": 0.2526, "step": 6668, "teacher_loss": 0.22924943268299103 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5485644936561584, "learning_rate": 2.8924389186063323e-05, "loss": 0.2741, "step": 6669, "teacher_loss": 0.24356421828269958 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.509672999382019, "learning_rate": 2.89287263264421e-05, "loss": 0.2127, "step": 6670, "teacher_loss": 0.17970570921897888 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.17245179414749146, "learning_rate": 2.893306346682088e-05, "loss": 0.221, "step": 6671, "teacher_loss": 0.226434588432312 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.4979356527328491, "learning_rate": 2.8937400607199653e-05, "loss": 0.3593, "step": 6672, "teacher_loss": 0.34384244680404663 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6386487483978271, "learning_rate": 2.894173774757843e-05, "loss": 0.389, "step": 6673, "teacher_loss": 0.36125892400741577 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5501528978347778, "learning_rate": 2.8946074887957208e-05, "loss": 0.2757, "step": 6674, "teacher_loss": 0.24518704414367676 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.3335050940513611, "learning_rate": 2.8950412028335982e-05, "loss": 0.256, "step": 6675, "teacher_loss": 0.24737989902496338 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6204159259796143, "learning_rate": 2.895474916871476e-05, "loss": 0.3445, "step": 6676, "teacher_loss": 0.31383490562438965 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.45950818061828613, "learning_rate": 2.8959086309093537e-05, "loss": 0.2217, "step": 6677, "teacher_loss": 0.19529595971107483 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.7635051608085632, "learning_rate": 2.8963423449472315e-05, "loss": 0.2965, "step": 6678, "teacher_loss": 0.24461869895458221 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.32681602239608765, "learning_rate": 2.8967760589851093e-05, "loss": 0.2042, "step": 6679, "teacher_loss": 0.19054418802261353 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6956307888031006, "learning_rate": 2.897209773022987e-05, "loss": 0.2578, "step": 6680, "teacher_loss": 0.20915260910987854 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5187751650810242, "learning_rate": 2.8976434870608648e-05, "loss": 0.2748, "step": 6681, "teacher_loss": 0.2476627379655838 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.19392740726470947, "learning_rate": 2.8980772010987426e-05, "loss": 0.166, "step": 6682, "teacher_loss": 0.16284844279289246 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.37802237272262573, "learning_rate": 2.89851091513662e-05, "loss": 0.2296, "step": 6683, "teacher_loss": 0.21308761835098267 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.7346153259277344, "learning_rate": 2.8989446291744974e-05, "loss": 0.2987, "step": 6684, "teacher_loss": 0.25031134486198425 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.3306788504123688, "learning_rate": 2.899378343212375e-05, "loss": 0.1675, "step": 6685, "teacher_loss": 0.14933837950229645 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.11020047217607498, "learning_rate": 2.899812057250253e-05, "loss": 0.1711, "step": 6686, "teacher_loss": 0.1778886914253235 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.21904563903808594, "learning_rate": 2.9002457712881307e-05, "loss": 0.2115, "step": 6687, "teacher_loss": 0.2106417715549469 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.7435877323150635, "learning_rate": 2.9006794853260085e-05, "loss": 0.2854, "step": 6688, "teacher_loss": 0.23450762033462524 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.31456121802330017, "learning_rate": 2.9011131993638862e-05, "loss": 0.2086, "step": 6689, "teacher_loss": 0.1968565583229065 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.8701610565185547, "learning_rate": 2.901546913401764e-05, "loss": 0.3369, "step": 6690, "teacher_loss": 0.27767497301101685 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5021225214004517, "learning_rate": 2.9019806274396417e-05, "loss": 0.6658, "step": 6691, "teacher_loss": 0.6840010285377502 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.7837206721305847, "learning_rate": 2.9024143414775195e-05, "loss": 0.3258, "step": 6692, "teacher_loss": 0.2749195098876953 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.18329495191574097, "learning_rate": 2.902848055515397e-05, "loss": 0.1873, "step": 6693, "teacher_loss": 0.18776541948318481 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.314161479473114, "learning_rate": 2.9032817695532744e-05, "loss": 0.2779, "step": 6694, "teacher_loss": 0.2738340497016907 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.4953298568725586, "learning_rate": 2.903715483591152e-05, "loss": 0.2429, "step": 6695, "teacher_loss": 0.2148183435201645 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.3808950185775757, "learning_rate": 2.90414919762903e-05, "loss": 0.3526, "step": 6696, "teacher_loss": 0.3495006561279297 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.13821841776371002, "learning_rate": 2.9045829116669076e-05, "loss": 0.1962, "step": 6697, "teacher_loss": 0.20265763998031616 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.3015595078468323, "learning_rate": 2.9050166257047854e-05, "loss": 0.2276, "step": 6698, "teacher_loss": 0.21937115490436554 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.404385507106781, "learning_rate": 2.905450339742663e-05, "loss": 0.2427, "step": 6699, "teacher_loss": 0.2246958315372467 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6914939284324646, "learning_rate": 2.905884053780541e-05, "loss": 0.3853, "step": 6700, "teacher_loss": 0.3512588143348694 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.376018762588501, "learning_rate": 2.9063177678184187e-05, "loss": 0.3181, "step": 6701, "teacher_loss": 0.3116256594657898 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5555792450904846, "learning_rate": 2.906751481856296e-05, "loss": 0.2687, "step": 6702, "teacher_loss": 0.2368665337562561 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.2588590681552887, "learning_rate": 2.907185195894174e-05, "loss": 0.2589, "step": 6703, "teacher_loss": 0.25895532965660095 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6085354685783386, "learning_rate": 2.9076189099320516e-05, "loss": 0.2443, "step": 6704, "teacher_loss": 0.20385757088661194 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.49690741300582886, "learning_rate": 2.908052623969929e-05, "loss": 0.2494, "step": 6705, "teacher_loss": 0.22194766998291016 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.2839939594268799, "learning_rate": 2.9084863380078068e-05, "loss": 0.3178, "step": 6706, "teacher_loss": 0.32156872749328613 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.29617321491241455, "learning_rate": 2.9089200520456846e-05, "loss": 0.2386, "step": 6707, "teacher_loss": 0.23216180503368378 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.6152980327606201, "learning_rate": 2.9093537660835623e-05, "loss": 0.3247, "step": 6708, "teacher_loss": 0.2924190163612366 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.2449013739824295, "learning_rate": 2.90978748012144e-05, "loss": 0.1819, "step": 6709, "teacher_loss": 0.174947589635849 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 1.5153051614761353, "learning_rate": 2.9102211941593175e-05, "loss": 0.7138, "step": 6710, "teacher_loss": 0.624751091003418 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5430542230606079, "learning_rate": 2.9106549081971953e-05, "loss": 0.2183, "step": 6711, "teacher_loss": 0.18225625157356262 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.613983154296875, "learning_rate": 2.911088622235073e-05, "loss": 0.2474, "step": 6712, "teacher_loss": 0.2066924273967743 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.4491587281227112, "learning_rate": 2.9115223362729508e-05, "loss": 0.4486, "step": 6713, "teacher_loss": 0.448542058467865 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.2147151529788971, "learning_rate": 2.9119560503108286e-05, "loss": 0.2123, "step": 6714, "teacher_loss": 0.21206744015216827 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.3600481152534485, "learning_rate": 2.9123897643487063e-05, "loss": 0.2656, "step": 6715, "teacher_loss": 0.25508952140808105 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.26259976625442505, "learning_rate": 2.9128234783865838e-05, "loss": 0.2581, "step": 6716, "teacher_loss": 0.25764578580856323 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.2889711856842041, "learning_rate": 2.9132571924244615e-05, "loss": 0.3403, "step": 6717, "teacher_loss": 0.3460536003112793 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.5844759345054626, "learning_rate": 2.9136909064623393e-05, "loss": 0.3128, "step": 6718, "teacher_loss": 0.28263533115386963 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.35446763038635254, "learning_rate": 2.9141246205002167e-05, "loss": 0.2943, "step": 6719, "teacher_loss": 0.2875811457633972 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.19606265425682068, "learning_rate": 2.9145583345380945e-05, "loss": 0.3495, "step": 6720, "teacher_loss": 0.36655569076538086 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.4621555805206299, "learning_rate": 2.9149920485759722e-05, "loss": 0.3256, "step": 6721, "teacher_loss": 0.3104441165924072 }, { "compression_loss": 0.0, "epoch": 1.21, "label_loss": 0.23333001136779785, "learning_rate": 2.91542576261385e-05, "loss": 0.2579, "step": 6722, "teacher_loss": 0.2606227993965149 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.39802294969558716, "learning_rate": 2.9158594766517278e-05, "loss": 0.2167, "step": 6723, "teacher_loss": 0.19659563899040222 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4610007405281067, "learning_rate": 2.9162931906896055e-05, "loss": 0.2659, "step": 6724, "teacher_loss": 0.24419671297073364 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.2381897270679474, "learning_rate": 2.9167269047274833e-05, "loss": 0.1551, "step": 6725, "teacher_loss": 0.14586922526359558 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.22514736652374268, "learning_rate": 2.917160618765361e-05, "loss": 0.2137, "step": 6726, "teacher_loss": 0.21245279908180237 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.2898510694503784, "learning_rate": 2.9175943328032385e-05, "loss": 0.2542, "step": 6727, "teacher_loss": 0.2502511739730835 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.8047339916229248, "learning_rate": 2.918028046841116e-05, "loss": 0.5926, "step": 6728, "teacher_loss": 0.5690531730651855 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.45106422901153564, "learning_rate": 2.9184617608789937e-05, "loss": 0.1948, "step": 6729, "teacher_loss": 0.16636264324188232 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.529430091381073, "learning_rate": 2.9188954749168714e-05, "loss": 0.2796, "step": 6730, "teacher_loss": 0.2518298625946045 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.42361733317375183, "learning_rate": 2.9193291889547492e-05, "loss": 0.3716, "step": 6731, "teacher_loss": 0.36581915616989136 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.49601200222969055, "learning_rate": 2.919762902992627e-05, "loss": 0.1831, "step": 6732, "teacher_loss": 0.14833585917949677 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.36111313104629517, "learning_rate": 2.9201966170305047e-05, "loss": 0.2332, "step": 6733, "teacher_loss": 0.2189566195011139 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4475724697113037, "learning_rate": 2.9206303310683825e-05, "loss": 0.4658, "step": 6734, "teacher_loss": 0.4678168296813965 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.8518809080123901, "learning_rate": 2.9210640451062602e-05, "loss": 0.319, "step": 6735, "teacher_loss": 0.2597949206829071 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.17262616753578186, "learning_rate": 2.921497759144138e-05, "loss": 0.1578, "step": 6736, "teacher_loss": 0.15610334277153015 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5860235691070557, "learning_rate": 2.9219314731820154e-05, "loss": 0.344, "step": 6737, "teacher_loss": 0.31710493564605713 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4595158100128174, "learning_rate": 2.922365187219893e-05, "loss": 0.2374, "step": 6738, "teacher_loss": 0.21273310482501984 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.28825536370277405, "learning_rate": 2.9227989012577706e-05, "loss": 0.1638, "step": 6739, "teacher_loss": 0.14993247389793396 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.44624924659729004, "learning_rate": 2.9232326152956484e-05, "loss": 0.2774, "step": 6740, "teacher_loss": 0.2586672306060791 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.8333386182785034, "learning_rate": 2.923666329333526e-05, "loss": 0.3883, "step": 6741, "teacher_loss": 0.3388686776161194 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.20976148545742035, "learning_rate": 2.924100043371404e-05, "loss": 0.2282, "step": 6742, "teacher_loss": 0.23023319244384766 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.28107914328575134, "learning_rate": 2.9245337574092816e-05, "loss": 0.1964, "step": 6743, "teacher_loss": 0.18703222274780273 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5735495090484619, "learning_rate": 2.9249674714471594e-05, "loss": 0.24, "step": 6744, "teacher_loss": 0.20295582711696625 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5373222827911377, "learning_rate": 2.9254011854850372e-05, "loss": 0.2017, "step": 6745, "teacher_loss": 0.16438668966293335 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.41825371980667114, "learning_rate": 2.9258348995229146e-05, "loss": 0.3412, "step": 6746, "teacher_loss": 0.33268415927886963 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4269944429397583, "learning_rate": 2.9262686135607924e-05, "loss": 0.1984, "step": 6747, "teacher_loss": 0.17300693690776825 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5833960175514221, "learning_rate": 2.92670232759867e-05, "loss": 0.3967, "step": 6748, "teacher_loss": 0.37595587968826294 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.23058518767356873, "learning_rate": 2.9271360416365475e-05, "loss": 0.1651, "step": 6749, "teacher_loss": 0.15784680843353271 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.47358250617980957, "learning_rate": 2.9275697556744253e-05, "loss": 0.2516, "step": 6750, "teacher_loss": 0.22691065073013306 }, { "epoch": 1.22, "eval_exact_match": 79.47019867549669, "eval_f1": 87.08673632005052, "step": 6750 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.14281851053237915, "learning_rate": 2.928003469712303e-05, "loss": 0.1492, "step": 6751, "teacher_loss": 0.149949312210083 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5668355822563171, "learning_rate": 2.928437183750181e-05, "loss": 0.3003, "step": 6752, "teacher_loss": 0.27065181732177734 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.26105135679244995, "learning_rate": 2.9288708977880586e-05, "loss": 0.1884, "step": 6753, "teacher_loss": 0.18028172850608826 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.31634920835494995, "learning_rate": 2.929304611825936e-05, "loss": 0.2223, "step": 6754, "teacher_loss": 0.2118682563304901 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.36808857321739197, "learning_rate": 2.9297383258638138e-05, "loss": 0.1891, "step": 6755, "teacher_loss": 0.16924867033958435 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.6843034029006958, "learning_rate": 2.9301720399016915e-05, "loss": 0.4061, "step": 6756, "teacher_loss": 0.37513428926467896 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.251117080450058, "learning_rate": 2.9306057539395693e-05, "loss": 0.2274, "step": 6757, "teacher_loss": 0.22479116916656494 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.7825935482978821, "learning_rate": 2.931039467977447e-05, "loss": 0.322, "step": 6758, "teacher_loss": 0.2708261013031006 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.32662642002105713, "learning_rate": 2.9314731820153248e-05, "loss": 0.1781, "step": 6759, "teacher_loss": 0.1616249829530716 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4021502137184143, "learning_rate": 2.9319068960532023e-05, "loss": 0.3657, "step": 6760, "teacher_loss": 0.3616999387741089 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.3031701445579529, "learning_rate": 2.93234061009108e-05, "loss": 0.2345, "step": 6761, "teacher_loss": 0.2269253134727478 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.36989375948905945, "learning_rate": 2.9327743241289578e-05, "loss": 0.2905, "step": 6762, "teacher_loss": 0.2817283272743225 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.18691489100456238, "learning_rate": 2.9332080381668352e-05, "loss": 0.2453, "step": 6763, "teacher_loss": 0.2517717480659485 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.8611816167831421, "learning_rate": 2.933641752204713e-05, "loss": 0.5945, "step": 6764, "teacher_loss": 0.5648839473724365 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.3448590040206909, "learning_rate": 2.9340754662425907e-05, "loss": 0.2392, "step": 6765, "teacher_loss": 0.22744786739349365 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5437180995941162, "learning_rate": 2.9345091802804685e-05, "loss": 0.4036, "step": 6766, "teacher_loss": 0.3880671262741089 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.6874891519546509, "learning_rate": 2.9349428943183462e-05, "loss": 0.3058, "step": 6767, "teacher_loss": 0.26336002349853516 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5390881299972534, "learning_rate": 2.935376608356224e-05, "loss": 0.5098, "step": 6768, "teacher_loss": 0.5065224766731262 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.1827385425567627, "learning_rate": 2.9358103223941018e-05, "loss": 0.2088, "step": 6769, "teacher_loss": 0.2116827368736267 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.46948230266571045, "learning_rate": 2.9362440364319792e-05, "loss": 0.2793, "step": 6770, "teacher_loss": 0.2581137418746948 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.23580576479434967, "learning_rate": 2.936677750469857e-05, "loss": 0.187, "step": 6771, "teacher_loss": 0.181581050157547 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.7922438383102417, "learning_rate": 2.9371114645077344e-05, "loss": 0.2944, "step": 6772, "teacher_loss": 0.23913924396038055 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.3974483013153076, "learning_rate": 2.937545178545612e-05, "loss": 0.2784, "step": 6773, "teacher_loss": 0.2651284337043762 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.4929448366165161, "learning_rate": 2.93797889258349e-05, "loss": 0.2425, "step": 6774, "teacher_loss": 0.21462717652320862 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5806875824928284, "learning_rate": 2.9384126066213677e-05, "loss": 0.3026, "step": 6775, "teacher_loss": 0.27164673805236816 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.2874259948730469, "learning_rate": 2.9388463206592454e-05, "loss": 0.2783, "step": 6776, "teacher_loss": 0.2772985100746155 }, { "compression_loss": 0.0, "epoch": 1.22, "label_loss": 0.5340349674224854, "learning_rate": 2.9392800346971232e-05, "loss": 0.2609, "step": 6777, "teacher_loss": 0.2305103838443756 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.7590471506118774, "learning_rate": 2.939713748735001e-05, "loss": 0.2973, "step": 6778, "teacher_loss": 0.24595658481121063 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.4682631492614746, "learning_rate": 2.9401474627728787e-05, "loss": 0.2181, "step": 6779, "teacher_loss": 0.1903439164161682 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.24390703439712524, "learning_rate": 2.9405811768107565e-05, "loss": 0.2025, "step": 6780, "teacher_loss": 0.19793207943439484 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6501028537750244, "learning_rate": 2.9410148908486336e-05, "loss": 0.3255, "step": 6781, "teacher_loss": 0.28944146633148193 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.20671355724334717, "learning_rate": 2.9414486048865113e-05, "loss": 0.2272, "step": 6782, "teacher_loss": 0.22946389019489288 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.462342232465744, "learning_rate": 2.941882318924389e-05, "loss": 0.281, "step": 6783, "teacher_loss": 0.26079607009887695 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6197547912597656, "learning_rate": 2.942316032962267e-05, "loss": 0.3039, "step": 6784, "teacher_loss": 0.2688131034374237 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.46017757058143616, "learning_rate": 2.9427497470001446e-05, "loss": 0.3332, "step": 6785, "teacher_loss": 0.31913435459136963 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.49768027663230896, "learning_rate": 2.9431834610380224e-05, "loss": 0.2101, "step": 6786, "teacher_loss": 0.17813362181186676 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5406091213226318, "learning_rate": 2.9436171750759e-05, "loss": 0.3867, "step": 6787, "teacher_loss": 0.36960452795028687 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.3446793258190155, "learning_rate": 2.944050889113778e-05, "loss": 0.268, "step": 6788, "teacher_loss": 0.25949758291244507 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6057062149047852, "learning_rate": 2.9444846031516557e-05, "loss": 0.3534, "step": 6789, "teacher_loss": 0.32541871070861816 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.3547956347465515, "learning_rate": 2.944918317189533e-05, "loss": 0.3472, "step": 6790, "teacher_loss": 0.3463127613067627 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.12161026149988174, "learning_rate": 2.945352031227411e-05, "loss": 0.225, "step": 6791, "teacher_loss": 0.2365073263645172 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.7441861033439636, "learning_rate": 2.9457857452652883e-05, "loss": 0.2944, "step": 6792, "teacher_loss": 0.24440601468086243 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.30074024200439453, "learning_rate": 2.946219459303166e-05, "loss": 0.2229, "step": 6793, "teacher_loss": 0.21424353122711182 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.8027220368385315, "learning_rate": 2.9466531733410438e-05, "loss": 0.29, "step": 6794, "teacher_loss": 0.2330566644668579 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.8364348411560059, "learning_rate": 2.9470868873789216e-05, "loss": 0.3632, "step": 6795, "teacher_loss": 0.3106558322906494 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.28005072474479675, "learning_rate": 2.9475206014167993e-05, "loss": 0.2533, "step": 6796, "teacher_loss": 0.2503625154495239 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.21674242615699768, "learning_rate": 2.947954315454677e-05, "loss": 0.1867, "step": 6797, "teacher_loss": 0.18339872360229492 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.26481059193611145, "learning_rate": 2.948388029492555e-05, "loss": 0.2692, "step": 6798, "teacher_loss": 0.269730806350708 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5599538087844849, "learning_rate": 2.9488217435304323e-05, "loss": 0.2251, "step": 6799, "teacher_loss": 0.1878460943698883 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.7688077688217163, "learning_rate": 2.94925545756831e-05, "loss": 0.3846, "step": 6800, "teacher_loss": 0.34188467264175415 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5688630938529968, "learning_rate": 2.9496891716061878e-05, "loss": 0.3011, "step": 6801, "teacher_loss": 0.27129557728767395 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5558436512947083, "learning_rate": 2.9501228856440656e-05, "loss": 0.256, "step": 6802, "teacher_loss": 0.2227122187614441 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.39539194107055664, "learning_rate": 2.950556599681943e-05, "loss": 0.2597, "step": 6803, "teacher_loss": 0.2446136474609375 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.2846057415008545, "learning_rate": 2.9509903137198207e-05, "loss": 0.2233, "step": 6804, "teacher_loss": 0.21647527813911438 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6721909046173096, "learning_rate": 2.9514240277576985e-05, "loss": 0.3141, "step": 6805, "teacher_loss": 0.2742575705051422 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.3298438489437103, "learning_rate": 2.9518577417955763e-05, "loss": 0.253, "step": 6806, "teacher_loss": 0.24450691044330597 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5269618630409241, "learning_rate": 2.9522914558334537e-05, "loss": 0.1944, "step": 6807, "teacher_loss": 0.15742658078670502 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.9994422197341919, "learning_rate": 2.9527251698713314e-05, "loss": 0.3766, "step": 6808, "teacher_loss": 0.30739444494247437 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.44173574447631836, "learning_rate": 2.9531588839092092e-05, "loss": 0.2821, "step": 6809, "teacher_loss": 0.26436513662338257 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 1.069823980331421, "learning_rate": 2.953592597947087e-05, "loss": 0.4399, "step": 6810, "teacher_loss": 0.36993080377578735 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.3338903486728668, "learning_rate": 2.9540263119849647e-05, "loss": 0.1966, "step": 6811, "teacher_loss": 0.1812984198331833 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.18092194199562073, "learning_rate": 2.9544600260228425e-05, "loss": 0.1459, "step": 6812, "teacher_loss": 0.14199167490005493 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.4730415344238281, "learning_rate": 2.9548937400607203e-05, "loss": 0.2613, "step": 6813, "teacher_loss": 0.2377467155456543 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6410003304481506, "learning_rate": 2.9553274540985977e-05, "loss": 0.2682, "step": 6814, "teacher_loss": 0.22678926587104797 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.674156904220581, "learning_rate": 2.9557611681364754e-05, "loss": 0.3221, "step": 6815, "teacher_loss": 0.282967209815979 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.304797887802124, "learning_rate": 2.956194882174353e-05, "loss": 0.2537, "step": 6816, "teacher_loss": 0.24801921844482422 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.3887847661972046, "learning_rate": 2.9566285962122306e-05, "loss": 0.3147, "step": 6817, "teacher_loss": 0.3065136671066284 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.7741252779960632, "learning_rate": 2.9570623102501084e-05, "loss": 0.4325, "step": 6818, "teacher_loss": 0.3945150077342987 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6991695165634155, "learning_rate": 2.957496024287986e-05, "loss": 0.3367, "step": 6819, "teacher_loss": 0.2964194715023041 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.6699053645133972, "learning_rate": 2.957929738325864e-05, "loss": 0.3404, "step": 6820, "teacher_loss": 0.3037913143634796 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.8536896705627441, "learning_rate": 2.9583634523637417e-05, "loss": 0.5411, "step": 6821, "teacher_loss": 0.5063959360122681 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.21807242929935455, "learning_rate": 2.9587971664016194e-05, "loss": 0.237, "step": 6822, "teacher_loss": 0.23915085196495056 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.26867300271987915, "learning_rate": 2.9592308804394972e-05, "loss": 0.2159, "step": 6823, "teacher_loss": 0.2100232094526291 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.23638300597667694, "learning_rate": 2.959664594477375e-05, "loss": 0.1484, "step": 6824, "teacher_loss": 0.13858887553215027 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.27739402651786804, "learning_rate": 2.960098308515252e-05, "loss": 0.1937, "step": 6825, "teacher_loss": 0.1844204068183899 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5223261117935181, "learning_rate": 2.9605320225531298e-05, "loss": 0.3899, "step": 6826, "teacher_loss": 0.37519919872283936 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.2585000991821289, "learning_rate": 2.9609657365910076e-05, "loss": 0.1791, "step": 6827, "teacher_loss": 0.17022843658924103 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.22532272338867188, "learning_rate": 2.9613994506288853e-05, "loss": 0.313, "step": 6828, "teacher_loss": 0.32278066873550415 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.20142707228660583, "learning_rate": 2.961833164666763e-05, "loss": 0.1553, "step": 6829, "teacher_loss": 0.15014129877090454 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.26881927251815796, "learning_rate": 2.962266878704641e-05, "loss": 0.2514, "step": 6830, "teacher_loss": 0.24944570660591125 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.5249983072280884, "learning_rate": 2.9627005927425186e-05, "loss": 0.2256, "step": 6831, "teacher_loss": 0.19232457876205444 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.31515008211135864, "learning_rate": 2.9631343067803964e-05, "loss": 0.2086, "step": 6832, "teacher_loss": 0.1967654526233673 }, { "compression_loss": 0.0, "epoch": 1.23, "label_loss": 0.38841694593429565, "learning_rate": 2.963568020818274e-05, "loss": 0.4299, "step": 6833, "teacher_loss": 0.4345610737800598 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.43253010511398315, "learning_rate": 2.9640017348561516e-05, "loss": 0.234, "step": 6834, "teacher_loss": 0.21198254823684692 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.6204953193664551, "learning_rate": 2.9644354488940293e-05, "loss": 0.3299, "step": 6835, "teacher_loss": 0.2976353168487549 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 1.1702687740325928, "learning_rate": 2.9648691629319068e-05, "loss": 0.6191, "step": 6836, "teacher_loss": 0.5578266382217407 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.4147545397281647, "learning_rate": 2.9653028769697845e-05, "loss": 0.2885, "step": 6837, "teacher_loss": 0.2744476795196533 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.6010729074478149, "learning_rate": 2.9657365910076623e-05, "loss": 0.3336, "step": 6838, "teacher_loss": 0.30388563871383667 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.19058087468147278, "learning_rate": 2.96617030504554e-05, "loss": 0.1857, "step": 6839, "teacher_loss": 0.1851271688938141 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5324939489364624, "learning_rate": 2.9666040190834178e-05, "loss": 0.2746, "step": 6840, "teacher_loss": 0.2458955943584442 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2717173397541046, "learning_rate": 2.9670377331212956e-05, "loss": 0.1946, "step": 6841, "teacher_loss": 0.18598389625549316 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.45083948969841003, "learning_rate": 2.9674714471591733e-05, "loss": 0.2737, "step": 6842, "teacher_loss": 0.2539963126182556 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5311942100524902, "learning_rate": 2.9679051611970508e-05, "loss": 0.2673, "step": 6843, "teacher_loss": 0.23803116381168365 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.28006845712661743, "learning_rate": 2.9683388752349285e-05, "loss": 0.1979, "step": 6844, "teacher_loss": 0.18875573575496674 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5967820882797241, "learning_rate": 2.9687725892728063e-05, "loss": 0.4121, "step": 6845, "teacher_loss": 0.39161211252212524 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.45988965034484863, "learning_rate": 2.969206303310684e-05, "loss": 0.2334, "step": 6846, "teacher_loss": 0.2082492858171463 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.18419361114501953, "learning_rate": 2.9696400173485615e-05, "loss": 0.1609, "step": 6847, "teacher_loss": 0.15836480259895325 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.18904048204421997, "learning_rate": 2.9700737313864392e-05, "loss": 0.1908, "step": 6848, "teacher_loss": 0.19102071225643158 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.4729083776473999, "learning_rate": 2.970507445424317e-05, "loss": 0.3216, "step": 6849, "teacher_loss": 0.30473610758781433 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.39026209712028503, "learning_rate": 2.9709411594621948e-05, "loss": 0.269, "step": 6850, "teacher_loss": 0.2554818391799927 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.790266752243042, "learning_rate": 2.9713748735000722e-05, "loss": 0.3625, "step": 6851, "teacher_loss": 0.3149150013923645 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.3125373125076294, "learning_rate": 2.97180858753795e-05, "loss": 0.2478, "step": 6852, "teacher_loss": 0.24058011174201965 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5254619121551514, "learning_rate": 2.9722423015758277e-05, "loss": 0.2782, "step": 6853, "teacher_loss": 0.2506744861602783 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.819421648979187, "learning_rate": 2.9726760156137055e-05, "loss": 0.3386, "step": 6854, "teacher_loss": 0.28517329692840576 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.34773337841033936, "learning_rate": 2.9731097296515832e-05, "loss": 0.3924, "step": 6855, "teacher_loss": 0.397353857755661 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.34012508392333984, "learning_rate": 2.973543443689461e-05, "loss": 0.2446, "step": 6856, "teacher_loss": 0.23400932550430298 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.6877815127372742, "learning_rate": 2.9739771577273387e-05, "loss": 0.482, "step": 6857, "teacher_loss": 0.45912912487983704 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5139047503471375, "learning_rate": 2.9744108717652162e-05, "loss": 0.3612, "step": 6858, "teacher_loss": 0.34422624111175537 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.37956053018569946, "learning_rate": 2.974844585803094e-05, "loss": 0.3155, "step": 6859, "teacher_loss": 0.30836912989616394 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.48840436339378357, "learning_rate": 2.9752782998409714e-05, "loss": 0.3013, "step": 6860, "teacher_loss": 0.2804993987083435 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.43081966042518616, "learning_rate": 2.975712013878849e-05, "loss": 0.227, "step": 6861, "teacher_loss": 0.20438753068447113 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.48362863063812256, "learning_rate": 2.976145727916727e-05, "loss": 0.2828, "step": 6862, "teacher_loss": 0.26046252250671387 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2711610794067383, "learning_rate": 2.9765794419546046e-05, "loss": 0.2627, "step": 6863, "teacher_loss": 0.26176947355270386 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5587612390518188, "learning_rate": 2.9770131559924824e-05, "loss": 0.2485, "step": 6864, "teacher_loss": 0.2140185534954071 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.36521023511886597, "learning_rate": 2.97744687003036e-05, "loss": 0.2951, "step": 6865, "teacher_loss": 0.2873638868331909 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.3985061049461365, "learning_rate": 2.977880584068238e-05, "loss": 0.2811, "step": 6866, "teacher_loss": 0.2680205702781677 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2042558789253235, "learning_rate": 2.9783142981061157e-05, "loss": 0.2335, "step": 6867, "teacher_loss": 0.23671205341815948 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.48562318086624146, "learning_rate": 2.978748012143993e-05, "loss": 0.25, "step": 6868, "teacher_loss": 0.22379852831363678 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.38065552711486816, "learning_rate": 2.9791817261818705e-05, "loss": 0.3019, "step": 6869, "teacher_loss": 0.2931460738182068 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.49717220664024353, "learning_rate": 2.9796154402197483e-05, "loss": 0.2187, "step": 6870, "teacher_loss": 0.18780797719955444 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2969147562980652, "learning_rate": 2.980049154257626e-05, "loss": 0.2706, "step": 6871, "teacher_loss": 0.26768600940704346 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.7951686382293701, "learning_rate": 2.9804828682955038e-05, "loss": 0.3124, "step": 6872, "teacher_loss": 0.2588126063346863 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.6118711233139038, "learning_rate": 2.9809165823333816e-05, "loss": 0.2345, "step": 6873, "teacher_loss": 0.19254979491233826 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.4556140899658203, "learning_rate": 2.9813502963712594e-05, "loss": 0.3576, "step": 6874, "teacher_loss": 0.3467370271682739 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5523602962493896, "learning_rate": 2.981784010409137e-05, "loss": 0.2713, "step": 6875, "teacher_loss": 0.24008990824222565 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.21420562267303467, "learning_rate": 2.982217724447015e-05, "loss": 0.1738, "step": 6876, "teacher_loss": 0.1693510115146637 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.3408447206020355, "learning_rate": 2.9826514384848926e-05, "loss": 0.2987, "step": 6877, "teacher_loss": 0.2940502166748047 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.4900956153869629, "learning_rate": 2.98308515252277e-05, "loss": 0.237, "step": 6878, "teacher_loss": 0.20887476205825806 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.6433770656585693, "learning_rate": 2.9835188665606475e-05, "loss": 0.3834, "step": 6879, "teacher_loss": 0.35452088713645935 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.5121951699256897, "learning_rate": 2.9839525805985252e-05, "loss": 0.2383, "step": 6880, "teacher_loss": 0.20788998901844025 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.25369513034820557, "learning_rate": 2.984386294636403e-05, "loss": 0.3165, "step": 6881, "teacher_loss": 0.32352787256240845 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2857291102409363, "learning_rate": 2.9848200086742808e-05, "loss": 0.1954, "step": 6882, "teacher_loss": 0.18533584475517273 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.666456401348114, "learning_rate": 2.9852537227121585e-05, "loss": 0.2391, "step": 6883, "teacher_loss": 0.19161485135555267 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2401951253414154, "learning_rate": 2.9856874367500363e-05, "loss": 0.2017, "step": 6884, "teacher_loss": 0.19739894568920135 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.23822197318077087, "learning_rate": 2.986121150787914e-05, "loss": 0.2036, "step": 6885, "teacher_loss": 0.19979208707809448 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.43056944012641907, "learning_rate": 2.9865548648257918e-05, "loss": 0.2553, "step": 6886, "teacher_loss": 0.23586609959602356 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.2658616602420807, "learning_rate": 2.9869885788636692e-05, "loss": 0.1962, "step": 6887, "teacher_loss": 0.18841680884361267 }, { "compression_loss": 0.0, "epoch": 1.24, "label_loss": 0.9721401333808899, "learning_rate": 2.987422292901547e-05, "loss": 0.5472, "step": 6888, "teacher_loss": 0.5000076293945312 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.4527629613876343, "learning_rate": 2.9878560069394248e-05, "loss": 0.346, "step": 6889, "teacher_loss": 0.3341861367225647 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3134571313858032, "learning_rate": 2.9882897209773022e-05, "loss": 0.2373, "step": 6890, "teacher_loss": 0.2288718819618225 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.26660627126693726, "learning_rate": 2.98872343501518e-05, "loss": 0.2379, "step": 6891, "teacher_loss": 0.23465800285339355 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.4978344440460205, "learning_rate": 2.9891571490530577e-05, "loss": 0.3412, "step": 6892, "teacher_loss": 0.3237534165382385 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.1745733916759491, "learning_rate": 2.9895908630909355e-05, "loss": 0.1842, "step": 6893, "teacher_loss": 0.1852794885635376 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.5690048336982727, "learning_rate": 2.9900245771288132e-05, "loss": 0.3396, "step": 6894, "teacher_loss": 0.31414559483528137 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6939668655395508, "learning_rate": 2.9904582911666907e-05, "loss": 0.6192, "step": 6895, "teacher_loss": 0.6108812093734741 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.2658732533454895, "learning_rate": 2.9908920052045684e-05, "loss": 0.2071, "step": 6896, "teacher_loss": 0.2005537748336792 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.31617599725723267, "learning_rate": 2.9913257192424462e-05, "loss": 0.2143, "step": 6897, "teacher_loss": 0.20297953486442566 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.44130343198776245, "learning_rate": 2.991759433280324e-05, "loss": 0.1942, "step": 6898, "teacher_loss": 0.16669508814811707 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.320975661277771, "learning_rate": 2.9921931473182017e-05, "loss": 0.1906, "step": 6899, "teacher_loss": 0.176153302192688 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.19834718108177185, "learning_rate": 2.9926268613560795e-05, "loss": 0.2705, "step": 6900, "teacher_loss": 0.2785126566886902 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3560863733291626, "learning_rate": 2.993060575393957e-05, "loss": 0.226, "step": 6901, "teacher_loss": 0.21155351400375366 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 1.0060230493545532, "learning_rate": 2.9934942894318347e-05, "loss": 0.4203, "step": 6902, "teacher_loss": 0.355190634727478 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3327946066856384, "learning_rate": 2.9939280034697124e-05, "loss": 0.2583, "step": 6903, "teacher_loss": 0.24997329711914062 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.1829233467578888, "learning_rate": 2.99436171750759e-05, "loss": 0.2206, "step": 6904, "teacher_loss": 0.22479453682899475 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.25130724906921387, "learning_rate": 2.9947954315454676e-05, "loss": 0.2324, "step": 6905, "teacher_loss": 0.23026320338249207 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.5618460178375244, "learning_rate": 2.9952291455833454e-05, "loss": 0.2513, "step": 6906, "teacher_loss": 0.2168234884738922 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3922111392021179, "learning_rate": 2.995662859621223e-05, "loss": 0.2009, "step": 6907, "teacher_loss": 0.17964893579483032 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6414545774459839, "learning_rate": 2.996096573659101e-05, "loss": 0.3158, "step": 6908, "teacher_loss": 0.2796213626861572 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 1.6081366539001465, "learning_rate": 2.9965302876969787e-05, "loss": 0.4216, "step": 6909, "teacher_loss": 0.2897984981536865 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.5534387230873108, "learning_rate": 2.9969640017348564e-05, "loss": 0.3425, "step": 6910, "teacher_loss": 0.3190078139305115 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.581301748752594, "learning_rate": 2.9973977157727342e-05, "loss": 0.2854, "step": 6911, "teacher_loss": 0.25256484746932983 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6544755697250366, "learning_rate": 2.9978314298106116e-05, "loss": 0.3177, "step": 6912, "teacher_loss": 0.28024822473526 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.43475785851478577, "learning_rate": 2.998265143848489e-05, "loss": 0.2988, "step": 6913, "teacher_loss": 0.28374341130256653 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.9851709604263306, "learning_rate": 2.9986988578863668e-05, "loss": 0.3127, "step": 6914, "teacher_loss": 0.23798255622386932 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.26312679052352905, "learning_rate": 2.9991325719242446e-05, "loss": 0.2314, "step": 6915, "teacher_loss": 0.22791871428489685 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.1698855459690094, "learning_rate": 2.9995662859621223e-05, "loss": 0.1603, "step": 6916, "teacher_loss": 0.15923890471458435 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.5187318921089172, "learning_rate": 3e-05, "loss": 0.3849, "step": 6917, "teacher_loss": 0.37001535296440125 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.301098108291626, "learning_rate": 2.9999999828047488e-05, "loss": 0.2912, "step": 6918, "teacher_loss": 0.29009896516799927 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.18328280746936798, "learning_rate": 2.9999999312189952e-05, "loss": 0.2228, "step": 6919, "teacher_loss": 0.22715647518634796 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.5072847604751587, "learning_rate": 2.99999984524274e-05, "loss": 0.3987, "step": 6920, "teacher_loss": 0.3866388201713562 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.546576976776123, "learning_rate": 2.9999997248759857e-05, "loss": 0.2685, "step": 6921, "teacher_loss": 0.2375963032245636 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 1.1361072063446045, "learning_rate": 2.9999995701187352e-05, "loss": 0.5425, "step": 6922, "teacher_loss": 0.4765434265136719 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3432465195655823, "learning_rate": 2.9999993809709916e-05, "loss": 0.2318, "step": 6923, "teacher_loss": 0.21941183507442474 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3132544159889221, "learning_rate": 2.9999991574327596e-05, "loss": 0.2747, "step": 6924, "teacher_loss": 0.27045562863349915 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.2802899479866028, "learning_rate": 2.999998899504044e-05, "loss": 0.2751, "step": 6925, "teacher_loss": 0.27454817295074463 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.3673436641693115, "learning_rate": 2.999998607184851e-05, "loss": 0.3004, "step": 6926, "teacher_loss": 0.29298263788223267 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.684203028678894, "learning_rate": 2.999998280475187e-05, "loss": 0.4967, "step": 6927, "teacher_loss": 0.47590622305870056 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6588788032531738, "learning_rate": 2.9999979193750598e-05, "loss": 0.2766, "step": 6928, "teacher_loss": 0.23409071564674377 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.28143876791000366, "learning_rate": 2.9999975238844774e-05, "loss": 0.2499, "step": 6929, "teacher_loss": 0.24644330143928528 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.39426904916763306, "learning_rate": 2.999997094003449e-05, "loss": 0.2554, "step": 6930, "teacher_loss": 0.23998206853866577 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6348534822463989, "learning_rate": 2.9999966297319848e-05, "loss": 0.2804, "step": 6931, "teacher_loss": 0.24103891849517822 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.41482460498809814, "learning_rate": 2.9999961310700946e-05, "loss": 0.3531, "step": 6932, "teacher_loss": 0.34620028734207153 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.9377168416976929, "learning_rate": 2.9999955980177908e-05, "loss": 0.6643, "step": 6933, "teacher_loss": 0.6339671611785889 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.424083411693573, "learning_rate": 2.9999950305750844e-05, "loss": 0.2704, "step": 6934, "teacher_loss": 0.2533418834209442 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.189805805683136, "learning_rate": 2.99999442874199e-05, "loss": 0.1725, "step": 6935, "teacher_loss": 0.1705356240272522 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6979736089706421, "learning_rate": 2.9999937925185203e-05, "loss": 0.3509, "step": 6936, "teacher_loss": 0.3123500943183899 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.43820276856422424, "learning_rate": 2.99999312190469e-05, "loss": 0.4557, "step": 6937, "teacher_loss": 0.45765697956085205 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.6402939558029175, "learning_rate": 2.9999924169005146e-05, "loss": 0.4397, "step": 6938, "teacher_loss": 0.4173782169818878 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.41320082545280457, "learning_rate": 2.9999916775060108e-05, "loss": 0.2059, "step": 6939, "teacher_loss": 0.18290458619594574 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.25503039360046387, "learning_rate": 2.9999909037211945e-05, "loss": 0.2367, "step": 6940, "teacher_loss": 0.23467963933944702 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.959038257598877, "learning_rate": 2.9999900955460843e-05, "loss": 0.4326, "step": 6941, "teacher_loss": 0.3740893602371216 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.7127895355224609, "learning_rate": 2.999989252980698e-05, "loss": 0.2587, "step": 6942, "teacher_loss": 0.2082202136516571 }, { "compression_loss": 0.0, "epoch": 1.25, "label_loss": 0.41858986020088196, "learning_rate": 2.9999883760250553e-05, "loss": 0.3065, "step": 6943, "teacher_loss": 0.2939937114715576 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3572905659675598, "learning_rate": 2.999987464679177e-05, "loss": 0.2274, "step": 6944, "teacher_loss": 0.21299190819263458 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.6799027323722839, "learning_rate": 2.999986518943083e-05, "loss": 0.2383, "step": 6945, "teacher_loss": 0.18928247690200806 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3660587668418884, "learning_rate": 2.9999855388167953e-05, "loss": 0.273, "step": 6946, "teacher_loss": 0.26270514726638794 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3960840106010437, "learning_rate": 2.9999845243003365e-05, "loss": 0.2293, "step": 6947, "teacher_loss": 0.2107643187046051 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3464674949645996, "learning_rate": 2.9999834753937294e-05, "loss": 0.2427, "step": 6948, "teacher_loss": 0.23114603757858276 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.34996238350868225, "learning_rate": 2.9999823920969986e-05, "loss": 0.2348, "step": 6949, "teacher_loss": 0.22201739251613617 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.2559641897678375, "learning_rate": 2.9999812744101686e-05, "loss": 0.1982, "step": 6950, "teacher_loss": 0.19174398481845856 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.24504128098487854, "learning_rate": 2.9999801223332654e-05, "loss": 0.2475, "step": 6951, "teacher_loss": 0.24778872728347778 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.6323034763336182, "learning_rate": 2.9999789358663152e-05, "loss": 0.3495, "step": 6952, "teacher_loss": 0.31812146306037903 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.6625404953956604, "learning_rate": 2.999977715009345e-05, "loss": 0.2925, "step": 6953, "teacher_loss": 0.25139766931533813 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3109323978424072, "learning_rate": 2.999976459762383e-05, "loss": 0.2439, "step": 6954, "teacher_loss": 0.23647215962409973 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.6176930069923401, "learning_rate": 2.9999751701254575e-05, "loss": 0.2786, "step": 6955, "teacher_loss": 0.24094988405704498 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5596502423286438, "learning_rate": 2.9999738460985993e-05, "loss": 0.3511, "step": 6956, "teacher_loss": 0.32790353894233704 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.20909713208675385, "learning_rate": 2.999972487681838e-05, "loss": 0.2004, "step": 6957, "teacher_loss": 0.19944703578948975 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.2424902319908142, "learning_rate": 2.9999710948752037e-05, "loss": 0.1666, "step": 6958, "teacher_loss": 0.15821288526058197 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.4242701232433319, "learning_rate": 2.99996966767873e-05, "loss": 0.2311, "step": 6959, "teacher_loss": 0.20968323945999146 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.2454378306865692, "learning_rate": 2.999968206092449e-05, "loss": 0.2263, "step": 6960, "teacher_loss": 0.2242169976234436 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.7741588354110718, "learning_rate": 2.9999667101163943e-05, "loss": 0.2586, "step": 6961, "teacher_loss": 0.20134606957435608 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.32335567474365234, "learning_rate": 2.9999651797505995e-05, "loss": 0.2167, "step": 6962, "teacher_loss": 0.204800546169281 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5957107543945312, "learning_rate": 2.9999636149951007e-05, "loss": 0.3502, "step": 6963, "teacher_loss": 0.3229502737522125 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3543875813484192, "learning_rate": 2.9999620158499334e-05, "loss": 0.3022, "step": 6964, "teacher_loss": 0.29643508791923523 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.18114180862903595, "learning_rate": 2.999960382315134e-05, "loss": 0.2001, "step": 6965, "teacher_loss": 0.20221254229545593 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.2476223111152649, "learning_rate": 2.9999587143907402e-05, "loss": 0.1979, "step": 6966, "teacher_loss": 0.19242295622825623 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5913947820663452, "learning_rate": 2.9999570120767902e-05, "loss": 0.2916, "step": 6967, "teacher_loss": 0.25832730531692505 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.41619378328323364, "learning_rate": 2.999955275373323e-05, "loss": 0.2489, "step": 6968, "teacher_loss": 0.23034590482711792 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.2973743677139282, "learning_rate": 2.9999535042803782e-05, "loss": 0.1976, "step": 6969, "teacher_loss": 0.18653151392936707 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.33213841915130615, "learning_rate": 2.9999516987979972e-05, "loss": 0.2726, "step": 6970, "teacher_loss": 0.26597338914871216 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.18278184533119202, "learning_rate": 2.9999498589262204e-05, "loss": 0.1717, "step": 6971, "teacher_loss": 0.17041435837745667 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.756203830242157, "learning_rate": 2.9999479846650904e-05, "loss": 0.3278, "step": 6972, "teacher_loss": 0.28016287088394165 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3646557033061981, "learning_rate": 2.9999460760146503e-05, "loss": 0.2077, "step": 6973, "teacher_loss": 0.19028012454509735 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.4121394455432892, "learning_rate": 2.999944132974944e-05, "loss": 0.2106, "step": 6974, "teacher_loss": 0.18819761276245117 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5164729952812195, "learning_rate": 2.9999421555460155e-05, "loss": 0.2277, "step": 6975, "teacher_loss": 0.19556701183319092 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5639902353286743, "learning_rate": 2.99994014372791e-05, "loss": 0.2336, "step": 6976, "teacher_loss": 0.1969141960144043 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.1770978718996048, "learning_rate": 2.999938097520675e-05, "loss": 0.1738, "step": 6977, "teacher_loss": 0.17346912622451782 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.61199551820755, "learning_rate": 2.9999360169243556e-05, "loss": 0.3848, "step": 6978, "teacher_loss": 0.3595999777317047 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.35419583320617676, "learning_rate": 2.999933901939001e-05, "loss": 0.1854, "step": 6979, "teacher_loss": 0.16664013266563416 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.516225278377533, "learning_rate": 2.999931752564659e-05, "loss": 0.2545, "step": 6980, "teacher_loss": 0.22547045350074768 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 1.218553066253662, "learning_rate": 2.999929568801379e-05, "loss": 0.7922, "step": 6981, "teacher_loss": 0.7448649406433105 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5328869223594666, "learning_rate": 2.9999273506492104e-05, "loss": 0.2671, "step": 6982, "teacher_loss": 0.23754283785820007 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.40544432401657104, "learning_rate": 2.9999250981082053e-05, "loss": 0.305, "step": 6983, "teacher_loss": 0.29385462403297424 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5449610352516174, "learning_rate": 2.9999228111784143e-05, "loss": 0.2381, "step": 6984, "teacher_loss": 0.2040400356054306 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.285076379776001, "learning_rate": 2.9999204898598907e-05, "loss": 0.1831, "step": 6985, "teacher_loss": 0.1717534214258194 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5044917464256287, "learning_rate": 2.999918134152687e-05, "loss": 0.2475, "step": 6986, "teacher_loss": 0.21893665194511414 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3204505443572998, "learning_rate": 2.999915744056857e-05, "loss": 0.2422, "step": 6987, "teacher_loss": 0.2334899604320526 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.4988766312599182, "learning_rate": 2.9999133195724563e-05, "loss": 0.4315, "step": 6988, "teacher_loss": 0.42405635118484497 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.43875131011009216, "learning_rate": 2.9999108606995405e-05, "loss": 0.2377, "step": 6989, "teacher_loss": 0.21536210179328918 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.307504802942276, "learning_rate": 2.9999083674381658e-05, "loss": 0.3261, "step": 6990, "teacher_loss": 0.328177809715271 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3905857801437378, "learning_rate": 2.999905839788388e-05, "loss": 0.308, "step": 6991, "teacher_loss": 0.2988031208515167 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.6156218647956848, "learning_rate": 2.9999032777502675e-05, "loss": 0.2405, "step": 6992, "teacher_loss": 0.1988511085510254 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5167202949523926, "learning_rate": 2.9999006813238615e-05, "loss": 0.5218, "step": 6993, "teacher_loss": 0.522392213344574 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.09809376299381256, "learning_rate": 2.9998980505092296e-05, "loss": 0.1933, "step": 6994, "teacher_loss": 0.20384229719638824 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.29984307289123535, "learning_rate": 2.999895385306432e-05, "loss": 0.2412, "step": 6995, "teacher_loss": 0.23470190167427063 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5619527101516724, "learning_rate": 2.9998926857155306e-05, "loss": 0.1881, "step": 6996, "teacher_loss": 0.14651578664779663 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.5999884605407715, "learning_rate": 2.9998899517365866e-05, "loss": 0.2291, "step": 6997, "teacher_loss": 0.1878499984741211 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.317263126373291, "learning_rate": 2.9998871833696633e-05, "loss": 0.2472, "step": 6998, "teacher_loss": 0.23938389122486115 }, { "compression_loss": 0.0, "epoch": 1.26, "label_loss": 0.3636245131492615, "learning_rate": 2.9998843806148235e-05, "loss": 0.1898, "step": 6999, "teacher_loss": 0.17049682140350342 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5415074229240417, "learning_rate": 2.999881543472132e-05, "loss": 0.2611, "step": 7000, "teacher_loss": 0.2299949675798416 }, { "epoch": 1.27, "eval_exact_match": 78.97824030274361, "eval_f1": 86.59077935465757, "step": 7000 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.812138557434082, "learning_rate": 2.9998786719416534e-05, "loss": 0.3343, "step": 7001, "teacher_loss": 0.2812288999557495 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.21095333993434906, "learning_rate": 2.9998757660234536e-05, "loss": 0.2068, "step": 7002, "teacher_loss": 0.20632602274417877 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.3168070316314697, "learning_rate": 2.9998728257175992e-05, "loss": 0.2739, "step": 7003, "teacher_loss": 0.2691390812397003 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.2506276071071625, "learning_rate": 2.999869851024158e-05, "loss": 0.2158, "step": 7004, "teacher_loss": 0.21193963289260864 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5702749490737915, "learning_rate": 2.999866841943198e-05, "loss": 0.2747, "step": 7005, "teacher_loss": 0.24183884263038635 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.7808693647384644, "learning_rate": 2.9998637984747883e-05, "loss": 0.4939, "step": 7006, "teacher_loss": 0.46204569935798645 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.08443786948919296, "learning_rate": 2.999860720618998e-05, "loss": 0.1544, "step": 7007, "teacher_loss": 0.16213035583496094 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.7030962705612183, "learning_rate": 2.9998576083758987e-05, "loss": 0.3729, "step": 7008, "teacher_loss": 0.3362564742565155 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.6169146299362183, "learning_rate": 2.999854461745561e-05, "loss": 0.3939, "step": 7009, "teacher_loss": 0.36915820837020874 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.32285699248313904, "learning_rate": 2.9998512807280573e-05, "loss": 0.1998, "step": 7010, "teacher_loss": 0.18611471354961395 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5274523496627808, "learning_rate": 2.9998480653234607e-05, "loss": 0.2732, "step": 7011, "teacher_loss": 0.24495935440063477 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.4237682819366455, "learning_rate": 2.9998448155318445e-05, "loss": 0.3258, "step": 7012, "teacher_loss": 0.31496158242225647 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.44937026500701904, "learning_rate": 2.9998415313532835e-05, "loss": 0.3272, "step": 7013, "teacher_loss": 0.31362441182136536 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5593776702880859, "learning_rate": 2.999838212787853e-05, "loss": 0.4883, "step": 7014, "teacher_loss": 0.4803839921951294 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.6155896186828613, "learning_rate": 2.999834859835629e-05, "loss": 0.2935, "step": 7015, "teacher_loss": 0.2577052116394043 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.8464725017547607, "learning_rate": 2.9998314724966886e-05, "loss": 0.3437, "step": 7016, "teacher_loss": 0.2878072261810303 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.40568894147872925, "learning_rate": 2.999828050771109e-05, "loss": 0.1958, "step": 7017, "teacher_loss": 0.1725206971168518 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.9212563633918762, "learning_rate": 2.999824594658969e-05, "loss": 0.3004, "step": 7018, "teacher_loss": 0.23143593966960907 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.2578338384628296, "learning_rate": 2.9998211041603477e-05, "loss": 0.1723, "step": 7019, "teacher_loss": 0.162795752286911 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5548545122146606, "learning_rate": 2.999817579275325e-05, "loss": 0.3069, "step": 7020, "teacher_loss": 0.2793705463409424 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.41252005100250244, "learning_rate": 2.9998140200039827e-05, "loss": 0.2356, "step": 7021, "teacher_loss": 0.21598170697689056 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.8479819893836975, "learning_rate": 2.9998104263464005e-05, "loss": 0.3621, "step": 7022, "teacher_loss": 0.30815866589546204 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.3009198307991028, "learning_rate": 2.9998067983026624e-05, "loss": 0.2491, "step": 7023, "teacher_loss": 0.24330535531044006 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.32685768604278564, "learning_rate": 2.9998031358728514e-05, "loss": 0.2254, "step": 7024, "teacher_loss": 0.2141449898481369 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.3330312967300415, "learning_rate": 2.9997994390570507e-05, "loss": 0.2375, "step": 7025, "teacher_loss": 0.2269349992275238 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.36006152629852295, "learning_rate": 2.9997957078553458e-05, "loss": 0.2864, "step": 7026, "teacher_loss": 0.2782706618309021 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.412614643573761, "learning_rate": 2.9997919422678214e-05, "loss": 0.1891, "step": 7027, "teacher_loss": 0.16427794098854065 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5532370805740356, "learning_rate": 2.999788142294565e-05, "loss": 0.3049, "step": 7028, "teacher_loss": 0.2773568630218506 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.676288902759552, "learning_rate": 2.999784307935663e-05, "loss": 0.2681, "step": 7029, "teacher_loss": 0.22275730967521667 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.4067785143852234, "learning_rate": 2.9997804391912028e-05, "loss": 0.2385, "step": 7030, "teacher_loss": 0.21977736055850983 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 1.1693761348724365, "learning_rate": 2.999776536061274e-05, "loss": 0.3556, "step": 7031, "teacher_loss": 0.2652049660682678 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.8631430864334106, "learning_rate": 2.9997725985459663e-05, "loss": 0.276, "step": 7032, "teacher_loss": 0.21076250076293945 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.7710654735565186, "learning_rate": 2.999768626645369e-05, "loss": 0.694, "step": 7033, "teacher_loss": 0.6854475140571594 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.19282880425453186, "learning_rate": 2.9997646203595734e-05, "loss": 0.1962, "step": 7034, "teacher_loss": 0.19652104377746582 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.3251870274543762, "learning_rate": 2.9997605796886722e-05, "loss": 0.2064, "step": 7035, "teacher_loss": 0.19319823384284973 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.36963504552841187, "learning_rate": 2.999756504632757e-05, "loss": 0.2815, "step": 7036, "teacher_loss": 0.2717294991016388 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.6764625310897827, "learning_rate": 2.999752395191922e-05, "loss": 0.2891, "step": 7037, "teacher_loss": 0.24601122736930847 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.09965810179710388, "learning_rate": 2.9997482513662605e-05, "loss": 0.1486, "step": 7038, "teacher_loss": 0.1540473997592926 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.6917380690574646, "learning_rate": 2.9997440731558685e-05, "loss": 0.3847, "step": 7039, "teacher_loss": 0.350607693195343 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 1.0150301456451416, "learning_rate": 2.9997398605608415e-05, "loss": 0.3568, "step": 7040, "teacher_loss": 0.28369972109794617 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5362294912338257, "learning_rate": 2.9997356135812756e-05, "loss": 0.2874, "step": 7041, "teacher_loss": 0.2597554326057434 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.6085846424102783, "learning_rate": 2.999731332217269e-05, "loss": 0.3735, "step": 7042, "teacher_loss": 0.34737643599510193 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5395544767379761, "learning_rate": 2.9997270164689188e-05, "loss": 0.3561, "step": 7043, "teacher_loss": 0.3357166647911072 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.8649001717567444, "learning_rate": 2.9997226663363247e-05, "loss": 0.3615, "step": 7044, "teacher_loss": 0.30562055110931396 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.47918346524238586, "learning_rate": 2.9997182818195862e-05, "loss": 0.2675, "step": 7045, "teacher_loss": 0.2439352571964264 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.5492284297943115, "learning_rate": 2.999713862918804e-05, "loss": 0.3206, "step": 7046, "teacher_loss": 0.29521557688713074 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.398273229598999, "learning_rate": 2.9997094096340794e-05, "loss": 0.3009, "step": 7047, "teacher_loss": 0.29005739092826843 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.43202292919158936, "learning_rate": 2.9997049219655144e-05, "loss": 0.23, "step": 7048, "teacher_loss": 0.2075108140707016 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.20541566610336304, "learning_rate": 2.9997003999132115e-05, "loss": 0.23, "step": 7049, "teacher_loss": 0.23278382420539856 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 1.0899384021759033, "learning_rate": 2.9996958434772755e-05, "loss": 0.4126, "step": 7050, "teacher_loss": 0.3373585641384125 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.15908819437026978, "learning_rate": 2.9996912526578096e-05, "loss": 0.193, "step": 7051, "teacher_loss": 0.19680100679397583 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.22236061096191406, "learning_rate": 2.9996866274549193e-05, "loss": 0.2171, "step": 7052, "teacher_loss": 0.21649497747421265 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.4085918366909027, "learning_rate": 2.9996819678687113e-05, "loss": 0.2229, "step": 7053, "teacher_loss": 0.20228153467178345 }, { "compression_loss": 0.0, "epoch": 1.27, "label_loss": 0.34753674268722534, "learning_rate": 2.9996772738992923e-05, "loss": 0.3218, "step": 7054, "teacher_loss": 0.318892240524292 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.34627026319503784, "learning_rate": 2.9996725455467693e-05, "loss": 0.2098, "step": 7055, "teacher_loss": 0.19466429948806763 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.23840349912643433, "learning_rate": 2.9996677828112512e-05, "loss": 0.1899, "step": 7056, "teacher_loss": 0.18447700142860413 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.3372713029384613, "learning_rate": 2.9996629856928473e-05, "loss": 0.2061, "step": 7057, "teacher_loss": 0.19154666364192963 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5389776229858398, "learning_rate": 2.999658154191667e-05, "loss": 0.3625, "step": 7058, "teacher_loss": 0.3428640067577362 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.9021604657173157, "learning_rate": 2.9996532883078218e-05, "loss": 0.5237, "step": 7059, "teacher_loss": 0.4817003607749939 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6048001050949097, "learning_rate": 2.9996483880414224e-05, "loss": 0.289, "step": 7060, "teacher_loss": 0.2539489269256592 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5220537185668945, "learning_rate": 2.9996434533925822e-05, "loss": 0.2579, "step": 7061, "teacher_loss": 0.22858746349811554 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.7754589319229126, "learning_rate": 2.9996384843614135e-05, "loss": 0.4231, "step": 7062, "teacher_loss": 0.3839249908924103 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 1.1422266960144043, "learning_rate": 2.9996334809480303e-05, "loss": 0.3817, "step": 7063, "teacher_loss": 0.297219455242157 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.46671149134635925, "learning_rate": 2.999628443152548e-05, "loss": 0.207, "step": 7064, "teacher_loss": 0.17811360955238342 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2162889838218689, "learning_rate": 2.9996233709750814e-05, "loss": 0.2023, "step": 7065, "teacher_loss": 0.20074713230133057 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.7021883726119995, "learning_rate": 2.9996182644157467e-05, "loss": 0.3715, "step": 7066, "teacher_loss": 0.33478182554244995 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.32095256447792053, "learning_rate": 2.9996131234746613e-05, "loss": 0.3045, "step": 7067, "teacher_loss": 0.3027231693267822 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5422878265380859, "learning_rate": 2.9996079481519435e-05, "loss": 0.29, "step": 7068, "teacher_loss": 0.26200637221336365 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.1546006202697754, "learning_rate": 2.9996027384477114e-05, "loss": 0.2669, "step": 7069, "teacher_loss": 0.2793290615081787 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 1.2620408535003662, "learning_rate": 2.9995974943620844e-05, "loss": 0.4436, "step": 7070, "teacher_loss": 0.35264214873313904 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2649785876274109, "learning_rate": 2.9995922158951827e-05, "loss": 0.3955, "step": 7071, "teacher_loss": 0.4100552499294281 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.43429136276245117, "learning_rate": 2.999586903047128e-05, "loss": 0.2507, "step": 7072, "teacher_loss": 0.23028427362442017 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.9815129041671753, "learning_rate": 2.999581555818041e-05, "loss": 0.3295, "step": 7073, "teacher_loss": 0.2571081519126892 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.47569000720977783, "learning_rate": 2.9995761742080454e-05, "loss": 0.1801, "step": 7074, "teacher_loss": 0.14725643396377563 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.19973039627075195, "learning_rate": 2.999570758217264e-05, "loss": 0.2124, "step": 7075, "teacher_loss": 0.21381694078445435 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.3188217282295227, "learning_rate": 2.999565307845821e-05, "loss": 0.2721, "step": 7076, "teacher_loss": 0.26690739393234253 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5309109091758728, "learning_rate": 2.9995598230938416e-05, "loss": 0.2501, "step": 7077, "teacher_loss": 0.21892693638801575 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.36627280712127686, "learning_rate": 2.999554303961451e-05, "loss": 0.2569, "step": 7078, "teacher_loss": 0.24470072984695435 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6151841878890991, "learning_rate": 2.999548750448776e-05, "loss": 0.297, "step": 7079, "teacher_loss": 0.26165279746055603 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2857910990715027, "learning_rate": 2.9995431625559445e-05, "loss": 0.2861, "step": 7080, "teacher_loss": 0.2861219644546509 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.511077344417572, "learning_rate": 2.999537540283084e-05, "loss": 0.3178, "step": 7081, "teacher_loss": 0.2963334321975708 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.4285435676574707, "learning_rate": 2.9995318836303235e-05, "loss": 0.333, "step": 7082, "teacher_loss": 0.3224106431007385 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.38699859380722046, "learning_rate": 2.999526192597793e-05, "loss": 0.1956, "step": 7083, "teacher_loss": 0.17428764700889587 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5516075491905212, "learning_rate": 2.999520467185622e-05, "loss": 0.2553, "step": 7084, "teacher_loss": 0.2223854809999466 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.5824899077415466, "learning_rate": 2.999514707393943e-05, "loss": 0.2916, "step": 7085, "teacher_loss": 0.2593334913253784 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.49483001232147217, "learning_rate": 2.9995089132228877e-05, "loss": 0.3306, "step": 7086, "teacher_loss": 0.31230786442756653 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.40986403822898865, "learning_rate": 2.9995030846725886e-05, "loss": 0.3109, "step": 7087, "teacher_loss": 0.2999221086502075 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.419546514749527, "learning_rate": 2.9994972217431796e-05, "loss": 0.2665, "step": 7088, "teacher_loss": 0.24949800968170166 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2925000786781311, "learning_rate": 2.9994913244347947e-05, "loss": 0.2494, "step": 7089, "teacher_loss": 0.2446572333574295 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.29219719767570496, "learning_rate": 2.9994853927475695e-05, "loss": 0.2532, "step": 7090, "teacher_loss": 0.24883972108364105 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2977334260940552, "learning_rate": 2.9994794266816398e-05, "loss": 0.2466, "step": 7091, "teacher_loss": 0.24091535806655884 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6371045112609863, "learning_rate": 2.9994734262371433e-05, "loss": 0.2041, "step": 7092, "teacher_loss": 0.15596921741962433 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.35970252752304077, "learning_rate": 2.999467391414216e-05, "loss": 0.2361, "step": 7093, "teacher_loss": 0.22237008810043335 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6484271287918091, "learning_rate": 2.9994613222129977e-05, "loss": 0.2918, "step": 7094, "teacher_loss": 0.2521324157714844 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6021228432655334, "learning_rate": 2.9994552186336265e-05, "loss": 0.6345, "step": 7095, "teacher_loss": 0.6381402015686035 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.3679259717464447, "learning_rate": 2.9994490806762428e-05, "loss": 0.2794, "step": 7096, "teacher_loss": 0.2695087790489197 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.37677910923957825, "learning_rate": 2.999442908340987e-05, "loss": 0.3153, "step": 7097, "teacher_loss": 0.3084523677825928 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.7784925699234009, "learning_rate": 2.999436701628001e-05, "loss": 0.4028, "step": 7098, "teacher_loss": 0.3610305190086365 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.7022049427032471, "learning_rate": 2.999430460537427e-05, "loss": 0.4192, "step": 7099, "teacher_loss": 0.3877614438533783 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.682978630065918, "learning_rate": 2.999424185069408e-05, "loss": 0.4808, "step": 7100, "teacher_loss": 0.45838505029678345 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6455715298652649, "learning_rate": 2.9994178752240885e-05, "loss": 0.2615, "step": 7101, "teacher_loss": 0.21881821751594543 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.4900006651878357, "learning_rate": 2.9994115310016124e-05, "loss": 0.2626, "step": 7102, "teacher_loss": 0.2373484969139099 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.27053219079971313, "learning_rate": 2.999405152402125e-05, "loss": 0.1927, "step": 7103, "teacher_loss": 0.18406102061271667 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.3029423952102661, "learning_rate": 2.9993987394257732e-05, "loss": 0.2984, "step": 7104, "teacher_loss": 0.2978520393371582 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.2939951419830322, "learning_rate": 2.9993922920727034e-05, "loss": 0.2282, "step": 7105, "teacher_loss": 0.22093728184700012 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.7132244110107422, "learning_rate": 2.999385810343064e-05, "loss": 0.246, "step": 7106, "teacher_loss": 0.1940385401248932 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.38666030764579773, "learning_rate": 2.999379294237003e-05, "loss": 0.2666, "step": 7107, "teacher_loss": 0.2532583177089691 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.6941266655921936, "learning_rate": 2.9993727437546708e-05, "loss": 0.3745, "step": 7108, "teacher_loss": 0.3390410840511322 }, { "compression_loss": 0.0, "epoch": 1.28, "label_loss": 0.39075225591659546, "learning_rate": 2.9993661588962165e-05, "loss": 0.2317, "step": 7109, "teacher_loss": 0.21402406692504883 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.25054943561553955, "learning_rate": 2.9993595396617916e-05, "loss": 0.2321, "step": 7110, "teacher_loss": 0.2300378382205963 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3752928674221039, "learning_rate": 2.999352886051548e-05, "loss": 0.2067, "step": 7111, "teacher_loss": 0.18799328804016113 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.22528742253780365, "learning_rate": 2.999346198065638e-05, "loss": 0.2084, "step": 7112, "teacher_loss": 0.20649507641792297 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.799654483795166, "learning_rate": 2.9993394757042144e-05, "loss": 0.4136, "step": 7113, "teacher_loss": 0.37069135904312134 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3277563452720642, "learning_rate": 2.9993327189674323e-05, "loss": 0.2022, "step": 7114, "teacher_loss": 0.1882563829421997 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.47933632135391235, "learning_rate": 2.999325927855446e-05, "loss": 0.2216, "step": 7115, "teacher_loss": 0.19295820593833923 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.7805335521697998, "learning_rate": 2.9993191023684117e-05, "loss": 0.4002, "step": 7116, "teacher_loss": 0.3579510748386383 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.26992034912109375, "learning_rate": 2.9993122425064853e-05, "loss": 0.2685, "step": 7117, "teacher_loss": 0.2683701515197754 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3277221918106079, "learning_rate": 2.9993053482698246e-05, "loss": 0.2403, "step": 7118, "teacher_loss": 0.23059161007404327 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.9460057616233826, "learning_rate": 2.999298419658587e-05, "loss": 0.3593, "step": 7119, "teacher_loss": 0.2940809428691864 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.443335622549057, "learning_rate": 2.9992914566729322e-05, "loss": 0.2804, "step": 7120, "teacher_loss": 0.2623181939125061 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.4839046001434326, "learning_rate": 2.9992844593130192e-05, "loss": 0.3337, "step": 7121, "teacher_loss": 0.3170493245124817 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5446857810020447, "learning_rate": 2.9992774275790086e-05, "loss": 0.3235, "step": 7122, "teacher_loss": 0.2989093065261841 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.31440305709838867, "learning_rate": 2.9992703614710617e-05, "loss": 0.2488, "step": 7123, "teacher_loss": 0.24146537482738495 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.8490537405014038, "learning_rate": 2.9992632609893404e-05, "loss": 0.2901, "step": 7124, "teacher_loss": 0.22794979810714722 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.9768196940422058, "learning_rate": 2.9992561261340078e-05, "loss": 0.2772, "step": 7125, "teacher_loss": 0.19944468140602112 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.39531081914901733, "learning_rate": 2.999248956905227e-05, "loss": 0.2577, "step": 7126, "teacher_loss": 0.24241138994693756 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.46143633127212524, "learning_rate": 2.9992417533031624e-05, "loss": 0.3469, "step": 7127, "teacher_loss": 0.3341299295425415 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.7066437005996704, "learning_rate": 2.9992345153279798e-05, "loss": 0.3754, "step": 7128, "teacher_loss": 0.33864402770996094 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3455314040184021, "learning_rate": 2.9992272429798444e-05, "loss": 0.4258, "step": 7129, "teacher_loss": 0.4347544014453888 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.8617435693740845, "learning_rate": 2.9992199362589232e-05, "loss": 0.2916, "step": 7130, "teacher_loss": 0.22819873690605164 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5560719966888428, "learning_rate": 2.999212595165384e-05, "loss": 0.3574, "step": 7131, "teacher_loss": 0.33533573150634766 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.39647501707077026, "learning_rate": 2.999205219699395e-05, "loss": 0.2981, "step": 7132, "teacher_loss": 0.2871958017349243 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.8323809504508972, "learning_rate": 2.999197809861125e-05, "loss": 0.2977, "step": 7133, "teacher_loss": 0.23826347291469574 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.1918633133172989, "learning_rate": 2.999190365650744e-05, "loss": 0.2057, "step": 7134, "teacher_loss": 0.20724640786647797 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.6777631044387817, "learning_rate": 2.9991828870684224e-05, "loss": 0.4649, "step": 7135, "teacher_loss": 0.44128215312957764 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.2537914216518402, "learning_rate": 2.9991753741143323e-05, "loss": 0.2023, "step": 7136, "teacher_loss": 0.19660988450050354 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.4911045432090759, "learning_rate": 2.9991678267886458e-05, "loss": 0.4027, "step": 7137, "teacher_loss": 0.3928261399269104 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.6986756920814514, "learning_rate": 2.9991602450915355e-05, "loss": 0.4151, "step": 7138, "teacher_loss": 0.38354456424713135 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3526952266693115, "learning_rate": 2.9991526290231757e-05, "loss": 0.3109, "step": 7139, "teacher_loss": 0.306215763092041 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.7459595203399658, "learning_rate": 2.9991449785837405e-05, "loss": 0.3501, "step": 7140, "teacher_loss": 0.30617064237594604 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.6981514096260071, "learning_rate": 2.9991372937734057e-05, "loss": 0.2819, "step": 7141, "teacher_loss": 0.23560896515846252 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.31977200508117676, "learning_rate": 2.9991295745923476e-05, "loss": 0.2066, "step": 7142, "teacher_loss": 0.1939723640680313 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.47662538290023804, "learning_rate": 2.999121821040743e-05, "loss": 0.2765, "step": 7143, "teacher_loss": 0.25425055623054504 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5940206050872803, "learning_rate": 2.9991140331187695e-05, "loss": 0.4467, "step": 7144, "teacher_loss": 0.4303293824195862 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3084886968135834, "learning_rate": 2.9991062108266058e-05, "loss": 0.2447, "step": 7145, "teacher_loss": 0.2376406490802765 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.6255487203598022, "learning_rate": 2.999098354164431e-05, "loss": 0.2428, "step": 7146, "teacher_loss": 0.20024079084396362 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3910936713218689, "learning_rate": 2.999090463132426e-05, "loss": 0.2731, "step": 7147, "teacher_loss": 0.2599979639053345 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.36146464943885803, "learning_rate": 2.999082537730771e-05, "loss": 0.232, "step": 7148, "teacher_loss": 0.21756085753440857 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.48681795597076416, "learning_rate": 2.9990745779596477e-05, "loss": 0.2883, "step": 7149, "teacher_loss": 0.2662583589553833 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.28821074962615967, "learning_rate": 2.999066583819239e-05, "loss": 0.1824, "step": 7150, "teacher_loss": 0.17066849768161774 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.360034704208374, "learning_rate": 2.9990585553097278e-05, "loss": 0.2121, "step": 7151, "teacher_loss": 0.19562184810638428 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3686564862728119, "learning_rate": 2.9990504924312982e-05, "loss": 0.2756, "step": 7152, "teacher_loss": 0.2652715742588043 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 1.0196260213851929, "learning_rate": 2.9990423951841355e-05, "loss": 0.46, "step": 7153, "teacher_loss": 0.3978257477283478 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.47543373703956604, "learning_rate": 2.9990342635684245e-05, "loss": 0.3111, "step": 7154, "teacher_loss": 0.29283860325813293 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.36916032433509827, "learning_rate": 2.999026097584353e-05, "loss": 0.2307, "step": 7155, "teacher_loss": 0.21527311205863953 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.2077256739139557, "learning_rate": 2.9990178972321073e-05, "loss": 0.2493, "step": 7156, "teacher_loss": 0.2538982629776001 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.4414152503013611, "learning_rate": 2.9990096625118747e-05, "loss": 0.2288, "step": 7157, "teacher_loss": 0.20521017909049988 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.7102265954017639, "learning_rate": 2.9990013934238453e-05, "loss": 0.3283, "step": 7158, "teacher_loss": 0.2858346700668335 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.3479458689689636, "learning_rate": 2.9989930899682084e-05, "loss": 0.2229, "step": 7159, "teacher_loss": 0.20900848507881165 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5895493626594543, "learning_rate": 2.9989847521451542e-05, "loss": 0.2572, "step": 7160, "teacher_loss": 0.22029028832912445 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5886654853820801, "learning_rate": 2.9989763799548735e-05, "loss": 0.2714, "step": 7161, "teacher_loss": 0.23613232374191284 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.7484886646270752, "learning_rate": 2.9989679733975588e-05, "loss": 0.4315, "step": 7162, "teacher_loss": 0.3962434232234955 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.37206408381462097, "learning_rate": 2.9989595324734022e-05, "loss": 0.2553, "step": 7163, "teacher_loss": 0.24228841066360474 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.4293583631515503, "learning_rate": 2.998951057182598e-05, "loss": 0.3142, "step": 7164, "teacher_loss": 0.3014185428619385 }, { "compression_loss": 0.0, "epoch": 1.29, "label_loss": 0.5553855299949646, "learning_rate": 2.9989425475253398e-05, "loss": 0.2994, "step": 7165, "teacher_loss": 0.2709383964538574 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.48882007598876953, "learning_rate": 2.9989340035018233e-05, "loss": 0.3505, "step": 7166, "teacher_loss": 0.335141122341156 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.7337089776992798, "learning_rate": 2.9989254251122445e-05, "loss": 0.5625, "step": 7167, "teacher_loss": 0.5434370040893555 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.18416103720664978, "learning_rate": 2.998916812356799e-05, "loss": 0.2409, "step": 7168, "teacher_loss": 0.24715732038021088 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.3693809509277344, "learning_rate": 2.9989081652356853e-05, "loss": 0.2378, "step": 7169, "teacher_loss": 0.2231893688440323 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.8484172821044922, "learning_rate": 2.9988994837491017e-05, "loss": 0.3488, "step": 7170, "teacher_loss": 0.29326844215393066 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.26507607102394104, "learning_rate": 2.9988907678972462e-05, "loss": 0.1844, "step": 7171, "teacher_loss": 0.1754055917263031 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.30666136741638184, "learning_rate": 2.9988820176803194e-05, "loss": 0.2726, "step": 7172, "teacher_loss": 0.26880425214767456 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.5847693681716919, "learning_rate": 2.9988732330985223e-05, "loss": 0.2438, "step": 7173, "teacher_loss": 0.20587779581546783 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.8975269794464111, "learning_rate": 2.9988644141520557e-05, "loss": 0.6467, "step": 7174, "teacher_loss": 0.6188441514968872 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.7468587160110474, "learning_rate": 2.998855560841122e-05, "loss": 0.3792, "step": 7175, "teacher_loss": 0.3383748531341553 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.5169508457183838, "learning_rate": 2.9988466731659236e-05, "loss": 0.3108, "step": 7176, "teacher_loss": 0.2879098057746887 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.18669115006923676, "learning_rate": 2.998837751126665e-05, "loss": 0.2968, "step": 7177, "teacher_loss": 0.30907437205314636 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.45584535598754883, "learning_rate": 2.9988287947235505e-05, "loss": 0.301, "step": 7178, "teacher_loss": 0.28374212980270386 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.31353482604026794, "learning_rate": 2.9988198039567853e-05, "loss": 0.2093, "step": 7179, "teacher_loss": 0.197670117020607 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.47423866391181946, "learning_rate": 2.998810778826576e-05, "loss": 0.2429, "step": 7180, "teacher_loss": 0.21721352636814117 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.34817564487457275, "learning_rate": 2.9988017193331294e-05, "loss": 0.19, "step": 7181, "teacher_loss": 0.1724245250225067 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4925878643989563, "learning_rate": 2.9987926254766527e-05, "loss": 0.2297, "step": 7182, "teacher_loss": 0.20054292678833008 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4395211338996887, "learning_rate": 2.9987834972573544e-05, "loss": 0.2944, "step": 7183, "teacher_loss": 0.27822357416152954 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.7453950047492981, "learning_rate": 2.9987743346754447e-05, "loss": 0.3412, "step": 7184, "teacher_loss": 0.29634031653404236 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.3255147933959961, "learning_rate": 2.9987651377311325e-05, "loss": 0.1968, "step": 7185, "teacher_loss": 0.18244364857673645 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.5304484367370605, "learning_rate": 2.9987559064246296e-05, "loss": 0.2257, "step": 7186, "teacher_loss": 0.19182555377483368 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.6397098302841187, "learning_rate": 2.998746640756147e-05, "loss": 0.4704, "step": 7187, "teacher_loss": 0.4515638053417206 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.36433055996894836, "learning_rate": 2.9987373407258977e-05, "loss": 0.2345, "step": 7188, "teacher_loss": 0.22003450989723206 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.303589403629303, "learning_rate": 2.9987280063340946e-05, "loss": 0.2524, "step": 7189, "teacher_loss": 0.24666264653205872 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.46916067600250244, "learning_rate": 2.9987186375809513e-05, "loss": 0.2504, "step": 7190, "teacher_loss": 0.22612908482551575 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.19502362608909607, "learning_rate": 2.9987092344666835e-05, "loss": 0.237, "step": 7191, "teacher_loss": 0.2416379302740097 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4044319987297058, "learning_rate": 2.998699796991506e-05, "loss": 0.2648, "step": 7192, "teacher_loss": 0.24926723539829254 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.33592790365219116, "learning_rate": 2.998690325155636e-05, "loss": 0.2243, "step": 7193, "teacher_loss": 0.2118675410747528 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.8964804410934448, "learning_rate": 2.9986808189592897e-05, "loss": 0.3862, "step": 7194, "teacher_loss": 0.32946592569351196 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.2786426842212677, "learning_rate": 2.9986712784026857e-05, "loss": 0.1845, "step": 7195, "teacher_loss": 0.17401817440986633 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.8743244409561157, "learning_rate": 2.9986617034860425e-05, "loss": 0.4471, "step": 7196, "teacher_loss": 0.3996553421020508 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.1617792248725891, "learning_rate": 2.9986520942095797e-05, "loss": 0.2246, "step": 7197, "teacher_loss": 0.23155635595321655 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.728060781955719, "learning_rate": 2.9986424505735174e-05, "loss": 0.6774, "step": 7198, "teacher_loss": 0.6718258857727051 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4503127336502075, "learning_rate": 2.998632772578077e-05, "loss": 0.1932, "step": 7199, "teacher_loss": 0.16466861963272095 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4381275773048401, "learning_rate": 2.9986230602234804e-05, "loss": 0.2348, "step": 7200, "teacher_loss": 0.2122519612312317 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.41315221786499023, "learning_rate": 2.99861331350995e-05, "loss": 0.1811, "step": 7201, "teacher_loss": 0.15530559420585632 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.18667566776275635, "learning_rate": 2.998603532437709e-05, "loss": 0.1219, "step": 7202, "teacher_loss": 0.11466941982507706 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.5891701579093933, "learning_rate": 2.9985937170069825e-05, "loss": 0.3042, "step": 7203, "teacher_loss": 0.2725449800491333 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4828670024871826, "learning_rate": 2.9985838672179954e-05, "loss": 0.3554, "step": 7204, "teacher_loss": 0.34118831157684326 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.498772531747818, "learning_rate": 2.9985739830709724e-05, "loss": 0.4497, "step": 7205, "teacher_loss": 0.444266140460968 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.20209330320358276, "learning_rate": 2.9985640645661414e-05, "loss": 0.187, "step": 7206, "teacher_loss": 0.1853528916835785 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4938291013240814, "learning_rate": 2.9985541117037295e-05, "loss": 0.3122, "step": 7207, "teacher_loss": 0.2919834852218628 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.31927141547203064, "learning_rate": 2.9985441244839642e-05, "loss": 0.2126, "step": 7208, "teacher_loss": 0.200755774974823 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.2732795178890228, "learning_rate": 2.998534102907075e-05, "loss": 0.2277, "step": 7209, "teacher_loss": 0.22258678078651428 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.43740200996398926, "learning_rate": 2.998524046973292e-05, "loss": 0.2202, "step": 7210, "teacher_loss": 0.19607087969779968 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.5119949579238892, "learning_rate": 2.9985139566828457e-05, "loss": 0.327, "step": 7211, "teacher_loss": 0.3064630329608917 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.6115130186080933, "learning_rate": 2.9985038320359667e-05, "loss": 0.2583, "step": 7212, "teacher_loss": 0.21901297569274902 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 1.0921207666397095, "learning_rate": 2.9984936730328873e-05, "loss": 0.2745, "step": 7213, "teacher_loss": 0.1836167722940445 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.475678026676178, "learning_rate": 2.9984834796738412e-05, "loss": 0.3437, "step": 7214, "teacher_loss": 0.3289860486984253 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 1.2674893140792847, "learning_rate": 2.9984732519590615e-05, "loss": 0.5025, "step": 7215, "teacher_loss": 0.41754835844039917 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.15699143707752228, "learning_rate": 2.998462989888783e-05, "loss": 0.178, "step": 7216, "teacher_loss": 0.18035903573036194 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.8209565281867981, "learning_rate": 2.9984526934632402e-05, "loss": 0.4067, "step": 7217, "teacher_loss": 0.36064085364341736 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.6676613092422485, "learning_rate": 2.99844236268267e-05, "loss": 0.3649, "step": 7218, "teacher_loss": 0.3312046527862549 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.4311748147010803, "learning_rate": 2.9984319975473092e-05, "loss": 0.3433, "step": 7219, "teacher_loss": 0.3335148096084595 }, { "compression_loss": 0.0, "epoch": 1.3, "label_loss": 0.47203755378723145, "learning_rate": 2.9984215980573947e-05, "loss": 0.2357, "step": 7220, "teacher_loss": 0.20947599411010742 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.7399529814720154, "learning_rate": 2.9984111642131662e-05, "loss": 0.2939, "step": 7221, "teacher_loss": 0.2443842589855194 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.5778563618659973, "learning_rate": 2.9984006960148616e-05, "loss": 0.2241, "step": 7222, "teacher_loss": 0.18481546640396118 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.5575137138366699, "learning_rate": 2.9983901934627222e-05, "loss": 0.2614, "step": 7223, "teacher_loss": 0.2285292148590088 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.5923289060592651, "learning_rate": 2.998379656556987e-05, "loss": 0.3971, "step": 7224, "teacher_loss": 0.3754255771636963 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2733168601989746, "learning_rate": 2.9983690852978995e-05, "loss": 0.2204, "step": 7225, "teacher_loss": 0.21456539630889893 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.43110713362693787, "learning_rate": 2.9983584796857007e-05, "loss": 0.2592, "step": 7226, "teacher_loss": 0.24008886516094208 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.4446399211883545, "learning_rate": 2.9983478397206344e-05, "loss": 0.2656, "step": 7227, "teacher_loss": 0.24568751454353333 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.4208083152770996, "learning_rate": 2.998337165402945e-05, "loss": 0.2599, "step": 7228, "teacher_loss": 0.242002934217453 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.35321855545043945, "learning_rate": 2.9983264567328756e-05, "loss": 0.3652, "step": 7229, "teacher_loss": 0.3664790391921997 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.3883362412452698, "learning_rate": 2.9983157137106737e-05, "loss": 0.2097, "step": 7230, "teacher_loss": 0.18981149792671204 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8627563714981079, "learning_rate": 2.998304936336584e-05, "loss": 0.5401, "step": 7231, "teacher_loss": 0.5042321085929871 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.29157111048698425, "learning_rate": 2.9982941246108543e-05, "loss": 0.2588, "step": 7232, "teacher_loss": 0.25512123107910156 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.1831384301185608, "learning_rate": 2.998283278533733e-05, "loss": 0.1961, "step": 7233, "teacher_loss": 0.19749119877815247 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8864837288856506, "learning_rate": 2.9982723981054677e-05, "loss": 0.3289, "step": 7234, "teacher_loss": 0.26696956157684326 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.3303971290588379, "learning_rate": 2.9982614833263083e-05, "loss": 0.247, "step": 7235, "teacher_loss": 0.2377208024263382 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.6651716828346252, "learning_rate": 2.9982505341965056e-05, "loss": 0.3817, "step": 7236, "teacher_loss": 0.35018348693847656 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.40689969062805176, "learning_rate": 2.99823955071631e-05, "loss": 0.2335, "step": 7237, "teacher_loss": 0.21428629755973816 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2402096688747406, "learning_rate": 2.9982285328859737e-05, "loss": 0.2086, "step": 7238, "teacher_loss": 0.20507864654064178 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.24840183556079865, "learning_rate": 2.9982174807057486e-05, "loss": 0.2299, "step": 7239, "teacher_loss": 0.22786910831928253 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8850395679473877, "learning_rate": 2.9982063941758882e-05, "loss": 0.3104, "step": 7240, "teacher_loss": 0.24658949673175812 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.49173837900161743, "learning_rate": 2.9981952732966477e-05, "loss": 0.3805, "step": 7241, "teacher_loss": 0.3680863380432129 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.7370061278343201, "learning_rate": 2.998184118068281e-05, "loss": 0.278, "step": 7242, "teacher_loss": 0.2270524799823761 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.23076987266540527, "learning_rate": 2.998172928491045e-05, "loss": 0.2173, "step": 7243, "teacher_loss": 0.21583566069602966 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.416591078042984, "learning_rate": 2.9981617045651946e-05, "loss": 0.2343, "step": 7244, "teacher_loss": 0.21407851576805115 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.3746960759162903, "learning_rate": 2.9981504462909887e-05, "loss": 0.1991, "step": 7245, "teacher_loss": 0.1795339584350586 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8359163999557495, "learning_rate": 2.998139153668684e-05, "loss": 0.765, "step": 7246, "teacher_loss": 0.7571512460708618 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.6188288927078247, "learning_rate": 2.998127826698541e-05, "loss": 0.4419, "step": 7247, "teacher_loss": 0.42219096422195435 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.21094536781311035, "learning_rate": 2.998116465380818e-05, "loss": 0.2272, "step": 7248, "teacher_loss": 0.2290198802947998 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.32698947191238403, "learning_rate": 2.9981050697157762e-05, "loss": 0.2289, "step": 7249, "teacher_loss": 0.21796278655529022 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.3986563980579376, "learning_rate": 2.9980936397036768e-05, "loss": 0.2201, "step": 7250, "teacher_loss": 0.20028197765350342 }, { "epoch": 1.31, "eval_exact_match": 79.30936613055819, "eval_f1": 86.74824375149826, "step": 7250 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2922189235687256, "learning_rate": 2.9980821753447818e-05, "loss": 0.2295, "step": 7251, "teacher_loss": 0.22257539629936218 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.19028058648109436, "learning_rate": 2.998070676639354e-05, "loss": 0.1842, "step": 7252, "teacher_loss": 0.18350613117218018 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.4416945278644562, "learning_rate": 2.998059143587657e-05, "loss": 0.2527, "step": 7253, "teacher_loss": 0.23173725605010986 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.9720132946968079, "learning_rate": 2.998047576189955e-05, "loss": 0.5182, "step": 7254, "teacher_loss": 0.4677361845970154 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.33549728989601135, "learning_rate": 2.9980359744465134e-05, "loss": 0.204, "step": 7255, "teacher_loss": 0.1893874704837799 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2665591239929199, "learning_rate": 2.9980243383575986e-05, "loss": 0.1736, "step": 7256, "teacher_loss": 0.16327320039272308 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.4667154848575592, "learning_rate": 2.998012667923477e-05, "loss": 0.2381, "step": 7257, "teacher_loss": 0.21275238692760468 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.6106563806533813, "learning_rate": 2.998000963144416e-05, "loss": 0.3463, "step": 7258, "teacher_loss": 0.3169543743133545 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.48956573009490967, "learning_rate": 2.997989224020684e-05, "loss": 0.2344, "step": 7259, "teacher_loss": 0.2060491144657135 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.5121805667877197, "learning_rate": 2.9979774505525512e-05, "loss": 0.31, "step": 7260, "teacher_loss": 0.287537157535553 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.17669223248958588, "learning_rate": 2.9979656427402857e-05, "loss": 0.1679, "step": 7261, "teacher_loss": 0.1669164001941681 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.4805675148963928, "learning_rate": 2.9979538005841594e-05, "loss": 0.3345, "step": 7262, "teacher_loss": 0.3183096945285797 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.1577012836933136, "learning_rate": 2.9979419240844435e-05, "loss": 0.1925, "step": 7263, "teacher_loss": 0.19635093212127686 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.24642488360404968, "learning_rate": 2.9979300132414105e-05, "loss": 0.2278, "step": 7264, "teacher_loss": 0.2257044017314911 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.953657865524292, "learning_rate": 2.9979180680553336e-05, "loss": 0.3425, "step": 7265, "teacher_loss": 0.2746277153491974 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2676847577095032, "learning_rate": 2.997906088526486e-05, "loss": 0.2003, "step": 7266, "teacher_loss": 0.19284990429878235 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.841678261756897, "learning_rate": 2.9978940746551426e-05, "loss": 0.3766, "step": 7267, "teacher_loss": 0.32495301961898804 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 1.0753730535507202, "learning_rate": 2.997882026441579e-05, "loss": 0.4057, "step": 7268, "teacher_loss": 0.33123910427093506 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.6366230249404907, "learning_rate": 2.9978699438860718e-05, "loss": 0.4198, "step": 7269, "teacher_loss": 0.3957279324531555 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.31219756603240967, "learning_rate": 2.9978578269888974e-05, "loss": 0.2013, "step": 7270, "teacher_loss": 0.18897521495819092 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8002399206161499, "learning_rate": 2.9978456757503337e-05, "loss": 0.3853, "step": 7271, "teacher_loss": 0.3391510248184204 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.2662709355354309, "learning_rate": 2.9978334901706597e-05, "loss": 0.3012, "step": 7272, "teacher_loss": 0.30507731437683105 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.24473556876182556, "learning_rate": 2.997821270250154e-05, "loss": 0.277, "step": 7273, "teacher_loss": 0.28056466579437256 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.34481680393218994, "learning_rate": 2.9978090159890984e-05, "loss": 0.2123, "step": 7274, "teacher_loss": 0.19757075607776642 }, { "compression_loss": 0.0, "epoch": 1.31, "label_loss": 0.8017687797546387, "learning_rate": 2.997796727387772e-05, "loss": 0.3596, "step": 7275, "teacher_loss": 0.31043741106987 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3086521625518799, "learning_rate": 2.997784404446457e-05, "loss": 0.3249, "step": 7276, "teacher_loss": 0.3267475664615631 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.4343588948249817, "learning_rate": 2.9977720471654366e-05, "loss": 0.2874, "step": 7277, "teacher_loss": 0.2710815668106079 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.28277331590652466, "learning_rate": 2.9977596555449934e-05, "loss": 0.2012, "step": 7278, "teacher_loss": 0.19212418794631958 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.2767001986503601, "learning_rate": 2.9977472295854123e-05, "loss": 0.21, "step": 7279, "teacher_loss": 0.20257991552352905 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.6051468253135681, "learning_rate": 2.9977347692869773e-05, "loss": 0.2635, "step": 7280, "teacher_loss": 0.22550326585769653 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.2482650727033615, "learning_rate": 2.997722274649974e-05, "loss": 0.2438, "step": 7281, "teacher_loss": 0.24328620731830597 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.4350046217441559, "learning_rate": 2.9977097456746904e-05, "loss": 0.2313, "step": 7282, "teacher_loss": 0.2086797058582306 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.42968565225601196, "learning_rate": 2.997697182361412e-05, "loss": 0.2518, "step": 7283, "teacher_loss": 0.23206770420074463 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.2455797642469406, "learning_rate": 2.997684584710428e-05, "loss": 0.1611, "step": 7284, "teacher_loss": 0.151741161942482 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.9952998161315918, "learning_rate": 2.9976719527220265e-05, "loss": 0.503, "step": 7285, "teacher_loss": 0.44833970069885254 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5655444264411926, "learning_rate": 2.9976592863964975e-05, "loss": 0.237, "step": 7286, "teacher_loss": 0.20048922300338745 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.25527966022491455, "learning_rate": 2.9976465857341312e-05, "loss": 0.1641, "step": 7287, "teacher_loss": 0.1540106236934662 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8168010711669922, "learning_rate": 2.9976338507352187e-05, "loss": 0.3515, "step": 7288, "teacher_loss": 0.2997886836528778 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8808541297912598, "learning_rate": 2.9976210814000522e-05, "loss": 0.2799, "step": 7289, "teacher_loss": 0.21310442686080933 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.09543114900588989, "learning_rate": 2.9976082777289247e-05, "loss": 0.1929, "step": 7290, "teacher_loss": 0.20374110341072083 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5572643876075745, "learning_rate": 2.997595439722129e-05, "loss": 0.3074, "step": 7291, "teacher_loss": 0.27965646982192993 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.29949676990509033, "learning_rate": 2.9975825673799602e-05, "loss": 0.1926, "step": 7292, "teacher_loss": 0.1807679682970047 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.432328462600708, "learning_rate": 2.997569660702713e-05, "loss": 0.1884, "step": 7293, "teacher_loss": 0.1613493263721466 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.6709614992141724, "learning_rate": 2.997556719690684e-05, "loss": 0.3086, "step": 7294, "teacher_loss": 0.26828891038894653 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.9702615141868591, "learning_rate": 2.9975437443441686e-05, "loss": 0.2902, "step": 7295, "teacher_loss": 0.21461263298988342 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8656134605407715, "learning_rate": 2.9975307346634654e-05, "loss": 0.2512, "step": 7296, "teacher_loss": 0.18290898203849792 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5771152973175049, "learning_rate": 2.997517690648872e-05, "loss": 0.2408, "step": 7297, "teacher_loss": 0.20338109135627747 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8216719627380371, "learning_rate": 2.9975046123006876e-05, "loss": 0.3716, "step": 7298, "teacher_loss": 0.32157260179519653 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.34991562366485596, "learning_rate": 2.9974914996192124e-05, "loss": 0.2098, "step": 7299, "teacher_loss": 0.19417712092399597 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.504607081413269, "learning_rate": 2.997478352604747e-05, "loss": 0.3049, "step": 7300, "teacher_loss": 0.2827131748199463 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3709542155265808, "learning_rate": 2.9974651712575925e-05, "loss": 0.311, "step": 7301, "teacher_loss": 0.30433326959609985 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.4605497121810913, "learning_rate": 2.997451955578051e-05, "loss": 0.3476, "step": 7302, "teacher_loss": 0.3350525498390198 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5222505331039429, "learning_rate": 2.9974387055664264e-05, "loss": 0.2586, "step": 7303, "teacher_loss": 0.22925019264221191 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.45401233434677124, "learning_rate": 2.9974254212230213e-05, "loss": 0.229, "step": 7304, "teacher_loss": 0.2039814591407776 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3161039352416992, "learning_rate": 2.997412102548141e-05, "loss": 0.2729, "step": 7305, "teacher_loss": 0.26813262701034546 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.36586785316467285, "learning_rate": 2.9973987495420903e-05, "loss": 0.2891, "step": 7306, "teacher_loss": 0.280546098947525 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.310201495885849, "learning_rate": 2.9973853622051763e-05, "loss": 0.2356, "step": 7307, "teacher_loss": 0.22728517651557922 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3546815514564514, "learning_rate": 2.9973719405377052e-05, "loss": 0.2363, "step": 7308, "teacher_loss": 0.22311818599700928 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.7373230457305908, "learning_rate": 2.9973584845399847e-05, "loss": 0.2632, "step": 7309, "teacher_loss": 0.21047137677669525 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.42884954810142517, "learning_rate": 2.9973449942123235e-05, "loss": 0.2186, "step": 7310, "teacher_loss": 0.19527886807918549 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.7588491439819336, "learning_rate": 2.9973314695550308e-05, "loss": 0.3227, "step": 7311, "teacher_loss": 0.27427947521209717 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3305927813053131, "learning_rate": 2.9973179105684167e-05, "loss": 0.2645, "step": 7312, "teacher_loss": 0.25716230273246765 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.4629131853580475, "learning_rate": 2.9973043172527923e-05, "loss": 0.2808, "step": 7313, "teacher_loss": 0.26051461696624756 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.42065292596817017, "learning_rate": 2.9972906896084688e-05, "loss": 0.2061, "step": 7314, "teacher_loss": 0.18229080736637115 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5817309617996216, "learning_rate": 2.997277027635759e-05, "loss": 0.3361, "step": 7315, "teacher_loss": 0.3087572455406189 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3875213861465454, "learning_rate": 2.9972633313349764e-05, "loss": 0.4142, "step": 7316, "teacher_loss": 0.4171638488769531 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5381307005882263, "learning_rate": 2.9972496007064343e-05, "loss": 0.2663, "step": 7317, "teacher_loss": 0.23611651360988617 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.40636610984802246, "learning_rate": 2.997235835750448e-05, "loss": 0.3038, "step": 7318, "teacher_loss": 0.2923821210861206 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.6476719379425049, "learning_rate": 2.9972220364673327e-05, "loss": 0.2671, "step": 7319, "teacher_loss": 0.2248186469078064 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.6620304584503174, "learning_rate": 2.997208202857405e-05, "loss": 0.2435, "step": 7320, "teacher_loss": 0.19694823026657104 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.7568368911743164, "learning_rate": 2.997194334920982e-05, "loss": 0.3799, "step": 7321, "teacher_loss": 0.33800676465034485 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.4141192138195038, "learning_rate": 2.997180432658382e-05, "loss": 0.239, "step": 7322, "teacher_loss": 0.21951784193515778 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.41509801149368286, "learning_rate": 2.9971664960699234e-05, "loss": 0.2544, "step": 7323, "teacher_loss": 0.23653200268745422 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3522759974002838, "learning_rate": 2.997152525155926e-05, "loss": 0.3048, "step": 7324, "teacher_loss": 0.299490749835968 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.23051688075065613, "learning_rate": 2.9971385199167093e-05, "loss": 0.1851, "step": 7325, "teacher_loss": 0.18002855777740479 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3328905403614044, "learning_rate": 2.9971244803525956e-05, "loss": 0.2161, "step": 7326, "teacher_loss": 0.2031579613685608 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.46655842661857605, "learning_rate": 2.9971104064639055e-05, "loss": 0.2304, "step": 7327, "teacher_loss": 0.20419305562973022 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8703953623771667, "learning_rate": 2.9970962982509627e-05, "loss": 0.2976, "step": 7328, "teacher_loss": 0.23398801684379578 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.5596283674240112, "learning_rate": 2.9970821557140904e-05, "loss": 0.309, "step": 7329, "teacher_loss": 0.28118613362312317 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.3566111922264099, "learning_rate": 2.9970679788536127e-05, "loss": 0.2713, "step": 7330, "teacher_loss": 0.2618138790130615 }, { "compression_loss": 0.0, "epoch": 1.32, "label_loss": 0.8700116872787476, "learning_rate": 2.9970537676698547e-05, "loss": 0.6352, "step": 7331, "teacher_loss": 0.6091054677963257 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.20053735375404358, "learning_rate": 2.9970395221631422e-05, "loss": 0.2262, "step": 7332, "teacher_loss": 0.2291019856929779 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 1.2145837545394897, "learning_rate": 2.997025242333802e-05, "loss": 0.3772, "step": 7333, "teacher_loss": 0.28420233726501465 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.49879172444343567, "learning_rate": 2.9970109281821608e-05, "loss": 0.3395, "step": 7334, "teacher_loss": 0.3218291401863098 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.45375770330429077, "learning_rate": 2.9969965797085478e-05, "loss": 0.2945, "step": 7335, "teacher_loss": 0.27675020694732666 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.39490634202957153, "learning_rate": 2.9969821969132912e-05, "loss": 0.2063, "step": 7336, "teacher_loss": 0.18534669280052185 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6186497807502747, "learning_rate": 2.996967779796721e-05, "loss": 0.2496, "step": 7337, "teacher_loss": 0.2086172252893448 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6019611358642578, "learning_rate": 2.996953328359168e-05, "loss": 0.3144, "step": 7338, "teacher_loss": 0.2824944853782654 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.1855420172214508, "learning_rate": 2.9969388426009632e-05, "loss": 0.1781, "step": 7339, "teacher_loss": 0.17729389667510986 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.45689040422439575, "learning_rate": 2.9969243225224386e-05, "loss": 0.2474, "step": 7340, "teacher_loss": 0.22407817840576172 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2994353473186493, "learning_rate": 2.9969097681239274e-05, "loss": 0.2535, "step": 7341, "teacher_loss": 0.24840155243873596 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.5911794900894165, "learning_rate": 2.9968951794057633e-05, "loss": 0.3059, "step": 7342, "teacher_loss": 0.2742496728897095 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.40410545468330383, "learning_rate": 2.9968805563682805e-05, "loss": 0.2577, "step": 7343, "teacher_loss": 0.24144470691680908 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.11744584143161774, "learning_rate": 2.9968658990118145e-05, "loss": 0.1945, "step": 7344, "teacher_loss": 0.20309729874134064 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6942311525344849, "learning_rate": 2.9968512073367015e-05, "loss": 0.2202, "step": 7345, "teacher_loss": 0.167494535446167 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.3191685378551483, "learning_rate": 2.9968364813432774e-05, "loss": 0.252, "step": 7346, "teacher_loss": 0.24453842639923096 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.46853527426719666, "learning_rate": 2.9968217210318808e-05, "loss": 0.2723, "step": 7347, "teacher_loss": 0.2505247890949249 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.635383665561676, "learning_rate": 2.9968069264028505e-05, "loss": 0.3027, "step": 7348, "teacher_loss": 0.2657429277896881 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.1864016354084015, "learning_rate": 2.9967920974565243e-05, "loss": 0.2322, "step": 7349, "teacher_loss": 0.23734194040298462 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2718780040740967, "learning_rate": 2.9967772341932433e-05, "loss": 0.2764, "step": 7350, "teacher_loss": 0.27686163783073425 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2989136278629303, "learning_rate": 2.9967623366133475e-05, "loss": 0.2113, "step": 7351, "teacher_loss": 0.2015347182750702 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.3854275941848755, "learning_rate": 2.9967474047171793e-05, "loss": 0.42, "step": 7352, "teacher_loss": 0.4237934947013855 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.5246375799179077, "learning_rate": 2.9967324385050806e-05, "loss": 0.2373, "step": 7353, "teacher_loss": 0.20537760853767395 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.5562995076179504, "learning_rate": 2.9967174379773943e-05, "loss": 0.2478, "step": 7354, "teacher_loss": 0.2135460078716278 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.29658836126327515, "learning_rate": 2.9967024031344646e-05, "loss": 0.2276, "step": 7355, "teacher_loss": 0.21991756558418274 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.43919995427131653, "learning_rate": 2.996687333976636e-05, "loss": 0.3253, "step": 7356, "teacher_loss": 0.31269368529319763 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.3799929916858673, "learning_rate": 2.9966722305042544e-05, "loss": 0.2909, "step": 7357, "teacher_loss": 0.2810153365135193 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6406636238098145, "learning_rate": 2.9966570927176653e-05, "loss": 0.2437, "step": 7358, "teacher_loss": 0.19963723421096802 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.3877202272415161, "learning_rate": 2.9966419206172167e-05, "loss": 0.22, "step": 7359, "teacher_loss": 0.20136913657188416 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.7347228527069092, "learning_rate": 2.996626714203256e-05, "loss": 0.311, "step": 7360, "teacher_loss": 0.2638879418373108 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.68113774061203, "learning_rate": 2.9966114734761318e-05, "loss": 0.3111, "step": 7361, "teacher_loss": 0.269944429397583 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4550720155239105, "learning_rate": 2.9965961984361936e-05, "loss": 0.3732, "step": 7362, "teacher_loss": 0.36410146951675415 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4915362000465393, "learning_rate": 2.9965808890837916e-05, "loss": 0.2217, "step": 7363, "teacher_loss": 0.1917087584733963 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6807525157928467, "learning_rate": 2.9965655454192765e-05, "loss": 0.3509, "step": 7364, "teacher_loss": 0.31422409415245056 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.42059653997421265, "learning_rate": 2.996550167443001e-05, "loss": 0.2058, "step": 7365, "teacher_loss": 0.1819540560245514 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.34519878029823303, "learning_rate": 2.996534755155317e-05, "loss": 0.2145, "step": 7366, "teacher_loss": 0.19994811713695526 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4315633773803711, "learning_rate": 2.9965193085565774e-05, "loss": 0.2369, "step": 7367, "teacher_loss": 0.2152448296546936 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6711968183517456, "learning_rate": 2.996503827647137e-05, "loss": 0.4459, "step": 7368, "teacher_loss": 0.42087170481681824 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.40061306953430176, "learning_rate": 2.9964883124273508e-05, "loss": 0.3204, "step": 7369, "teacher_loss": 0.31147223711013794 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6667156219482422, "learning_rate": 2.996472762897574e-05, "loss": 0.3206, "step": 7370, "teacher_loss": 0.28214824199676514 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4160667061805725, "learning_rate": 2.996457179058164e-05, "loss": 0.2453, "step": 7371, "teacher_loss": 0.2263103425502777 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.34996652603149414, "learning_rate": 2.9964415609094767e-05, "loss": 0.2354, "step": 7372, "teacher_loss": 0.2226373255252838 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6272486448287964, "learning_rate": 2.9964259084518718e-05, "loss": 0.472, "step": 7373, "teacher_loss": 0.45476824045181274 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2410384863615036, "learning_rate": 2.9964102216857062e-05, "loss": 0.2394, "step": 7374, "teacher_loss": 0.23927034437656403 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.22640088200569153, "learning_rate": 2.9963945006113416e-05, "loss": 0.144, "step": 7375, "teacher_loss": 0.13487055897712708 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2411831021308899, "learning_rate": 2.9963787452291376e-05, "loss": 0.2467, "step": 7376, "teacher_loss": 0.24735099077224731 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4106539487838745, "learning_rate": 2.9963629555394548e-05, "loss": 0.2563, "step": 7377, "teacher_loss": 0.23909598588943481 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.5019468665122986, "learning_rate": 2.9963471315426558e-05, "loss": 0.4787, "step": 7378, "teacher_loss": 0.4760851562023163 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.7579314708709717, "learning_rate": 2.9963312732391037e-05, "loss": 0.3872, "step": 7379, "teacher_loss": 0.3459699749946594 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 1.0692360401153564, "learning_rate": 2.9963153806291617e-05, "loss": 0.4457, "step": 7380, "teacher_loss": 0.37641799449920654 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.46559929847717285, "learning_rate": 2.996299453713194e-05, "loss": 0.2553, "step": 7381, "teacher_loss": 0.23196925222873688 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.5425081253051758, "learning_rate": 2.9962834924915662e-05, "loss": 0.4174, "step": 7382, "teacher_loss": 0.4035385847091675 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.6741442680358887, "learning_rate": 2.996267496964644e-05, "loss": 0.2465, "step": 7383, "teacher_loss": 0.19900484383106232 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.2983776926994324, "learning_rate": 2.996251467132794e-05, "loss": 0.2445, "step": 7384, "teacher_loss": 0.2384839653968811 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.4212455749511719, "learning_rate": 2.9962354029963835e-05, "loss": 0.3394, "step": 7385, "teacher_loss": 0.330324649810791 }, { "compression_loss": 0.0, "epoch": 1.33, "label_loss": 0.17655718326568604, "learning_rate": 2.9962193045557816e-05, "loss": 0.2044, "step": 7386, "teacher_loss": 0.2074650377035141 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.24185813963413239, "learning_rate": 2.996203171811357e-05, "loss": 0.2186, "step": 7387, "teacher_loss": 0.21602776646614075 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.45158565044403076, "learning_rate": 2.9961870047634795e-05, "loss": 0.2737, "step": 7388, "teacher_loss": 0.25394681096076965 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4181064963340759, "learning_rate": 2.9961708034125196e-05, "loss": 0.2393, "step": 7389, "teacher_loss": 0.21941301226615906 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2609187364578247, "learning_rate": 2.996154567758849e-05, "loss": 0.4262, "step": 7390, "teacher_loss": 0.4446113109588623 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.44070518016815186, "learning_rate": 2.99613829780284e-05, "loss": 0.2185, "step": 7391, "teacher_loss": 0.1938561648130417 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2816624045372009, "learning_rate": 2.996121993544865e-05, "loss": 0.2154, "step": 7392, "teacher_loss": 0.20804142951965332 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4211142361164093, "learning_rate": 2.996105654985299e-05, "loss": 0.2525, "step": 7393, "teacher_loss": 0.23376566171646118 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.3629847466945648, "learning_rate": 2.9960892821245152e-05, "loss": 0.2108, "step": 7394, "teacher_loss": 0.1938496232032776 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.09167563915252686, "learning_rate": 2.99607287496289e-05, "loss": 0.2099, "step": 7395, "teacher_loss": 0.22303424775600433 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.6031914353370667, "learning_rate": 2.9960564335007996e-05, "loss": 0.279, "step": 7396, "teacher_loss": 0.24294881522655487 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.5824425220489502, "learning_rate": 2.99603995773862e-05, "loss": 0.311, "step": 7397, "teacher_loss": 0.28085461258888245 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4841139316558838, "learning_rate": 2.99602344767673e-05, "loss": 0.2139, "step": 7398, "teacher_loss": 0.183875173330307 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.15167561173439026, "learning_rate": 2.9960069033155072e-05, "loss": 0.1765, "step": 7399, "teacher_loss": 0.17922484874725342 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.8382759094238281, "learning_rate": 2.9959903246553316e-05, "loss": 0.3067, "step": 7400, "teacher_loss": 0.2476838082075119 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.45282816886901855, "learning_rate": 2.995973711696583e-05, "loss": 0.3477, "step": 7401, "teacher_loss": 0.335995078086853 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4538745582103729, "learning_rate": 2.9959570644396423e-05, "loss": 0.2624, "step": 7402, "teacher_loss": 0.24115785956382751 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.542221188545227, "learning_rate": 2.9959403828848916e-05, "loss": 0.2999, "step": 7403, "teacher_loss": 0.2729555368423462 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.13862469792366028, "learning_rate": 2.9959236670327127e-05, "loss": 0.1789, "step": 7404, "teacher_loss": 0.1833406686782837 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.22919392585754395, "learning_rate": 2.995906916883489e-05, "loss": 0.1772, "step": 7405, "teacher_loss": 0.17146193981170654 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.3760446012020111, "learning_rate": 2.9958901324376046e-05, "loss": 0.1895, "step": 7406, "teacher_loss": 0.16875171661376953 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2690447270870209, "learning_rate": 2.9958733136954452e-05, "loss": 0.1986, "step": 7407, "teacher_loss": 0.19072780013084412 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4795798063278198, "learning_rate": 2.9958564606573947e-05, "loss": 0.2302, "step": 7408, "teacher_loss": 0.20250046253204346 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2868333160877228, "learning_rate": 2.9958395733238405e-05, "loss": 0.2491, "step": 7409, "teacher_loss": 0.2449251115322113 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2213318645954132, "learning_rate": 2.9958226516951697e-05, "loss": 0.2565, "step": 7410, "teacher_loss": 0.26037633419036865 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.4070155918598175, "learning_rate": 2.99580569577177e-05, "loss": 0.3134, "step": 7411, "teacher_loss": 0.3030399680137634 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.39761602878570557, "learning_rate": 2.9957887055540307e-05, "loss": 0.2598, "step": 7412, "teacher_loss": 0.24451839923858643 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.54997718334198, "learning_rate": 2.995771681042341e-05, "loss": 0.3066, "step": 7413, "teacher_loss": 0.27954599261283875 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2787015438079834, "learning_rate": 2.9957546222370914e-05, "loss": 0.3053, "step": 7414, "teacher_loss": 0.30828937888145447 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.48375430703163147, "learning_rate": 2.9957375291386727e-05, "loss": 0.2591, "step": 7415, "teacher_loss": 0.2340962141752243 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2840811312198639, "learning_rate": 2.9957204017474767e-05, "loss": 0.2304, "step": 7416, "teacher_loss": 0.22444060444831848 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.18929371237754822, "learning_rate": 2.995703240063896e-05, "loss": 0.1423, "step": 7417, "teacher_loss": 0.13702404499053955 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 1.0329270362854004, "learning_rate": 2.995686044088325e-05, "loss": 0.4708, "step": 7418, "teacher_loss": 0.40832197666168213 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2621626555919647, "learning_rate": 2.9956688138211567e-05, "loss": 0.2658, "step": 7419, "teacher_loss": 0.26625746488571167 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.5005480647087097, "learning_rate": 2.995651549262787e-05, "loss": 0.3065, "step": 7420, "teacher_loss": 0.2849048972129822 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.3439386188983917, "learning_rate": 2.995634250413612e-05, "loss": 0.2486, "step": 7421, "teacher_loss": 0.23796644806861877 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2944409251213074, "learning_rate": 2.995616917274027e-05, "loss": 0.1968, "step": 7422, "teacher_loss": 0.185902401804924 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 1.0446711778640747, "learning_rate": 2.9955995498444306e-05, "loss": 0.4179, "step": 7423, "teacher_loss": 0.3482702970504761 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 1.3766136169433594, "learning_rate": 2.99558214812522e-05, "loss": 0.347, "step": 7424, "teacher_loss": 0.23261412978172302 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.3324040472507477, "learning_rate": 2.9955647121167955e-05, "loss": 0.2165, "step": 7425, "teacher_loss": 0.20363083481788635 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2738818824291229, "learning_rate": 2.9955472418195556e-05, "loss": 0.3266, "step": 7426, "teacher_loss": 0.33250826597213745 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.35307320952415466, "learning_rate": 2.9955297372339017e-05, "loss": 0.2694, "step": 7427, "teacher_loss": 0.260053813457489 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 1.2614067792892456, "learning_rate": 2.9955121983602344e-05, "loss": 0.3985, "step": 7428, "teacher_loss": 0.30260366201400757 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.36976194381713867, "learning_rate": 2.9954946251989563e-05, "loss": 0.2484, "step": 7429, "teacher_loss": 0.23487775027751923 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.45760565996170044, "learning_rate": 2.99547701775047e-05, "loss": 0.261, "step": 7430, "teacher_loss": 0.23920223116874695 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.06537441164255142, "learning_rate": 2.99545937601518e-05, "loss": 0.1265, "step": 7431, "teacher_loss": 0.13328242301940918 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.3329549729824066, "learning_rate": 2.9954416999934896e-05, "loss": 0.3632, "step": 7432, "teacher_loss": 0.3666110634803772 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.29801440238952637, "learning_rate": 2.9954239896858043e-05, "loss": 0.4, "step": 7433, "teacher_loss": 0.4113055467605591 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.7839879989624023, "learning_rate": 2.995406245092531e-05, "loss": 0.3947, "step": 7434, "teacher_loss": 0.3514120876789093 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.38176602125167847, "learning_rate": 2.9953884662140757e-05, "loss": 0.2281, "step": 7435, "teacher_loss": 0.21097299456596375 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.34904783964157104, "learning_rate": 2.9953706530508465e-05, "loss": 0.2197, "step": 7436, "teacher_loss": 0.20534396171569824 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.24289575219154358, "learning_rate": 2.9953528056032514e-05, "loss": 0.2286, "step": 7437, "teacher_loss": 0.2269619107246399 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.2839312255382538, "learning_rate": 2.9953349238716996e-05, "loss": 0.2322, "step": 7438, "teacher_loss": 0.2263968288898468 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.542436957359314, "learning_rate": 2.9953170078566014e-05, "loss": 0.3226, "step": 7439, "teacher_loss": 0.2981450855731964 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.47624945640563965, "learning_rate": 2.995299057558367e-05, "loss": 0.3339, "step": 7440, "teacher_loss": 0.31803613901138306 }, { "compression_loss": 0.0, "epoch": 1.34, "label_loss": 0.6108731031417847, "learning_rate": 2.995281072977409e-05, "loss": 0.3166, "step": 7441, "teacher_loss": 0.2839125394821167 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.33732280135154724, "learning_rate": 2.995263054114139e-05, "loss": 0.2568, "step": 7442, "teacher_loss": 0.24781520664691925 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.13159143924713135, "learning_rate": 2.99524500096897e-05, "loss": 0.1544, "step": 7443, "teacher_loss": 0.1569531410932541 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.33022165298461914, "learning_rate": 2.995226913542316e-05, "loss": 0.216, "step": 7444, "teacher_loss": 0.20328685641288757 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.2415725588798523, "learning_rate": 2.995208791834592e-05, "loss": 0.2788, "step": 7445, "teacher_loss": 0.282951295375824 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 1.1612355709075928, "learning_rate": 2.995190635846213e-05, "loss": 0.3122, "step": 7446, "teacher_loss": 0.21784768998622894 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.571759045124054, "learning_rate": 2.9951724455775963e-05, "loss": 0.2685, "step": 7447, "teacher_loss": 0.23475387692451477 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.2011307030916214, "learning_rate": 2.995154221029157e-05, "loss": 0.235, "step": 7448, "teacher_loss": 0.23872415721416473 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3539904057979584, "learning_rate": 2.995135962201315e-05, "loss": 0.2878, "step": 7449, "teacher_loss": 0.28048595786094666 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3041117489337921, "learning_rate": 2.9951176690944877e-05, "loss": 0.3779, "step": 7450, "teacher_loss": 0.38605183362960815 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.32429298758506775, "learning_rate": 2.995099341709095e-05, "loss": 0.2519, "step": 7451, "teacher_loss": 0.24389860033988953 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.48834770917892456, "learning_rate": 2.9950809800455567e-05, "loss": 0.3076, "step": 7452, "teacher_loss": 0.28749576210975647 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5275222063064575, "learning_rate": 2.9950625841042943e-05, "loss": 0.3044, "step": 7453, "teacher_loss": 0.27961310744285583 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5923051834106445, "learning_rate": 2.995044153885729e-05, "loss": 0.3115, "step": 7454, "teacher_loss": 0.2802680730819702 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.42109596729278564, "learning_rate": 2.995025689390284e-05, "loss": 0.3157, "step": 7455, "teacher_loss": 0.3039645552635193 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.367237389087677, "learning_rate": 2.995007190618382e-05, "loss": 0.2408, "step": 7456, "teacher_loss": 0.2267540991306305 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5828267335891724, "learning_rate": 2.9949886575704477e-05, "loss": 0.2683, "step": 7457, "teacher_loss": 0.23337845504283905 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.4457915127277374, "learning_rate": 2.994970090246905e-05, "loss": 0.5, "step": 7458, "teacher_loss": 0.506027102470398 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3433298170566559, "learning_rate": 2.994951488648181e-05, "loss": 0.3042, "step": 7459, "teacher_loss": 0.29985344409942627 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.29525110125541687, "learning_rate": 2.994932852774701e-05, "loss": 0.3455, "step": 7460, "teacher_loss": 0.3510931134223938 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.19486252963542938, "learning_rate": 2.9949141826268927e-05, "loss": 0.1768, "step": 7461, "teacher_loss": 0.17478960752487183 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3592166602611542, "learning_rate": 2.9948954782051847e-05, "loss": 0.2443, "step": 7462, "teacher_loss": 0.2314939796924591 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5024241209030151, "learning_rate": 2.994876739510005e-05, "loss": 0.5241, "step": 7463, "teacher_loss": 0.5264977812767029 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.628815770149231, "learning_rate": 2.9948579665417834e-05, "loss": 0.4483, "step": 7464, "teacher_loss": 0.4282122552394867 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3290923237800598, "learning_rate": 2.9948391593009506e-05, "loss": 0.164, "step": 7465, "teacher_loss": 0.1456906944513321 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.7108486294746399, "learning_rate": 2.9948203177879372e-05, "loss": 0.6942, "step": 7466, "teacher_loss": 0.6923169493675232 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.6858711242675781, "learning_rate": 2.9948014420031763e-05, "loss": 0.4079, "step": 7467, "teacher_loss": 0.3769756257534027 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.4692775011062622, "learning_rate": 2.9947825319471e-05, "loss": 0.1971, "step": 7468, "teacher_loss": 0.1669057011604309 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.434905469417572, "learning_rate": 2.9947635876201415e-05, "loss": 0.2707, "step": 7469, "teacher_loss": 0.2524486184120178 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.2583503723144531, "learning_rate": 2.9947446090227352e-05, "loss": 0.2059, "step": 7470, "teacher_loss": 0.20002153515815735 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.41995954513549805, "learning_rate": 2.9947255961553164e-05, "loss": 0.31, "step": 7471, "teacher_loss": 0.2977794110774994 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3154115676879883, "learning_rate": 2.994706549018322e-05, "loss": 0.2359, "step": 7472, "teacher_loss": 0.22709864377975464 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.4403138756752014, "learning_rate": 2.994687467612187e-05, "loss": 0.2506, "step": 7473, "teacher_loss": 0.22953663766384125 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.6175891160964966, "learning_rate": 2.99466835193735e-05, "loss": 0.242, "step": 7474, "teacher_loss": 0.20021606981754303 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.4916706681251526, "learning_rate": 2.994649201994249e-05, "loss": 0.438, "step": 7475, "teacher_loss": 0.4320324957370758 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 1.0189846754074097, "learning_rate": 2.994630017783323e-05, "loss": 0.4673, "step": 7476, "teacher_loss": 0.40599769353866577 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.1506902277469635, "learning_rate": 2.9946107993050115e-05, "loss": 0.1571, "step": 7477, "teacher_loss": 0.15786322951316833 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.10593598335981369, "learning_rate": 2.9945915465597557e-05, "loss": 0.1812, "step": 7478, "teacher_loss": 0.18961024284362793 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.46019983291625977, "learning_rate": 2.9945722595479965e-05, "loss": 0.2845, "step": 7479, "teacher_loss": 0.2649534046649933 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.6787864565849304, "learning_rate": 2.994552938270177e-05, "loss": 0.2742, "step": 7480, "teacher_loss": 0.22929373383522034 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.34946414828300476, "learning_rate": 2.9945335827267388e-05, "loss": 0.2683, "step": 7481, "teacher_loss": 0.2592858076095581 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 1.390134334564209, "learning_rate": 2.9945141929181267e-05, "loss": 0.3425, "step": 7482, "teacher_loss": 0.2260952889919281 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5059945583343506, "learning_rate": 2.994494768844785e-05, "loss": 0.3265, "step": 7483, "teacher_loss": 0.3065948486328125 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.8946864604949951, "learning_rate": 2.9944753105071587e-05, "loss": 0.4428, "step": 7484, "teacher_loss": 0.3925560712814331 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3887980878353119, "learning_rate": 2.9944558179056944e-05, "loss": 0.1781, "step": 7485, "teacher_loss": 0.15467095375061035 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.4459196627140045, "learning_rate": 2.9944362910408393e-05, "loss": 0.2961, "step": 7486, "teacher_loss": 0.27947306632995605 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5475760698318481, "learning_rate": 2.9944167299130397e-05, "loss": 0.2545, "step": 7487, "teacher_loss": 0.22190696001052856 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3333711624145508, "learning_rate": 2.9943971345227452e-05, "loss": 0.2768, "step": 7488, "teacher_loss": 0.27047520875930786 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3368968963623047, "learning_rate": 2.9943775048704053e-05, "loss": 0.2132, "step": 7489, "teacher_loss": 0.1994766891002655 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.1833430528640747, "learning_rate": 2.994357840956469e-05, "loss": 0.1789, "step": 7490, "teacher_loss": 0.17837193608283997 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.3790837228298187, "learning_rate": 2.9943381427813882e-05, "loss": 0.2324, "step": 7491, "teacher_loss": 0.21615347266197205 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.5838605165481567, "learning_rate": 2.994318410345614e-05, "loss": 0.3608, "step": 7492, "teacher_loss": 0.3359782099723816 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.2583012878894806, "learning_rate": 2.9942986436495986e-05, "loss": 0.2216, "step": 7493, "teacher_loss": 0.2175406515598297 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 1.3319706916809082, "learning_rate": 2.9942788426937956e-05, "loss": 0.381, "step": 7494, "teacher_loss": 0.2753249406814575 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.36045414209365845, "learning_rate": 2.9942590074786588e-05, "loss": 0.2097, "step": 7495, "teacher_loss": 0.19290709495544434 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.25065258145332336, "learning_rate": 2.9942391380046433e-05, "loss": 0.2755, "step": 7496, "teacher_loss": 0.2783011198043823 }, { "compression_loss": 0.0, "epoch": 1.35, "label_loss": 0.6273810863494873, "learning_rate": 2.9942192342722037e-05, "loss": 0.3676, "step": 7497, "teacher_loss": 0.3387652039527893 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4858560860157013, "learning_rate": 2.994199296281797e-05, "loss": 0.2166, "step": 7498, "teacher_loss": 0.18671831488609314 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.3831263780593872, "learning_rate": 2.994179324033881e-05, "loss": 0.2803, "step": 7499, "teacher_loss": 0.26887187361717224 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.7703880071640015, "learning_rate": 2.994159317528912e-05, "loss": 0.2757, "step": 7500, "teacher_loss": 0.22071754932403564 }, { "epoch": 1.36, "eval_exact_match": 78.90255439924314, "eval_f1": 86.43267132876123, "step": 7500 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.8324979543685913, "learning_rate": 2.99413927676735e-05, "loss": 0.3098, "step": 7501, "teacher_loss": 0.25171077251434326 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6357442736625671, "learning_rate": 2.9941192017496545e-05, "loss": 0.3712, "step": 7502, "teacher_loss": 0.34180718660354614 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4425898790359497, "learning_rate": 2.9940990924762846e-05, "loss": 0.2492, "step": 7503, "teacher_loss": 0.22773519158363342 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.7362959980964661, "learning_rate": 2.994078948947702e-05, "loss": 0.4452, "step": 7504, "teacher_loss": 0.412894070148468 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5426827669143677, "learning_rate": 2.9940587711643693e-05, "loss": 0.2662, "step": 7505, "teacher_loss": 0.23547953367233276 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.3502090573310852, "learning_rate": 2.9940385591267477e-05, "loss": 0.1905, "step": 7506, "teacher_loss": 0.17271116375923157 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6637804508209229, "learning_rate": 2.9940183128353015e-05, "loss": 0.2245, "step": 7507, "teacher_loss": 0.17564037442207336 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.23844844102859497, "learning_rate": 2.9939980322904948e-05, "loss": 0.2011, "step": 7508, "teacher_loss": 0.19697964191436768 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5452797412872314, "learning_rate": 2.9939777174927924e-05, "loss": 0.2766, "step": 7509, "teacher_loss": 0.2467808723449707 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4377099275588989, "learning_rate": 2.9939573684426603e-05, "loss": 0.3167, "step": 7510, "teacher_loss": 0.3032767176628113 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.41947972774505615, "learning_rate": 2.9939369851405645e-05, "loss": 0.236, "step": 7511, "teacher_loss": 0.21561065316200256 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.2861916422843933, "learning_rate": 2.993916567586973e-05, "loss": 0.1732, "step": 7512, "teacher_loss": 0.16060954332351685 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4824354648590088, "learning_rate": 2.993896115782353e-05, "loss": 0.2535, "step": 7513, "teacher_loss": 0.22809892892837524 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.2786598801612854, "learning_rate": 2.993875629727175e-05, "loss": 0.2425, "step": 7514, "teacher_loss": 0.23850062489509583 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.20274555683135986, "learning_rate": 2.993855109421907e-05, "loss": 0.1747, "step": 7515, "teacher_loss": 0.17157389223575592 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.29919224977493286, "learning_rate": 2.99383455486702e-05, "loss": 0.275, "step": 7516, "teacher_loss": 0.27230942249298096 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.8254399299621582, "learning_rate": 2.9938139660629863e-05, "loss": 0.6489, "step": 7517, "teacher_loss": 0.6292467713356018 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.32526054978370667, "learning_rate": 2.9937933430102758e-05, "loss": 0.2691, "step": 7518, "teacher_loss": 0.26285964250564575 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5519172549247742, "learning_rate": 2.9937726857093636e-05, "loss": 0.3313, "step": 7519, "teacher_loss": 0.3067835867404938 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.46934181451797485, "learning_rate": 2.9937519941607216e-05, "loss": 0.2677, "step": 7520, "teacher_loss": 0.2452666163444519 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.46599280834198, "learning_rate": 2.9937312683648256e-05, "loss": 0.2209, "step": 7521, "teacher_loss": 0.19371947646141052 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4714832305908203, "learning_rate": 2.993710508322149e-05, "loss": 0.268, "step": 7522, "teacher_loss": 0.24534301459789276 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5620614886283875, "learning_rate": 2.99368971403317e-05, "loss": 0.2017, "step": 7523, "teacher_loss": 0.16170698404312134 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4059962034225464, "learning_rate": 2.9936688854983637e-05, "loss": 0.2042, "step": 7524, "teacher_loss": 0.1817486435174942 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5865641832351685, "learning_rate": 2.993648022718208e-05, "loss": 0.2988, "step": 7525, "teacher_loss": 0.2668393552303314 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5302565693855286, "learning_rate": 2.9936271256931812e-05, "loss": 0.3135, "step": 7526, "teacher_loss": 0.2894470691680908 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6800379753112793, "learning_rate": 2.9936061944237628e-05, "loss": 0.2749, "step": 7527, "teacher_loss": 0.22983799874782562 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.3122449517250061, "learning_rate": 2.993585228910432e-05, "loss": 0.2216, "step": 7528, "teacher_loss": 0.21155352890491486 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 1.0097167491912842, "learning_rate": 2.9935642291536706e-05, "loss": 0.6414, "step": 7529, "teacher_loss": 0.6004397869110107 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6385592222213745, "learning_rate": 2.9935431951539584e-05, "loss": 0.4158, "step": 7530, "teacher_loss": 0.39110374450683594 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6485559940338135, "learning_rate": 2.9935221269117795e-05, "loss": 0.3553, "step": 7531, "teacher_loss": 0.3227570652961731 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.40885916352272034, "learning_rate": 2.9935010244276155e-05, "loss": 0.2703, "step": 7532, "teacher_loss": 0.2549360692501068 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.321699321269989, "learning_rate": 2.993479887701951e-05, "loss": 0.225, "step": 7533, "teacher_loss": 0.2142285704612732 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5968490839004517, "learning_rate": 2.9934587167352708e-05, "loss": 0.2061, "step": 7534, "teacher_loss": 0.162733793258667 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.3200331926345825, "learning_rate": 2.9934375115280592e-05, "loss": 0.2469, "step": 7535, "teacher_loss": 0.23878273367881775 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4204094409942627, "learning_rate": 2.9934162720808028e-05, "loss": 0.2404, "step": 7536, "teacher_loss": 0.22043679654598236 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.28288891911506653, "learning_rate": 2.9933949983939894e-05, "loss": 0.1838, "step": 7537, "teacher_loss": 0.17274974286556244 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6203982830047607, "learning_rate": 2.993373690468106e-05, "loss": 0.4273, "step": 7538, "teacher_loss": 0.4058406352996826 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4666936695575714, "learning_rate": 2.9933523483036415e-05, "loss": 0.263, "step": 7539, "teacher_loss": 0.24040162563323975 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5362464189529419, "learning_rate": 2.9933309719010844e-05, "loss": 0.2862, "step": 7540, "teacher_loss": 0.2584337890148163 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.23154741525650024, "learning_rate": 2.9933095612609253e-05, "loss": 0.2699, "step": 7541, "teacher_loss": 0.2742026746273041 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.2427349090576172, "learning_rate": 2.9932881163836556e-05, "loss": 0.1988, "step": 7542, "teacher_loss": 0.1939554512500763 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.18827080726623535, "learning_rate": 2.9932666372697664e-05, "loss": 0.2117, "step": 7543, "teacher_loss": 0.21433806419372559 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6037752628326416, "learning_rate": 2.9932451239197498e-05, "loss": 0.2735, "step": 7544, "teacher_loss": 0.2367558777332306 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.6233841180801392, "learning_rate": 2.9932235763340997e-05, "loss": 0.2895, "step": 7545, "teacher_loss": 0.25234729051589966 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.5477994680404663, "learning_rate": 2.9932019945133104e-05, "loss": 0.2491, "step": 7546, "teacher_loss": 0.215952530503273 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.4526468813419342, "learning_rate": 2.9931803784578757e-05, "loss": 0.2289, "step": 7547, "teacher_loss": 0.20406216382980347 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.312862366437912, "learning_rate": 2.993158728168292e-05, "loss": 0.3075, "step": 7548, "teacher_loss": 0.30693647265434265 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.7838975787162781, "learning_rate": 2.9931370436450552e-05, "loss": 0.3074, "step": 7549, "teacher_loss": 0.2545110583305359 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.8435627222061157, "learning_rate": 2.993115324888663e-05, "loss": 0.3417, "step": 7550, "teacher_loss": 0.2859613299369812 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.588554859161377, "learning_rate": 2.9930935718996122e-05, "loss": 0.2999, "step": 7551, "teacher_loss": 0.2677900195121765 }, { "compression_loss": 0.0, "epoch": 1.36, "label_loss": 0.36412879824638367, "learning_rate": 2.993071784678403e-05, "loss": 0.3387, "step": 7552, "teacher_loss": 0.3358222544193268 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.46777772903442383, "learning_rate": 2.993049963225534e-05, "loss": 0.3756, "step": 7553, "teacher_loss": 0.3653554916381836 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 1.0213065147399902, "learning_rate": 2.993028107541506e-05, "loss": 0.7713, "step": 7554, "teacher_loss": 0.7435516119003296 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.607062578201294, "learning_rate": 2.9930062176268196e-05, "loss": 0.3089, "step": 7555, "teacher_loss": 0.2757790982723236 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3792635202407837, "learning_rate": 2.9929842934819768e-05, "loss": 0.2933, "step": 7556, "teacher_loss": 0.28380149602890015 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3142491281032562, "learning_rate": 2.9929623351074806e-05, "loss": 0.2243, "step": 7557, "teacher_loss": 0.21432015299797058 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.4454195499420166, "learning_rate": 2.9929403425038338e-05, "loss": 0.2411, "step": 7558, "teacher_loss": 0.2183436155319214 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.46095946431159973, "learning_rate": 2.992918315671541e-05, "loss": 0.2912, "step": 7559, "teacher_loss": 0.27229368686676025 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.4410061240196228, "learning_rate": 2.992896254611108e-05, "loss": 0.3261, "step": 7560, "teacher_loss": 0.31338778138160706 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.9786903262138367, "learning_rate": 2.9928741593230393e-05, "loss": 0.3817, "step": 7561, "teacher_loss": 0.31538426876068115 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3916565775871277, "learning_rate": 2.9928520298078417e-05, "loss": 0.2544, "step": 7562, "teacher_loss": 0.23915785551071167 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.228489950299263, "learning_rate": 2.992829866066023e-05, "loss": 0.2791, "step": 7563, "teacher_loss": 0.2847137451171875 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3630978465080261, "learning_rate": 2.9928076680980917e-05, "loss": 0.1845, "step": 7564, "teacher_loss": 0.16466104984283447 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5929465293884277, "learning_rate": 2.992785435904556e-05, "loss": 0.2402, "step": 7565, "teacher_loss": 0.2010573446750641 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.6393760442733765, "learning_rate": 2.9927631694859256e-05, "loss": 0.3202, "step": 7566, "teacher_loss": 0.2847674787044525 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.26598042249679565, "learning_rate": 2.9927408688427115e-05, "loss": 0.2954, "step": 7567, "teacher_loss": 0.29863959550857544 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5515957474708557, "learning_rate": 2.9927185339754245e-05, "loss": 0.3485, "step": 7568, "teacher_loss": 0.3259860873222351 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.6076855659484863, "learning_rate": 2.9926961648845774e-05, "loss": 0.3017, "step": 7569, "teacher_loss": 0.26764625310897827 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.571256160736084, "learning_rate": 2.9926737615706823e-05, "loss": 0.2456, "step": 7570, "teacher_loss": 0.20946840941905975 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.34154656529426575, "learning_rate": 2.9926513240342527e-05, "loss": 0.3439, "step": 7571, "teacher_loss": 0.34421372413635254 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5341852307319641, "learning_rate": 2.992628852275804e-05, "loss": 0.4697, "step": 7572, "teacher_loss": 0.4625610113143921 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.21058453619480133, "learning_rate": 2.992606346295851e-05, "loss": 0.2269, "step": 7573, "teacher_loss": 0.22871890664100647 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5513201951980591, "learning_rate": 2.9925838060949087e-05, "loss": 0.3563, "step": 7574, "teacher_loss": 0.33462658524513245 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.49390077590942383, "learning_rate": 2.9925612316734957e-05, "loss": 0.3447, "step": 7575, "teacher_loss": 0.32809942960739136 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.4184456467628479, "learning_rate": 2.992538623032128e-05, "loss": 0.2226, "step": 7576, "teacher_loss": 0.20087221264839172 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5866072177886963, "learning_rate": 2.9925159801713243e-05, "loss": 0.4075, "step": 7577, "teacher_loss": 0.3876102864742279 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3299693763256073, "learning_rate": 2.9924933030916044e-05, "loss": 0.2544, "step": 7578, "teacher_loss": 0.2460080236196518 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.597364068031311, "learning_rate": 2.992470591793488e-05, "loss": 0.2622, "step": 7579, "teacher_loss": 0.22494718432426453 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.9301906228065491, "learning_rate": 2.992447846277495e-05, "loss": 0.8035, "step": 7580, "teacher_loss": 0.789463996887207 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.44339799880981445, "learning_rate": 2.9924250665441478e-05, "loss": 0.4921, "step": 7581, "teacher_loss": 0.4974651634693146 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3032291531562805, "learning_rate": 2.9924022525939684e-05, "loss": 0.216, "step": 7582, "teacher_loss": 0.20626375079154968 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.7703602313995361, "learning_rate": 2.99237940442748e-05, "loss": 0.3301, "step": 7583, "teacher_loss": 0.28113555908203125 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.21320851147174835, "learning_rate": 2.9923565220452058e-05, "loss": 0.3109, "step": 7584, "teacher_loss": 0.3217393159866333 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.36754024028778076, "learning_rate": 2.9923336054476708e-05, "loss": 0.2144, "step": 7585, "teacher_loss": 0.19742700457572937 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.32668545842170715, "learning_rate": 2.992310654635401e-05, "loss": 0.2631, "step": 7586, "teacher_loss": 0.2560487985610962 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5014143586158752, "learning_rate": 2.992287669608922e-05, "loss": 0.2608, "step": 7587, "teacher_loss": 0.2340397834777832 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.38248589634895325, "learning_rate": 2.9922646503687603e-05, "loss": 0.254, "step": 7588, "teacher_loss": 0.2397211492061615 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5165925621986389, "learning_rate": 2.9922415969154445e-05, "loss": 0.4666, "step": 7589, "teacher_loss": 0.46099644899368286 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.4691811800003052, "learning_rate": 2.992218509249503e-05, "loss": 0.1961, "step": 7590, "teacher_loss": 0.1658032089471817 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3885548710823059, "learning_rate": 2.9921953873714652e-05, "loss": 0.3609, "step": 7591, "teacher_loss": 0.35786527395248413 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.2618866562843323, "learning_rate": 2.9921722312818604e-05, "loss": 0.2775, "step": 7592, "teacher_loss": 0.2792610228061676 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.32753002643585205, "learning_rate": 2.9921490409812203e-05, "loss": 0.4075, "step": 7593, "teacher_loss": 0.41635072231292725 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.536220908164978, "learning_rate": 2.9921258164700765e-05, "loss": 0.3394, "step": 7594, "teacher_loss": 0.3175109624862671 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.6709280014038086, "learning_rate": 2.9921025577489617e-05, "loss": 0.7408, "step": 7595, "teacher_loss": 0.7485227584838867 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.5450140237808228, "learning_rate": 2.9920792648184083e-05, "loss": 0.2741, "step": 7596, "teacher_loss": 0.24402843415737152 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.4634959101676941, "learning_rate": 2.992055937678951e-05, "loss": 0.2739, "step": 7597, "teacher_loss": 0.2528146803379059 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.3967730700969696, "learning_rate": 2.9920325763311242e-05, "loss": 0.422, "step": 7598, "teacher_loss": 0.42482852935791016 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.696155309677124, "learning_rate": 2.992009180775464e-05, "loss": 0.3298, "step": 7599, "teacher_loss": 0.2890735864639282 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.21985122561454773, "learning_rate": 2.9919857510125064e-05, "loss": 0.1969, "step": 7600, "teacher_loss": 0.19437068700790405 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.6172599792480469, "learning_rate": 2.9919622870427893e-05, "loss": 0.2602, "step": 7601, "teacher_loss": 0.22054541110992432 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.14786367118358612, "learning_rate": 2.9919387888668494e-05, "loss": 0.1429, "step": 7602, "teacher_loss": 0.14233875274658203 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.33377137780189514, "learning_rate": 2.9919152564852268e-05, "loss": 0.2344, "step": 7603, "teacher_loss": 0.22334596514701843 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.2726837694644928, "learning_rate": 2.9918916898984598e-05, "loss": 0.234, "step": 7604, "teacher_loss": 0.22966215014457703 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.20103251934051514, "learning_rate": 2.9918680891070896e-05, "loss": 0.1732, "step": 7605, "teacher_loss": 0.170084148645401 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.8575376272201538, "learning_rate": 2.9918444541116568e-05, "loss": 0.375, "step": 7606, "teacher_loss": 0.32138872146606445 }, { "compression_loss": 0.0, "epoch": 1.37, "label_loss": 0.26272910833358765, "learning_rate": 2.9918207849127033e-05, "loss": 0.2964, "step": 7607, "teacher_loss": 0.30010873079299927 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.6971885561943054, "learning_rate": 2.991797081510772e-05, "loss": 0.3172, "step": 7608, "teacher_loss": 0.274998277425766 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.31353896856307983, "learning_rate": 2.9917733439064065e-05, "loss": 0.2456, "step": 7609, "teacher_loss": 0.23810040950775146 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5445033311843872, "learning_rate": 2.9917495721001505e-05, "loss": 0.2243, "step": 7610, "teacher_loss": 0.18877622485160828 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 1.0506901741027832, "learning_rate": 2.9917257660925495e-05, "loss": 0.547, "step": 7611, "teacher_loss": 0.49104979634284973 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.19204777479171753, "learning_rate": 2.9917019258841492e-05, "loss": 0.2418, "step": 7612, "teacher_loss": 0.24727264046669006 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.26949289441108704, "learning_rate": 2.991678051475496e-05, "loss": 0.2277, "step": 7613, "teacher_loss": 0.22303706407546997 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5632785558700562, "learning_rate": 2.9916541428671373e-05, "loss": 0.2555, "step": 7614, "teacher_loss": 0.22132110595703125 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4861583113670349, "learning_rate": 2.9916302000596215e-05, "loss": 0.2483, "step": 7615, "teacher_loss": 0.22183868288993835 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5267183184623718, "learning_rate": 2.9916062230534968e-05, "loss": 0.292, "step": 7616, "teacher_loss": 0.26588404178619385 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.7098817825317383, "learning_rate": 2.991582211849314e-05, "loss": 0.3411, "step": 7617, "teacher_loss": 0.30013298988342285 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.451063334941864, "learning_rate": 2.991558166447623e-05, "loss": 0.2836, "step": 7618, "teacher_loss": 0.26503312587738037 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.7276087999343872, "learning_rate": 2.991534086848975e-05, "loss": 0.4087, "step": 7619, "teacher_loss": 0.37330079078674316 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5973576307296753, "learning_rate": 2.9915099730539223e-05, "loss": 0.361, "step": 7620, "teacher_loss": 0.33478158712387085 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.23257100582122803, "learning_rate": 2.991485825063018e-05, "loss": 0.21, "step": 7621, "teacher_loss": 0.20745518803596497 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.9342533349990845, "learning_rate": 2.9914616428768153e-05, "loss": 0.6189, "step": 7622, "teacher_loss": 0.5838311910629272 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.6175295114517212, "learning_rate": 2.9914374264958684e-05, "loss": 0.2455, "step": 7623, "teacher_loss": 0.20417055487632751 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.2612594664096832, "learning_rate": 2.991413175920733e-05, "loss": 0.2373, "step": 7624, "teacher_loss": 0.23464033007621765 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4282259941101074, "learning_rate": 2.991388891151965e-05, "loss": 0.2786, "step": 7625, "teacher_loss": 0.2619744837284088 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3231649398803711, "learning_rate": 2.991364572190121e-05, "loss": 0.2784, "step": 7626, "teacher_loss": 0.27348142862319946 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5406469106674194, "learning_rate": 2.9913402190357596e-05, "loss": 0.2933, "step": 7627, "teacher_loss": 0.2658522427082062 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4537014067173004, "learning_rate": 2.9913158316894374e-05, "loss": 0.2408, "step": 7628, "teacher_loss": 0.21709023416042328 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4893779754638672, "learning_rate": 2.9912914101517144e-05, "loss": 0.2604, "step": 7629, "teacher_loss": 0.23499667644500732 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.39155447483062744, "learning_rate": 2.9912669544231507e-05, "loss": 0.3032, "step": 7630, "teacher_loss": 0.2934088408946991 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5394455194473267, "learning_rate": 2.9912424645043064e-05, "loss": 0.3402, "step": 7631, "teacher_loss": 0.3180971145629883 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.9320620894432068, "learning_rate": 2.991217940395744e-05, "loss": 0.3225, "step": 7632, "teacher_loss": 0.25479528307914734 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4853166341781616, "learning_rate": 2.991193382098025e-05, "loss": 0.2637, "step": 7633, "teacher_loss": 0.239122211933136 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3353040814399719, "learning_rate": 2.9911687896117126e-05, "loss": 0.3197, "step": 7634, "teacher_loss": 0.31792449951171875 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5667581558227539, "learning_rate": 2.99114416293737e-05, "loss": 0.4046, "step": 7635, "teacher_loss": 0.38663744926452637 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.18207284808158875, "learning_rate": 2.991119502075563e-05, "loss": 0.2295, "step": 7636, "teacher_loss": 0.23474003374576569 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4621998071670532, "learning_rate": 2.9910948070268562e-05, "loss": 0.3162, "step": 7637, "teacher_loss": 0.300016313791275 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.38535675406455994, "learning_rate": 2.991070077791816e-05, "loss": 0.2576, "step": 7638, "teacher_loss": 0.2433701455593109 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.49239856004714966, "learning_rate": 2.9910453143710096e-05, "loss": 0.2226, "step": 7639, "teacher_loss": 0.19262206554412842 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.27584561705589294, "learning_rate": 2.9910205167650044e-05, "loss": 0.1819, "step": 7640, "teacher_loss": 0.17148154973983765 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.41592133045196533, "learning_rate": 2.990995684974369e-05, "loss": 0.2771, "step": 7641, "teacher_loss": 0.2617039084434509 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.6000227332115173, "learning_rate": 2.9909708189996728e-05, "loss": 0.2176, "step": 7642, "teacher_loss": 0.17507179081439972 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.7403451800346375, "learning_rate": 2.990945918841486e-05, "loss": 0.3587, "step": 7643, "teacher_loss": 0.3163034915924072 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3336305618286133, "learning_rate": 2.990920984500379e-05, "loss": 0.1946, "step": 7644, "teacher_loss": 0.1792076826095581 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 1.288038969039917, "learning_rate": 2.9908960159769243e-05, "loss": 0.7831, "step": 7645, "teacher_loss": 0.727039635181427 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.8299707770347595, "learning_rate": 2.990871013271694e-05, "loss": 0.3659, "step": 7646, "teacher_loss": 0.3143097162246704 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4849017262458801, "learning_rate": 2.9908459763852605e-05, "loss": 0.8342, "step": 7647, "teacher_loss": 0.8730412721633911 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3111902177333832, "learning_rate": 2.990820905318199e-05, "loss": 0.2294, "step": 7648, "teacher_loss": 0.220281183719635 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.24912330508232117, "learning_rate": 2.9907958000710838e-05, "loss": 0.2592, "step": 7649, "teacher_loss": 0.26032960414886475 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.20828185975551605, "learning_rate": 2.9907706606444905e-05, "loss": 0.2203, "step": 7650, "teacher_loss": 0.22168032824993134 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5150014162063599, "learning_rate": 2.9907454870389957e-05, "loss": 0.2407, "step": 7651, "teacher_loss": 0.21016745269298553 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.1890479028224945, "learning_rate": 2.9907202792551764e-05, "loss": 0.2251, "step": 7652, "teacher_loss": 0.229139506816864 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.35701701045036316, "learning_rate": 2.99069503729361e-05, "loss": 0.2391, "step": 7653, "teacher_loss": 0.22603777050971985 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4295775890350342, "learning_rate": 2.9906697611548767e-05, "loss": 0.248, "step": 7654, "teacher_loss": 0.22784534096717834 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.24752330780029297, "learning_rate": 2.9906444508395544e-05, "loss": 0.2023, "step": 7655, "teacher_loss": 0.19729755818843842 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3397597074508667, "learning_rate": 2.990619106348224e-05, "loss": 0.222, "step": 7656, "teacher_loss": 0.20892798900604248 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.32617202401161194, "learning_rate": 2.9905937276814666e-05, "loss": 0.3033, "step": 7657, "teacher_loss": 0.3007524609565735 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.4927031099796295, "learning_rate": 2.9905683148398642e-05, "loss": 0.2285, "step": 7658, "teacher_loss": 0.19910714030265808 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3975003659725189, "learning_rate": 2.990542867823999e-05, "loss": 0.2165, "step": 7659, "teacher_loss": 0.1964210420846939 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.5493260025978088, "learning_rate": 2.990517386634455e-05, "loss": 0.2391, "step": 7660, "teacher_loss": 0.20462894439697266 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.3311784565448761, "learning_rate": 2.990491871271816e-05, "loss": 0.2201, "step": 7661, "teacher_loss": 0.20771250128746033 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.9117849469184875, "learning_rate": 2.990466321736667e-05, "loss": 0.5034, "step": 7662, "teacher_loss": 0.4580279588699341 }, { "compression_loss": 0.0, "epoch": 1.38, "label_loss": 0.9144551157951355, "learning_rate": 2.990440738029594e-05, "loss": 0.657, "step": 7663, "teacher_loss": 0.628434956073761 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.11332852393388748, "learning_rate": 2.9904151201511835e-05, "loss": 0.1796, "step": 7664, "teacher_loss": 0.18698114156723022 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.8387352228164673, "learning_rate": 2.990389468102023e-05, "loss": 0.4767, "step": 7665, "teacher_loss": 0.4364364743232727 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.3453589081764221, "learning_rate": 2.9903637818827e-05, "loss": 0.1956, "step": 7666, "teacher_loss": 0.17895755171775818 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.4446118175983429, "learning_rate": 2.9903380614938047e-05, "loss": 0.3536, "step": 7667, "teacher_loss": 0.3434881865978241 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6534728407859802, "learning_rate": 2.9903123069359247e-05, "loss": 0.2781, "step": 7668, "teacher_loss": 0.2363986372947693 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.8965345621109009, "learning_rate": 2.9902865182096524e-05, "loss": 0.5777, "step": 7669, "teacher_loss": 0.5422395467758179 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.31460219621658325, "learning_rate": 2.990260695315578e-05, "loss": 0.251, "step": 7670, "teacher_loss": 0.2439253032207489 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 1.4292893409729004, "learning_rate": 2.9902348382542943e-05, "loss": 0.4601, "step": 7671, "teacher_loss": 0.3523657023906708 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.812456488609314, "learning_rate": 2.9902089470263937e-05, "loss": 0.2741, "step": 7672, "teacher_loss": 0.21432796120643616 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.42403146624565125, "learning_rate": 2.9901830216324694e-05, "loss": 0.2066, "step": 7673, "teacher_loss": 0.1824251413345337 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.35832852125167847, "learning_rate": 2.9901570620731165e-05, "loss": 0.326, "step": 7674, "teacher_loss": 0.3224143087863922 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.39909595251083374, "learning_rate": 2.99013106834893e-05, "loss": 0.3171, "step": 7675, "teacher_loss": 0.3080254793167114 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5603435039520264, "learning_rate": 2.9901050404605054e-05, "loss": 0.2804, "step": 7676, "teacher_loss": 0.24925857782363892 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.7439641952514648, "learning_rate": 2.9900789784084396e-05, "loss": 0.6849, "step": 7677, "teacher_loss": 0.6783853769302368 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.7180014252662659, "learning_rate": 2.9900528821933307e-05, "loss": 0.457, "step": 7678, "teacher_loss": 0.4279584288597107 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5929282903671265, "learning_rate": 2.990026751815777e-05, "loss": 0.2533, "step": 7679, "teacher_loss": 0.21560589969158173 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.4413987994194031, "learning_rate": 2.9900005872763767e-05, "loss": 0.2843, "step": 7680, "teacher_loss": 0.2668389081954956 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.27870482206344604, "learning_rate": 2.9899743885757303e-05, "loss": 0.1888, "step": 7681, "teacher_loss": 0.1788131296634674 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.2997399568557739, "learning_rate": 2.9899481557144387e-05, "loss": 0.2916, "step": 7682, "teacher_loss": 0.29066556692123413 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.7859231233596802, "learning_rate": 2.9899218886931024e-05, "loss": 0.4196, "step": 7683, "teacher_loss": 0.37890440225601196 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6184636354446411, "learning_rate": 2.9898955875123243e-05, "loss": 0.2992, "step": 7684, "teacher_loss": 0.26370954513549805 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.45271170139312744, "learning_rate": 2.9898692521727074e-05, "loss": 0.2663, "step": 7685, "teacher_loss": 0.24558836221694946 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 1.5589104890823364, "learning_rate": 2.9898428826748556e-05, "loss": 0.5338, "step": 7686, "teacher_loss": 0.4199133813381195 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.16332146525382996, "learning_rate": 2.989816479019373e-05, "loss": 0.2172, "step": 7687, "teacher_loss": 0.22313950955867767 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6288355588912964, "learning_rate": 2.9897900412068658e-05, "loss": 0.3145, "step": 7688, "teacher_loss": 0.27959388494491577 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.10287728905677795, "learning_rate": 2.9897635692379387e-05, "loss": 0.1673, "step": 7689, "teacher_loss": 0.17445877194404602 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.45436355471611023, "learning_rate": 2.9897370631132002e-05, "loss": 0.5602, "step": 7690, "teacher_loss": 0.5719693899154663 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.20927652716636658, "learning_rate": 2.989710522833257e-05, "loss": 0.2672, "step": 7691, "teacher_loss": 0.27360397577285767 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.2832906246185303, "learning_rate": 2.9896839483987177e-05, "loss": 0.1737, "step": 7692, "teacher_loss": 0.1615416705608368 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5547175407409668, "learning_rate": 2.9896573398101925e-05, "loss": 0.3049, "step": 7693, "teacher_loss": 0.2771707773208618 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5389564037322998, "learning_rate": 2.98963069706829e-05, "loss": 0.3383, "step": 7694, "teacher_loss": 0.3159579038619995 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.42774468660354614, "learning_rate": 2.989604020173622e-05, "loss": 0.2761, "step": 7695, "teacher_loss": 0.2592783570289612 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.3508699834346771, "learning_rate": 2.9895773091268002e-05, "loss": 0.2075, "step": 7696, "teacher_loss": 0.1915394365787506 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5646740198135376, "learning_rate": 2.989550563928436e-05, "loss": 0.3057, "step": 7697, "teacher_loss": 0.2768925428390503 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.16079141199588776, "learning_rate": 2.9895237845791437e-05, "loss": 0.2271, "step": 7698, "teacher_loss": 0.2344880998134613 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.20284727215766907, "learning_rate": 2.9894969710795368e-05, "loss": 0.1987, "step": 7699, "teacher_loss": 0.19820529222488403 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.40466055274009705, "learning_rate": 2.9894701234302303e-05, "loss": 0.3401, "step": 7700, "teacher_loss": 0.3329346776008606 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.38243868947029114, "learning_rate": 2.989443241631839e-05, "loss": 0.2912, "step": 7701, "teacher_loss": 0.2810543179512024 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6176409721374512, "learning_rate": 2.9894163256849803e-05, "loss": 0.3587, "step": 7702, "teacher_loss": 0.3298988938331604 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.3861449360847473, "learning_rate": 2.9893893755902705e-05, "loss": 0.1971, "step": 7703, "teacher_loss": 0.1761438548564911 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5779880285263062, "learning_rate": 2.9893623913483276e-05, "loss": 0.2482, "step": 7704, "teacher_loss": 0.21156755089759827 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.7626945972442627, "learning_rate": 2.9893353729597706e-05, "loss": 0.2963, "step": 7705, "teacher_loss": 0.2445230334997177 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.38619405031204224, "learning_rate": 2.9893083204252187e-05, "loss": 0.4251, "step": 7706, "teacher_loss": 0.42941296100616455 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.7126033306121826, "learning_rate": 2.9892812337452924e-05, "loss": 0.3314, "step": 7707, "teacher_loss": 0.289096862077713 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.3210299015045166, "learning_rate": 2.9892541129206122e-05, "loss": 0.2735, "step": 7708, "teacher_loss": 0.2682119309902191 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5015177130699158, "learning_rate": 2.9892269579518005e-05, "loss": 0.2967, "step": 7709, "teacher_loss": 0.2739132046699524 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6347278356552124, "learning_rate": 2.9891997688394792e-05, "loss": 0.2161, "step": 7710, "teacher_loss": 0.16954763233661652 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.40542662143707275, "learning_rate": 2.9891725455842726e-05, "loss": 0.317, "step": 7711, "teacher_loss": 0.30716565251350403 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.08896270394325256, "learning_rate": 2.989145288186804e-05, "loss": 0.1219, "step": 7712, "teacher_loss": 0.125535249710083 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 1.1812069416046143, "learning_rate": 2.989117996647699e-05, "loss": 0.3151, "step": 7713, "teacher_loss": 0.21887724101543427 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.6275110840797424, "learning_rate": 2.989090670967582e-05, "loss": 0.2753, "step": 7714, "teacher_loss": 0.23614096641540527 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.5886155366897583, "learning_rate": 2.9890633111470808e-05, "loss": 0.2371, "step": 7715, "teacher_loss": 0.19809550046920776 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.8504222631454468, "learning_rate": 2.9890359171868225e-05, "loss": 0.682, "step": 7716, "teacher_loss": 0.6633404493331909 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.2692478895187378, "learning_rate": 2.9890084890874353e-05, "loss": 0.2671, "step": 7717, "teacher_loss": 0.26688599586486816 }, { "compression_loss": 0.0, "epoch": 1.39, "label_loss": 0.28245532512664795, "learning_rate": 2.9889810268495472e-05, "loss": 0.2907, "step": 7718, "teacher_loss": 0.2916331887245178 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5584731698036194, "learning_rate": 2.988953530473789e-05, "loss": 0.2336, "step": 7719, "teacher_loss": 0.19752509891986847 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.25852152705192566, "learning_rate": 2.9889259999607897e-05, "loss": 0.2292, "step": 7720, "teacher_loss": 0.22590261697769165 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.25501549243927, "learning_rate": 2.9888984353111814e-05, "loss": 0.2072, "step": 7721, "teacher_loss": 0.2019159495830536 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 1.220973253250122, "learning_rate": 2.9888708365255964e-05, "loss": 0.3519, "step": 7722, "teacher_loss": 0.2553603947162628 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.25047096610069275, "learning_rate": 2.9888432036046664e-05, "loss": 0.2812, "step": 7723, "teacher_loss": 0.2846137583255768 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.676315426826477, "learning_rate": 2.988815536549026e-05, "loss": 0.2486, "step": 7724, "teacher_loss": 0.2010425627231598 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5959880352020264, "learning_rate": 2.9887878353593093e-05, "loss": 0.4573, "step": 7725, "teacher_loss": 0.44186708331108093 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6098060607910156, "learning_rate": 2.9887601000361505e-05, "loss": 0.2973, "step": 7726, "teacher_loss": 0.26260310411453247 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.21846815943717957, "learning_rate": 2.9887323305801863e-05, "loss": 0.234, "step": 7727, "teacher_loss": 0.2356969267129898 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.7597025632858276, "learning_rate": 2.9887045269920533e-05, "loss": 0.262, "step": 7728, "teacher_loss": 0.20670972764492035 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3945283889770508, "learning_rate": 2.9886766892723887e-05, "loss": 0.4478, "step": 7729, "teacher_loss": 0.45376574993133545 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.9825731515884399, "learning_rate": 2.988648817421831e-05, "loss": 0.4069, "step": 7730, "teacher_loss": 0.34298083186149597 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.36861345171928406, "learning_rate": 2.9886209114410194e-05, "loss": 0.2665, "step": 7731, "teacher_loss": 0.25520598888397217 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.2924313545227051, "learning_rate": 2.9885929713305927e-05, "loss": 0.3089, "step": 7732, "teacher_loss": 0.31077510118484497 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5260310769081116, "learning_rate": 2.9885649970911934e-05, "loss": 0.3034, "step": 7733, "teacher_loss": 0.2786204218864441 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.25850313901901245, "learning_rate": 2.9885369887234603e-05, "loss": 0.1906, "step": 7734, "teacher_loss": 0.18305891752243042 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6007351875305176, "learning_rate": 2.9885089462280377e-05, "loss": 0.2785, "step": 7735, "teacher_loss": 0.24267081916332245 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6686475872993469, "learning_rate": 2.9884808696055675e-05, "loss": 0.256, "step": 7736, "teacher_loss": 0.2101626694202423 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.21142703294754028, "learning_rate": 2.988452758856694e-05, "loss": 0.2518, "step": 7737, "teacher_loss": 0.25626808404922485 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5742628574371338, "learning_rate": 2.9884246139820613e-05, "loss": 0.4333, "step": 7738, "teacher_loss": 0.41760575771331787 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.8935059905052185, "learning_rate": 2.9883964349823142e-05, "loss": 0.3832, "step": 7739, "teacher_loss": 0.326495498418808 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.16041089594364166, "learning_rate": 2.9883682218580993e-05, "loss": 0.1812, "step": 7740, "teacher_loss": 0.18348428606987 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5403831005096436, "learning_rate": 2.988339974610064e-05, "loss": 0.3149, "step": 7741, "teacher_loss": 0.2898145914077759 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.1856047809123993, "learning_rate": 2.988311693238855e-05, "loss": 0.2028, "step": 7742, "teacher_loss": 0.20466753840446472 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.7981098890304565, "learning_rate": 2.9882833777451215e-05, "loss": 0.5572, "step": 7743, "teacher_loss": 0.5304381847381592 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.18892623484134674, "learning_rate": 2.988255028129512e-05, "loss": 0.2584, "step": 7744, "teacher_loss": 0.2661294639110565 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3756716549396515, "learning_rate": 2.9882266443926766e-05, "loss": 0.2908, "step": 7745, "teacher_loss": 0.28136372566223145 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.20233780145645142, "learning_rate": 2.9881982265352665e-05, "loss": 0.2132, "step": 7746, "teacher_loss": 0.21444067358970642 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3564853072166443, "learning_rate": 2.9881697745579323e-05, "loss": 0.2937, "step": 7747, "teacher_loss": 0.2867465019226074 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.33353284001350403, "learning_rate": 2.9881412884613273e-05, "loss": 0.2546, "step": 7748, "teacher_loss": 0.24582603573799133 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3640999495983124, "learning_rate": 2.988112768246104e-05, "loss": 0.3128, "step": 7749, "teacher_loss": 0.30711257457733154 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3294028341770172, "learning_rate": 2.9880842139129168e-05, "loss": 0.2603, "step": 7750, "teacher_loss": 0.25257858633995056 }, { "epoch": 1.4, "eval_exact_match": 79.18637653736991, "eval_f1": 86.78283582499546, "step": 7750 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5814988017082214, "learning_rate": 2.9880556254624202e-05, "loss": 0.3444, "step": 7751, "teacher_loss": 0.31810107827186584 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5516996383666992, "learning_rate": 2.988027002895269e-05, "loss": 0.2156, "step": 7752, "teacher_loss": 0.17826640605926514 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6024022102355957, "learning_rate": 2.987998346212121e-05, "loss": 0.3827, "step": 7753, "teacher_loss": 0.35823768377304077 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.47813841700553894, "learning_rate": 2.987969655413631e-05, "loss": 0.2159, "step": 7754, "teacher_loss": 0.18676459789276123 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.4733859598636627, "learning_rate": 2.9879409305004582e-05, "loss": 0.233, "step": 7755, "teacher_loss": 0.20633962750434875 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.40482449531555176, "learning_rate": 2.9879121714732612e-05, "loss": 0.2468, "step": 7756, "teacher_loss": 0.2292899489402771 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.17602097988128662, "learning_rate": 2.9878833783326996e-05, "loss": 0.1859, "step": 7757, "teacher_loss": 0.1870066225528717 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.41530680656433105, "learning_rate": 2.9878545510794323e-05, "loss": 0.3099, "step": 7758, "teacher_loss": 0.2982083261013031 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.4381055533885956, "learning_rate": 2.9878256897141215e-05, "loss": 0.2867, "step": 7759, "teacher_loss": 0.269903302192688 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.34129971265792847, "learning_rate": 2.987796794237428e-05, "loss": 0.3507, "step": 7760, "teacher_loss": 0.35173535346984863 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.4512649178504944, "learning_rate": 2.9877678646500143e-05, "loss": 0.3374, "step": 7761, "teacher_loss": 0.32472532987594604 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.7364459037780762, "learning_rate": 2.9877389009525447e-05, "loss": 0.37, "step": 7762, "teacher_loss": 0.32924115657806396 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.80064857006073, "learning_rate": 2.987709903145682e-05, "loss": 0.3103, "step": 7763, "teacher_loss": 0.2558046579360962 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.3515069782733917, "learning_rate": 2.987680871230092e-05, "loss": 0.1708, "step": 7764, "teacher_loss": 0.15071332454681396 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5223360061645508, "learning_rate": 2.98765180520644e-05, "loss": 0.2561, "step": 7765, "teacher_loss": 0.22649329900741577 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6189290881156921, "learning_rate": 2.987622705075392e-05, "loss": 0.2922, "step": 7766, "teacher_loss": 0.2558940052986145 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.6466519832611084, "learning_rate": 2.9875935708376156e-05, "loss": 0.2874, "step": 7767, "teacher_loss": 0.24752295017242432 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.493502676486969, "learning_rate": 2.9875644024937788e-05, "loss": 0.4489, "step": 7768, "teacher_loss": 0.4439082741737366 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.43044495582580566, "learning_rate": 2.9875352000445503e-05, "loss": 0.2629, "step": 7769, "teacher_loss": 0.24429334700107574 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.2896115183830261, "learning_rate": 2.9875059634905996e-05, "loss": 0.3539, "step": 7770, "teacher_loss": 0.3609997034072876 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.26232093572616577, "learning_rate": 2.987476692832596e-05, "loss": 0.1882, "step": 7771, "teacher_loss": 0.1799980252981186 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.923363447189331, "learning_rate": 2.9874473880712125e-05, "loss": 0.4743, "step": 7772, "teacher_loss": 0.42437034845352173 }, { "compression_loss": 0.0, "epoch": 1.4, "label_loss": 0.5126427412033081, "learning_rate": 2.98741804920712e-05, "loss": 0.3137, "step": 7773, "teacher_loss": 0.2916460633277893 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.904716432094574, "learning_rate": 2.9873886762409904e-05, "loss": 0.334, "step": 7774, "teacher_loss": 0.27061885595321655 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5211755633354187, "learning_rate": 2.9873592691734985e-05, "loss": 0.277, "step": 7775, "teacher_loss": 0.24991083145141602 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.32271963357925415, "learning_rate": 2.9873298280053174e-05, "loss": 0.261, "step": 7776, "teacher_loss": 0.2540992796421051 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.4167831540107727, "learning_rate": 2.987300352737123e-05, "loss": 0.2455, "step": 7777, "teacher_loss": 0.22651216387748718 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2187052071094513, "learning_rate": 2.9872708433695907e-05, "loss": 0.1978, "step": 7778, "teacher_loss": 0.19549311697483063 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.23581178486347198, "learning_rate": 2.9872412999033967e-05, "loss": 0.3212, "step": 7779, "teacher_loss": 0.33072429895401 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2800215482711792, "learning_rate": 2.9872117223392188e-05, "loss": 0.2174, "step": 7780, "teacher_loss": 0.21039703488349915 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.7383527755737305, "learning_rate": 2.9871821106777354e-05, "loss": 0.3076, "step": 7781, "teacher_loss": 0.2597929537296295 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3133199214935303, "learning_rate": 2.987152464919624e-05, "loss": 0.171, "step": 7782, "teacher_loss": 0.15519243478775024 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.7065719366073608, "learning_rate": 2.9871227850655663e-05, "loss": 0.3373, "step": 7783, "teacher_loss": 0.2962336838245392 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3674553632736206, "learning_rate": 2.9870930711162413e-05, "loss": 0.3061, "step": 7784, "teacher_loss": 0.2993176579475403 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5854167938232422, "learning_rate": 2.9870633230723313e-05, "loss": 0.2903, "step": 7785, "teacher_loss": 0.25753286480903625 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6971392035484314, "learning_rate": 2.987033540934517e-05, "loss": 0.8198, "step": 7786, "teacher_loss": 0.8333829641342163 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.4101884365081787, "learning_rate": 2.9870037247034823e-05, "loss": 0.2339, "step": 7787, "teacher_loss": 0.2142728716135025 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2499900460243225, "learning_rate": 2.9869738743799103e-05, "loss": 0.3565, "step": 7788, "teacher_loss": 0.3682914972305298 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.38142943382263184, "learning_rate": 2.9869439899644856e-05, "loss": 0.3019, "step": 7789, "teacher_loss": 0.29301655292510986 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.290536493062973, "learning_rate": 2.9869140714578934e-05, "loss": 0.2271, "step": 7790, "teacher_loss": 0.2200368046760559 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.8257572054862976, "learning_rate": 2.9868841188608196e-05, "loss": 0.645, "step": 7791, "teacher_loss": 0.6249566078186035 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6436138153076172, "learning_rate": 2.9868541321739508e-05, "loss": 0.4424, "step": 7792, "teacher_loss": 0.42009642720222473 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.8447271585464478, "learning_rate": 2.9868241113979744e-05, "loss": 0.3683, "step": 7793, "teacher_loss": 0.3154064416885376 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.41758328676223755, "learning_rate": 2.9867940565335788e-05, "loss": 0.2529, "step": 7794, "teacher_loss": 0.23465704917907715 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5974828004837036, "learning_rate": 2.9867639675814532e-05, "loss": 0.4327, "step": 7795, "teacher_loss": 0.41433846950531006 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.41767778992652893, "learning_rate": 2.9867338445422875e-05, "loss": 0.3602, "step": 7796, "teacher_loss": 0.35381531715393066 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3344104290008545, "learning_rate": 2.986703687416772e-05, "loss": 0.2141, "step": 7797, "teacher_loss": 0.2007756531238556 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3764779567718506, "learning_rate": 2.986673496205599e-05, "loss": 0.2701, "step": 7798, "teacher_loss": 0.25832122564315796 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.9717959761619568, "learning_rate": 2.986643270909459e-05, "loss": 0.3898, "step": 7799, "teacher_loss": 0.32515549659729004 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.45768409967422485, "learning_rate": 2.9866130115290468e-05, "loss": 0.2276, "step": 7800, "teacher_loss": 0.2020527422428131 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5572268962860107, "learning_rate": 2.986582718065055e-05, "loss": 0.2723, "step": 7801, "teacher_loss": 0.24064376950263977 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6474494934082031, "learning_rate": 2.9865523905181786e-05, "loss": 0.3292, "step": 7802, "teacher_loss": 0.2938896417617798 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6471384763717651, "learning_rate": 2.9865220288891125e-05, "loss": 0.2702, "step": 7803, "teacher_loss": 0.22832253575325012 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.7434492111206055, "learning_rate": 2.9864916331785533e-05, "loss": 0.4546, "step": 7804, "teacher_loss": 0.4225079417228699 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6822161674499512, "learning_rate": 2.986461203387198e-05, "loss": 0.4149, "step": 7805, "teacher_loss": 0.385237455368042 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.4707350432872772, "learning_rate": 2.9864307395157435e-05, "loss": 0.268, "step": 7806, "teacher_loss": 0.2454976737499237 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3236408829689026, "learning_rate": 2.986400241564889e-05, "loss": 0.2494, "step": 7807, "teacher_loss": 0.2411423772573471 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2281387746334076, "learning_rate": 2.986369709535333e-05, "loss": 0.1985, "step": 7808, "teacher_loss": 0.19520670175552368 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.390608012676239, "learning_rate": 2.986339143427776e-05, "loss": 0.2949, "step": 7809, "teacher_loss": 0.2843020558357239 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2657642364501953, "learning_rate": 2.9863085432429193e-05, "loss": 0.2524, "step": 7810, "teacher_loss": 0.25096291303634644 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.37380653619766235, "learning_rate": 2.986277908981463e-05, "loss": 0.2607, "step": 7811, "teacher_loss": 0.24807780981063843 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6848816275596619, "learning_rate": 2.986247240644111e-05, "loss": 0.3601, "step": 7812, "teacher_loss": 0.3239631652832031 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3223942816257477, "learning_rate": 2.9862165382315657e-05, "loss": 0.2079, "step": 7813, "teacher_loss": 0.19520601630210876 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.636390745639801, "learning_rate": 2.986185801744531e-05, "loss": 0.292, "step": 7814, "teacher_loss": 0.2536846995353699 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.6240343451499939, "learning_rate": 2.9861550311837116e-05, "loss": 0.2582, "step": 7815, "teacher_loss": 0.21750250458717346 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.7771161794662476, "learning_rate": 2.9861242265498133e-05, "loss": 0.3767, "step": 7816, "teacher_loss": 0.33225274085998535 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5588755011558533, "learning_rate": 2.986093387843542e-05, "loss": 0.3765, "step": 7817, "teacher_loss": 0.3562135696411133 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.3828737139701843, "learning_rate": 2.9860625150656043e-05, "loss": 0.2498, "step": 7818, "teacher_loss": 0.2350327968597412 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5769387483596802, "learning_rate": 2.9860316082167095e-05, "loss": 0.2832, "step": 7819, "teacher_loss": 0.25057876110076904 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.2763219475746155, "learning_rate": 2.9860006672975647e-05, "loss": 0.283, "step": 7820, "teacher_loss": 0.28378212451934814 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.24730199575424194, "learning_rate": 2.9859696923088802e-05, "loss": 0.2396, "step": 7821, "teacher_loss": 0.23874951899051666 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.29271623492240906, "learning_rate": 2.9859386832513656e-05, "loss": 0.1817, "step": 7822, "teacher_loss": 0.16936182975769043 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5235801935195923, "learning_rate": 2.985907640125732e-05, "loss": 0.3151, "step": 7823, "teacher_loss": 0.29196396470069885 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.5830715298652649, "learning_rate": 2.9858765629326913e-05, "loss": 0.2402, "step": 7824, "teacher_loss": 0.20212921500205994 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.35706770420074463, "learning_rate": 2.985845451672956e-05, "loss": 0.3127, "step": 7825, "teacher_loss": 0.3078124225139618 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.28731000423431396, "learning_rate": 2.9858143063472392e-05, "loss": 0.2032, "step": 7826, "teacher_loss": 0.19384275376796722 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.8478162288665771, "learning_rate": 2.985783126956255e-05, "loss": 0.2882, "step": 7827, "teacher_loss": 0.22604668140411377 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.534031867980957, "learning_rate": 2.9857519135007184e-05, "loss": 0.5821, "step": 7828, "teacher_loss": 0.5874625444412231 }, { "compression_loss": 0.0, "epoch": 1.41, "label_loss": 0.35257723927497864, "learning_rate": 2.9857206659813447e-05, "loss": 0.2564, "step": 7829, "teacher_loss": 0.24576786160469055 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.30685386061668396, "learning_rate": 2.9856893843988507e-05, "loss": 0.2961, "step": 7830, "teacher_loss": 0.29489296674728394 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.44119954109191895, "learning_rate": 2.9856580687539537e-05, "loss": 0.3053, "step": 7831, "teacher_loss": 0.2902168333530426 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.6479071974754333, "learning_rate": 2.985626719047371e-05, "loss": 0.3109, "step": 7832, "teacher_loss": 0.27346375584602356 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.47327619791030884, "learning_rate": 2.9855953352798223e-05, "loss": 0.3112, "step": 7833, "teacher_loss": 0.2931399345397949 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.8096597194671631, "learning_rate": 2.9855639174520262e-05, "loss": 0.3847, "step": 7834, "teacher_loss": 0.33747875690460205 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.29229435324668884, "learning_rate": 2.9855324655647036e-05, "loss": 0.2022, "step": 7835, "teacher_loss": 0.19222742319107056 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.40433692932128906, "learning_rate": 2.9855009796185752e-05, "loss": 0.281, "step": 7836, "teacher_loss": 0.2672778367996216 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.6754491925239563, "learning_rate": 2.9854694596143632e-05, "loss": 0.4032, "step": 7837, "teacher_loss": 0.37299975752830505 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.17516176402568817, "learning_rate": 2.98543790555279e-05, "loss": 0.219, "step": 7838, "teacher_loss": 0.22389857470989227 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.570223331451416, "learning_rate": 2.98540631743458e-05, "loss": 0.2323, "step": 7839, "teacher_loss": 0.19474822282791138 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4763471484184265, "learning_rate": 2.9853746952604556e-05, "loss": 0.2416, "step": 7840, "teacher_loss": 0.21551844477653503 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.45642226934432983, "learning_rate": 2.9853430390311434e-05, "loss": 0.2849, "step": 7841, "teacher_loss": 0.26583221554756165 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.7567011117935181, "learning_rate": 2.985311348747368e-05, "loss": 0.2952, "step": 7842, "teacher_loss": 0.2439035177230835 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.23512735962867737, "learning_rate": 2.9852796244098574e-05, "loss": 0.211, "step": 7843, "teacher_loss": 0.20826426148414612 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5450301170349121, "learning_rate": 2.9852478660193375e-05, "loss": 0.239, "step": 7844, "teacher_loss": 0.20494484901428223 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.34329545497894287, "learning_rate": 2.9852160735765374e-05, "loss": 0.3377, "step": 7845, "teacher_loss": 0.33705127239227295 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.9214147925376892, "learning_rate": 2.9851842470821854e-05, "loss": 0.4034, "step": 7846, "teacher_loss": 0.3458458483219147 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4630696773529053, "learning_rate": 2.9851523865370113e-05, "loss": 0.2512, "step": 7847, "teacher_loss": 0.22761225700378418 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.40053293108940125, "learning_rate": 2.9851204919417463e-05, "loss": 0.1915, "step": 7848, "teacher_loss": 0.16827738285064697 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.9158337116241455, "learning_rate": 2.9850885632971202e-05, "loss": 0.3364, "step": 7849, "teacher_loss": 0.27197355031967163 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.2564816474914551, "learning_rate": 2.985056600603867e-05, "loss": 0.3, "step": 7850, "teacher_loss": 0.30478712916374207 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5917198657989502, "learning_rate": 2.9850246038627172e-05, "loss": 0.3044, "step": 7851, "teacher_loss": 0.2724647521972656 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3412782549858093, "learning_rate": 2.9849925730744064e-05, "loss": 0.2767, "step": 7852, "teacher_loss": 0.2694881856441498 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.21820154786109924, "learning_rate": 2.9849605082396678e-05, "loss": 0.2904, "step": 7853, "teacher_loss": 0.2984505891799927 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.47932130098342896, "learning_rate": 2.984928409359237e-05, "loss": 0.3589, "step": 7854, "teacher_loss": 0.3455348610877991 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4179157614707947, "learning_rate": 2.9848962764338497e-05, "loss": 0.3351, "step": 7855, "teacher_loss": 0.3259321451187134 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3651089668273926, "learning_rate": 2.9848641094642423e-05, "loss": 0.2096, "step": 7856, "teacher_loss": 0.19228777289390564 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5287426710128784, "learning_rate": 2.9848319084511535e-05, "loss": 0.4421, "step": 7857, "teacher_loss": 0.4324356019496918 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.2561258375644684, "learning_rate": 2.9847996733953204e-05, "loss": 0.1774, "step": 7858, "teacher_loss": 0.1686631143093109 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.47791463136672974, "learning_rate": 2.9847674042974828e-05, "loss": 0.2425, "step": 7859, "teacher_loss": 0.21635472774505615 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4318338632583618, "learning_rate": 2.9847351011583796e-05, "loss": 0.233, "step": 7860, "teacher_loss": 0.21094997227191925 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.44735851883888245, "learning_rate": 2.9847027639787524e-05, "loss": 0.2076, "step": 7861, "teacher_loss": 0.1809990555047989 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.39480364322662354, "learning_rate": 2.984670392759342e-05, "loss": 0.2312, "step": 7862, "teacher_loss": 0.21304309368133545 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4058953523635864, "learning_rate": 2.9846379875008906e-05, "loss": 0.2499, "step": 7863, "teacher_loss": 0.23261000216007233 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.48555833101272583, "learning_rate": 2.9846055482041418e-05, "loss": 0.3168, "step": 7864, "teacher_loss": 0.2980100214481354 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.4436887502670288, "learning_rate": 2.9845730748698385e-05, "loss": 0.2697, "step": 7865, "teacher_loss": 0.2504180669784546 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5427103042602539, "learning_rate": 2.984540567498726e-05, "loss": 0.3442, "step": 7866, "teacher_loss": 0.322147011756897 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 1.2021923065185547, "learning_rate": 2.9845080260915484e-05, "loss": 0.5085, "step": 7867, "teacher_loss": 0.4314666986465454 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5192672610282898, "learning_rate": 2.9844754506490534e-05, "loss": 0.3265, "step": 7868, "teacher_loss": 0.30503201484680176 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.22259517014026642, "learning_rate": 2.9844428411719864e-05, "loss": 0.2632, "step": 7869, "teacher_loss": 0.26768654584884644 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.7509715557098389, "learning_rate": 2.984410197661096e-05, "loss": 0.2806, "step": 7870, "teacher_loss": 0.2283553034067154 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.6299173831939697, "learning_rate": 2.9843775201171303e-05, "loss": 0.3198, "step": 7871, "teacher_loss": 0.2853153347969055 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3831256926059723, "learning_rate": 2.9843448085408383e-05, "loss": 0.3328, "step": 7872, "teacher_loss": 0.3272198438644409 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.8916932344436646, "learning_rate": 2.9843120629329698e-05, "loss": 0.3202, "step": 7873, "teacher_loss": 0.2566462457180023 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.2449224889278412, "learning_rate": 2.9842792832942764e-05, "loss": 0.2767, "step": 7874, "teacher_loss": 0.28023597598075867 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.775907576084137, "learning_rate": 2.984246469625509e-05, "loss": 0.5577, "step": 7875, "teacher_loss": 0.5334750413894653 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3195655345916748, "learning_rate": 2.98421362192742e-05, "loss": 0.2053, "step": 7876, "teacher_loss": 0.1925676017999649 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3903654217720032, "learning_rate": 2.9841807402007622e-05, "loss": 0.2715, "step": 7877, "teacher_loss": 0.25829219818115234 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.48247915506362915, "learning_rate": 2.9841478244462906e-05, "loss": 0.3351, "step": 7878, "teacher_loss": 0.31874769926071167 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.779265820980072, "learning_rate": 2.984114874664759e-05, "loss": 0.3147, "step": 7879, "teacher_loss": 0.2630445063114166 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.6541645526885986, "learning_rate": 2.9840818908569223e-05, "loss": 0.2538, "step": 7880, "teacher_loss": 0.20933130383491516 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.26947706937789917, "learning_rate": 2.9840488730235378e-05, "loss": 0.228, "step": 7881, "teacher_loss": 0.22342851758003235 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.32322901487350464, "learning_rate": 2.9840158211653616e-05, "loss": 0.1679, "step": 7882, "teacher_loss": 0.150656059384346 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.5273176431655884, "learning_rate": 2.9839827352831522e-05, "loss": 0.272, "step": 7883, "teacher_loss": 0.24364562332630157 }, { "compression_loss": 0.0, "epoch": 1.42, "label_loss": 0.3925158381462097, "learning_rate": 2.983949615377668e-05, "loss": 0.2073, "step": 7884, "teacher_loss": 0.18673306703567505 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3156322240829468, "learning_rate": 2.9839164614496686e-05, "loss": 0.2322, "step": 7885, "teacher_loss": 0.222874253988266 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.7113157510757446, "learning_rate": 2.9838832734999132e-05, "loss": 0.2776, "step": 7886, "teacher_loss": 0.22939878702163696 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.8962811827659607, "learning_rate": 2.9838500515291632e-05, "loss": 0.6153, "step": 7887, "teacher_loss": 0.584083616733551 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.25298309326171875, "learning_rate": 2.983816795538181e-05, "loss": 0.2162, "step": 7888, "teacher_loss": 0.21214696764945984 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.2882899343967438, "learning_rate": 2.9837835055277275e-05, "loss": 0.2064, "step": 7889, "teacher_loss": 0.19726887345314026 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.22818830609321594, "learning_rate": 2.983750181498567e-05, "loss": 0.2435, "step": 7890, "teacher_loss": 0.24517516791820526 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.43428054451942444, "learning_rate": 2.983716823451464e-05, "loss": 0.3154, "step": 7891, "teacher_loss": 0.30220216512680054 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 1.1805777549743652, "learning_rate": 2.983683431387182e-05, "loss": 0.4844, "step": 7892, "teacher_loss": 0.40702199935913086 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.46497368812561035, "learning_rate": 2.9836500053064874e-05, "loss": 0.2096, "step": 7893, "teacher_loss": 0.18124695122241974 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.337495356798172, "learning_rate": 2.9836165452101466e-05, "loss": 0.2125, "step": 7894, "teacher_loss": 0.19866225123405457 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3568882346153259, "learning_rate": 2.9835830510989267e-05, "loss": 0.2708, "step": 7895, "teacher_loss": 0.26125368475914 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3126126527786255, "learning_rate": 2.9835495229735948e-05, "loss": 0.2078, "step": 7896, "teacher_loss": 0.19613653421401978 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.5350156426429749, "learning_rate": 2.9835159608349207e-05, "loss": 0.3208, "step": 7897, "teacher_loss": 0.29700711369514465 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.4463285505771637, "learning_rate": 2.9834823646836735e-05, "loss": 0.3199, "step": 7898, "teacher_loss": 0.305867075920105 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.6754963397979736, "learning_rate": 2.983448734520623e-05, "loss": 0.2316, "step": 7899, "teacher_loss": 0.18229299783706665 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.24778440594673157, "learning_rate": 2.9834150703465406e-05, "loss": 0.1789, "step": 7900, "teacher_loss": 0.1712363213300705 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.4365927577018738, "learning_rate": 2.9833813721621985e-05, "loss": 0.2524, "step": 7901, "teacher_loss": 0.23193974792957306 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.4043685793876648, "learning_rate": 2.9833476399683686e-05, "loss": 0.3163, "step": 7902, "teacher_loss": 0.30646079778671265 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.6264365911483765, "learning_rate": 2.9833138737658254e-05, "loss": 0.4169, "step": 7903, "teacher_loss": 0.39363518357276917 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.554568886756897, "learning_rate": 2.9832800735553416e-05, "loss": 0.2534, "step": 7904, "teacher_loss": 0.2199532687664032 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.15444108843803406, "learning_rate": 2.9832462393376926e-05, "loss": 0.1933, "step": 7905, "teacher_loss": 0.1975642293691635 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.7915629148483276, "learning_rate": 2.9832123711136548e-05, "loss": 0.6131, "step": 7906, "teacher_loss": 0.5933099985122681 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.13332098722457886, "learning_rate": 2.9831784688840045e-05, "loss": 0.2188, "step": 7907, "teacher_loss": 0.22826996445655823 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.38115742802619934, "learning_rate": 2.983144532649518e-05, "loss": 0.3007, "step": 7908, "teacher_loss": 0.29181087017059326 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.8646666407585144, "learning_rate": 2.9831105624109746e-05, "loss": 0.3413, "step": 7909, "teacher_loss": 0.2831355035305023 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.6045562028884888, "learning_rate": 2.983076558169152e-05, "loss": 0.3748, "step": 7910, "teacher_loss": 0.34926527738571167 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.5086529850959778, "learning_rate": 2.983042519924831e-05, "loss": 0.2572, "step": 7911, "teacher_loss": 0.2292218655347824 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.7204261422157288, "learning_rate": 2.983008447678791e-05, "loss": 0.2769, "step": 7912, "teacher_loss": 0.2275742143392563 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.2638428509235382, "learning_rate": 2.982974341431814e-05, "loss": 0.3251, "step": 7913, "teacher_loss": 0.33185291290283203 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.22108659148216248, "learning_rate": 2.9829402011846814e-05, "loss": 0.2596, "step": 7914, "teacher_loss": 0.26391515135765076 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3795558214187622, "learning_rate": 2.9829060269381762e-05, "loss": 0.237, "step": 7915, "teacher_loss": 0.221209317445755 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.2602407932281494, "learning_rate": 2.9828718186930817e-05, "loss": 0.1896, "step": 7916, "teacher_loss": 0.18179252743721008 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.48754292726516724, "learning_rate": 2.982837576450182e-05, "loss": 0.237, "step": 7917, "teacher_loss": 0.2091621607542038 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.22752819955348969, "learning_rate": 2.9828033002102624e-05, "loss": 0.1654, "step": 7918, "teacher_loss": 0.15851053595542908 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.5340484380722046, "learning_rate": 2.9827689899741093e-05, "loss": 0.3343, "step": 7919, "teacher_loss": 0.312160849571228 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.4123237431049347, "learning_rate": 2.9827346457425087e-05, "loss": 0.3289, "step": 7920, "teacher_loss": 0.3196706175804138 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3904937505722046, "learning_rate": 2.9827002675162478e-05, "loss": 0.2062, "step": 7921, "teacher_loss": 0.18577435612678528 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.2816625237464905, "learning_rate": 2.9826658552961155e-05, "loss": 0.232, "step": 7922, "teacher_loss": 0.22650131583213806 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.7039488554000854, "learning_rate": 2.9826314090828997e-05, "loss": 0.2693, "step": 7923, "teacher_loss": 0.22104613482952118 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.5160956978797913, "learning_rate": 2.982596928877392e-05, "loss": 0.2125, "step": 7924, "teacher_loss": 0.17876680195331573 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3103431761264801, "learning_rate": 2.9825624146803807e-05, "loss": 0.2249, "step": 7925, "teacher_loss": 0.2153695821762085 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.45536044239997864, "learning_rate": 2.9825278664926587e-05, "loss": 0.2721, "step": 7926, "teacher_loss": 0.25169819593429565 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.42669031023979187, "learning_rate": 2.9824932843150176e-05, "loss": 0.2933, "step": 7927, "teacher_loss": 0.2784522771835327 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.23641017079353333, "learning_rate": 2.9824586681482503e-05, "loss": 0.2418, "step": 7928, "teacher_loss": 0.24239802360534668 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.257524698972702, "learning_rate": 2.9824240179931503e-05, "loss": 0.2315, "step": 7929, "teacher_loss": 0.22863918542861938 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.9684820175170898, "learning_rate": 2.9823893338505118e-05, "loss": 0.2808, "step": 7930, "teacher_loss": 0.20437420904636383 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.44187670946121216, "learning_rate": 2.9823546157211303e-05, "loss": 0.2328, "step": 7931, "teacher_loss": 0.2095218002796173 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.22811956703662872, "learning_rate": 2.9823198636058023e-05, "loss": 0.2742, "step": 7932, "teacher_loss": 0.27927637100219727 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.8179506063461304, "learning_rate": 2.9822850775053238e-05, "loss": 0.8288, "step": 7933, "teacher_loss": 0.8300375938415527 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3418976664543152, "learning_rate": 2.9822502574204926e-05, "loss": 0.2335, "step": 7934, "teacher_loss": 0.22149452567100525 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.8048443794250488, "learning_rate": 2.9822154033521073e-05, "loss": 0.3877, "step": 7935, "teacher_loss": 0.34130239486694336 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.3619652986526489, "learning_rate": 2.982180515300966e-05, "loss": 0.2452, "step": 7936, "teacher_loss": 0.23223735392093658 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.6818567514419556, "learning_rate": 2.9821455932678698e-05, "loss": 0.5385, "step": 7937, "teacher_loss": 0.5225313305854797 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.33271127939224243, "learning_rate": 2.9821106372536188e-05, "loss": 0.2595, "step": 7938, "teacher_loss": 0.2513582110404968 }, { "compression_loss": 0.0, "epoch": 1.43, "label_loss": 0.4131905138492584, "learning_rate": 2.982075647259014e-05, "loss": 0.1945, "step": 7939, "teacher_loss": 0.17024609446525574 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.27992552518844604, "learning_rate": 2.9820406232848588e-05, "loss": 0.2135, "step": 7940, "teacher_loss": 0.2061430811882019 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.12883847951889038, "learning_rate": 2.9820055653319554e-05, "loss": 0.2643, "step": 7941, "teacher_loss": 0.27935338020324707 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.42077189683914185, "learning_rate": 2.9819704734011074e-05, "loss": 0.3078, "step": 7942, "teacher_loss": 0.2952490448951721 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4479777216911316, "learning_rate": 2.98193534749312e-05, "loss": 0.2641, "step": 7943, "teacher_loss": 0.24369552731513977 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4536122977733612, "learning_rate": 2.9819001876087976e-05, "loss": 0.2769, "step": 7944, "teacher_loss": 0.2572559714317322 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6863450407981873, "learning_rate": 2.9818649937489473e-05, "loss": 0.2883, "step": 7945, "teacher_loss": 0.24410021305084229 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.794481635093689, "learning_rate": 2.9818297659143754e-05, "loss": 0.3573, "step": 7946, "teacher_loss": 0.3086785078048706 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 1.0190421342849731, "learning_rate": 2.9817945041058897e-05, "loss": 0.6772, "step": 7947, "teacher_loss": 0.6392534971237183 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.7421325445175171, "learning_rate": 2.9817592083242988e-05, "loss": 0.4406, "step": 7948, "teacher_loss": 0.40705233812332153 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5308027267456055, "learning_rate": 2.9817238785704117e-05, "loss": 0.2968, "step": 7949, "teacher_loss": 0.2708229422569275 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.38240405917167664, "learning_rate": 2.9816885148450382e-05, "loss": 0.3141, "step": 7950, "teacher_loss": 0.30646514892578125 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.2199104130268097, "learning_rate": 2.98165311714899e-05, "loss": 0.1864, "step": 7951, "teacher_loss": 0.18264034390449524 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6833811402320862, "learning_rate": 2.9816176854830775e-05, "loss": 0.3225, "step": 7952, "teacher_loss": 0.28236451745033264 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.23944562673568726, "learning_rate": 2.981582219848114e-05, "loss": 0.192, "step": 7953, "teacher_loss": 0.18677642941474915 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4406251311302185, "learning_rate": 2.981546720244912e-05, "loss": 0.3937, "step": 7954, "teacher_loss": 0.3884353041648865 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4613821804523468, "learning_rate": 2.9815111866742857e-05, "loss": 0.3229, "step": 7955, "teacher_loss": 0.3075345754623413 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6692880392074585, "learning_rate": 2.9814756191370497e-05, "loss": 0.2306, "step": 7956, "teacher_loss": 0.181819885969162 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.7955378293991089, "learning_rate": 2.981440017634019e-05, "loss": 0.2622, "step": 7957, "teacher_loss": 0.2028876692056656 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.47293224930763245, "learning_rate": 2.981404382166011e-05, "loss": 0.3613, "step": 7958, "teacher_loss": 0.34889277815818787 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6163287162780762, "learning_rate": 2.9813687127338417e-05, "loss": 0.2774, "step": 7959, "teacher_loss": 0.2397189438343048 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.32954472303390503, "learning_rate": 2.981333009338329e-05, "loss": 0.2237, "step": 7960, "teacher_loss": 0.2118997424840927 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.7575712203979492, "learning_rate": 2.981297271980292e-05, "loss": 0.3376, "step": 7961, "teacher_loss": 0.290883868932724 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.37359052896499634, "learning_rate": 2.9812615006605492e-05, "loss": 0.2862, "step": 7962, "teacher_loss": 0.27650186419487 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.545821487903595, "learning_rate": 2.9812256953799216e-05, "loss": 0.21, "step": 7963, "teacher_loss": 0.17267107963562012 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6322165727615356, "learning_rate": 2.98118985613923e-05, "loss": 0.5668, "step": 7964, "teacher_loss": 0.5595746040344238 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.7418209314346313, "learning_rate": 2.9811539829392955e-05, "loss": 0.4326, "step": 7965, "teacher_loss": 0.39820894598960876 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3942718803882599, "learning_rate": 2.981118075780941e-05, "loss": 0.2968, "step": 7966, "teacher_loss": 0.2860025465488434 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.9183347225189209, "learning_rate": 2.9810821346649894e-05, "loss": 0.4469, "step": 7967, "teacher_loss": 0.39446988701820374 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.1843469887971878, "learning_rate": 2.9810461595922653e-05, "loss": 0.1771, "step": 7968, "teacher_loss": 0.1763322651386261 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3399212062358856, "learning_rate": 2.9810101505635932e-05, "loss": 0.2654, "step": 7969, "teacher_loss": 0.2571086287498474 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5177813172340393, "learning_rate": 2.9809741075797982e-05, "loss": 0.244, "step": 7970, "teacher_loss": 0.21362170577049255 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5190725326538086, "learning_rate": 2.9809380306417074e-05, "loss": 0.2214, "step": 7971, "teacher_loss": 0.18837712705135345 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5994565486907959, "learning_rate": 2.9809019197501477e-05, "loss": 0.2474, "step": 7972, "teacher_loss": 0.2082386165857315 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 1.2282960414886475, "learning_rate": 2.9808657749059466e-05, "loss": 0.4175, "step": 7973, "teacher_loss": 0.3274487257003784 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4580203890800476, "learning_rate": 2.9808295961099337e-05, "loss": 0.3755, "step": 7974, "teacher_loss": 0.3663333058357239 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.36321693658828735, "learning_rate": 2.9807933833629376e-05, "loss": 0.2814, "step": 7975, "teacher_loss": 0.2722950279712677 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3472549021244049, "learning_rate": 2.980757136665789e-05, "loss": 0.265, "step": 7976, "teacher_loss": 0.2558647394180298 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6636838316917419, "learning_rate": 2.9807208560193188e-05, "loss": 0.2673, "step": 7977, "teacher_loss": 0.22326692938804626 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.2975034713745117, "learning_rate": 2.9806845414243588e-05, "loss": 0.1932, "step": 7978, "teacher_loss": 0.1816408634185791 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4268759489059448, "learning_rate": 2.9806481928817415e-05, "loss": 0.2559, "step": 7979, "teacher_loss": 0.23685908317565918 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5061647891998291, "learning_rate": 2.9806118103923003e-05, "loss": 0.2773, "step": 7980, "teacher_loss": 0.25187763571739197 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6764490604400635, "learning_rate": 2.980575393956869e-05, "loss": 0.4442, "step": 7981, "teacher_loss": 0.41834330558776855 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.17975282669067383, "learning_rate": 2.980538943576284e-05, "loss": 0.1594, "step": 7982, "teacher_loss": 0.1571287214756012 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.8709560632705688, "learning_rate": 2.9805024592513786e-05, "loss": 0.2641, "step": 7983, "teacher_loss": 0.19664371013641357 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5154306888580322, "learning_rate": 2.9804659409829916e-05, "loss": 0.3812, "step": 7984, "teacher_loss": 0.366298645734787 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3517243564128876, "learning_rate": 2.9804293887719588e-05, "loss": 0.2012, "step": 7985, "teacher_loss": 0.184498131275177 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.8047251105308533, "learning_rate": 2.9803928026191188e-05, "loss": 0.2782, "step": 7986, "teacher_loss": 0.21974410116672516 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3863036334514618, "learning_rate": 2.98035618252531e-05, "loss": 0.2414, "step": 7987, "teacher_loss": 0.22527649998664856 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.605420708656311, "learning_rate": 2.980319528491373e-05, "loss": 0.3649, "step": 7988, "teacher_loss": 0.33816656470298767 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.4079616069793701, "learning_rate": 2.9802828405181468e-05, "loss": 0.2325, "step": 7989, "teacher_loss": 0.2129765748977661 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.6925249695777893, "learning_rate": 2.9802461186064734e-05, "loss": 0.2933, "step": 7990, "teacher_loss": 0.24895219504833221 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.3809935450553894, "learning_rate": 2.9802093627571943e-05, "loss": 0.3306, "step": 7991, "teacher_loss": 0.3249974548816681 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.47359800338745117, "learning_rate": 2.980172572971153e-05, "loss": 0.2452, "step": 7992, "teacher_loss": 0.21978314220905304 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.2826244831085205, "learning_rate": 2.980135749249192e-05, "loss": 0.2041, "step": 7993, "teacher_loss": 0.19532069563865662 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.5362200140953064, "learning_rate": 2.9800988915921553e-05, "loss": 0.2948, "step": 7994, "teacher_loss": 0.2680273652076721 }, { "compression_loss": 0.0, "epoch": 1.44, "label_loss": 0.2653762698173523, "learning_rate": 2.9800620000008896e-05, "loss": 0.1798, "step": 7995, "teacher_loss": 0.17032530903816223 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.5877405405044556, "learning_rate": 2.9800250744762392e-05, "loss": 0.3302, "step": 7996, "teacher_loss": 0.301611065864563 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.7703908681869507, "learning_rate": 2.9799881150190516e-05, "loss": 0.3748, "step": 7997, "teacher_loss": 0.3307967483997345 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.4675881862640381, "learning_rate": 2.9799511216301733e-05, "loss": 0.3589, "step": 7998, "teacher_loss": 0.3468117117881775 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.7271214723587036, "learning_rate": 2.979914094310453e-05, "loss": 0.2563, "step": 7999, "teacher_loss": 0.203986257314682 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.2982984781265259, "learning_rate": 2.9798770330607393e-05, "loss": 0.2335, "step": 8000, "teacher_loss": 0.2263200283050537 }, { "epoch": 1.45, "eval_exact_match": 79.29044465468307, "eval_f1": 86.85736418641653, "step": 8000 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.35950759053230286, "learning_rate": 2.9798399378818826e-05, "loss": 0.3886, "step": 8001, "teacher_loss": 0.39185500144958496 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.41313260793685913, "learning_rate": 2.9798028087747323e-05, "loss": 0.3724, "step": 8002, "teacher_loss": 0.36784258484840393 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3457781970500946, "learning_rate": 2.979765645740141e-05, "loss": 0.291, "step": 8003, "teacher_loss": 0.28492817282676697 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.20942559838294983, "learning_rate": 2.9797284487789594e-05, "loss": 0.1923, "step": 8004, "teacher_loss": 0.190372496843338 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.16968312859535217, "learning_rate": 2.9796912178920415e-05, "loss": 0.1939, "step": 8005, "teacher_loss": 0.19657176733016968 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.8281893134117126, "learning_rate": 2.97965395308024e-05, "loss": 0.4514, "step": 8006, "teacher_loss": 0.4095839262008667 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.9052969217300415, "learning_rate": 2.97961665434441e-05, "loss": 0.3908, "step": 8007, "teacher_loss": 0.3336770534515381 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.2036474347114563, "learning_rate": 2.9795793216854056e-05, "loss": 0.2657, "step": 8008, "teacher_loss": 0.27257758378982544 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3779197931289673, "learning_rate": 2.9795419551040836e-05, "loss": 0.3229, "step": 8009, "teacher_loss": 0.31682828068733215 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.49461743235588074, "learning_rate": 2.9795045546013005e-05, "loss": 0.2313, "step": 8010, "teacher_loss": 0.20204345881938934 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.526146411895752, "learning_rate": 2.979467120177914e-05, "loss": 0.2864, "step": 8011, "teacher_loss": 0.2597748041152954 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.15993425250053406, "learning_rate": 2.9794296518347815e-05, "loss": 0.1696, "step": 8012, "teacher_loss": 0.17064236104488373 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.26443496346473694, "learning_rate": 2.9793921495727632e-05, "loss": 0.305, "step": 8013, "teacher_loss": 0.30951380729675293 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.46183067560195923, "learning_rate": 2.979354613392718e-05, "loss": 0.2742, "step": 8014, "teacher_loss": 0.25333696603775024 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.4355916380882263, "learning_rate": 2.9793170432955073e-05, "loss": 0.2571, "step": 8015, "teacher_loss": 0.23730865120887756 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3521590232849121, "learning_rate": 2.9792794392819916e-05, "loss": 0.309, "step": 8016, "teacher_loss": 0.3041537404060364 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3283768594264984, "learning_rate": 2.9792418013530334e-05, "loss": 0.2285, "step": 8017, "teacher_loss": 0.21738946437835693 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.709922194480896, "learning_rate": 2.979204129509496e-05, "loss": 0.3488, "step": 8018, "teacher_loss": 0.30863460898399353 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.44409239292144775, "learning_rate": 2.9791664237522427e-05, "loss": 0.3341, "step": 8019, "teacher_loss": 0.3218342065811157 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.48729509115219116, "learning_rate": 2.979128684082138e-05, "loss": 0.2053, "step": 8020, "teacher_loss": 0.1739344298839569 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.699779212474823, "learning_rate": 2.9790909105000472e-05, "loss": 0.3144, "step": 8021, "teacher_loss": 0.27154403924942017 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.42885515093803406, "learning_rate": 2.9790531030068365e-05, "loss": 0.2706, "step": 8022, "teacher_loss": 0.2529875338077545 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.4884800910949707, "learning_rate": 2.9790152616033727e-05, "loss": 0.2615, "step": 8023, "teacher_loss": 0.23624378442764282 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.9685729146003723, "learning_rate": 2.9789773862905227e-05, "loss": 0.3838, "step": 8024, "teacher_loss": 0.31884828209877014 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.6472843885421753, "learning_rate": 2.9789394770691562e-05, "loss": 0.2591, "step": 8025, "teacher_loss": 0.2159425914287567 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.13992975652217865, "learning_rate": 2.9789015339401413e-05, "loss": 0.1855, "step": 8026, "teacher_loss": 0.19050821661949158 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3546307682991028, "learning_rate": 2.978863556904348e-05, "loss": 0.3637, "step": 8027, "teacher_loss": 0.3646918833255768 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.4858000874519348, "learning_rate": 2.9788255459626472e-05, "loss": 0.2482, "step": 8028, "teacher_loss": 0.22182008624076843 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.19023367762565613, "learning_rate": 2.978787501115911e-05, "loss": 0.2932, "step": 8029, "teacher_loss": 0.3046276867389679 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.4192965030670166, "learning_rate": 2.97874942236501e-05, "loss": 0.3362, "step": 8030, "teacher_loss": 0.3269474506378174 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.37812745571136475, "learning_rate": 2.978711309710819e-05, "loss": 0.2376, "step": 8031, "teacher_loss": 0.2220381796360016 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 1.0065064430236816, "learning_rate": 2.978673163154211e-05, "loss": 0.3249, "step": 8032, "teacher_loss": 0.24918562173843384 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.8626583814620972, "learning_rate": 2.9786349826960605e-05, "loss": 0.4275, "step": 8033, "teacher_loss": 0.3791157305240631 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.7173159718513489, "learning_rate": 2.978596768337243e-05, "loss": 0.2839, "step": 8034, "teacher_loss": 0.23575535416603088 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.2525938153266907, "learning_rate": 2.978558520078635e-05, "loss": 0.2642, "step": 8035, "teacher_loss": 0.26552778482437134 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.19853433966636658, "learning_rate": 2.9785202379211124e-05, "loss": 0.1818, "step": 8036, "teacher_loss": 0.17988526821136475 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.5615202188491821, "learning_rate": 2.978481921865554e-05, "loss": 0.3308, "step": 8037, "teacher_loss": 0.3051198124885559 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.5207582712173462, "learning_rate": 2.9784435719128375e-05, "loss": 0.3242, "step": 8038, "teacher_loss": 0.30238497257232666 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.2839233875274658, "learning_rate": 2.978405188063843e-05, "loss": 0.2516, "step": 8039, "teacher_loss": 0.24800646305084229 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.40185031294822693, "learning_rate": 2.97836677031945e-05, "loss": 0.3105, "step": 8040, "teacher_loss": 0.3003424406051636 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.6808096766471863, "learning_rate": 2.978328318680539e-05, "loss": 0.3619, "step": 8041, "teacher_loss": 0.32644736766815186 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.7205904722213745, "learning_rate": 2.978289833147992e-05, "loss": 0.3355, "step": 8042, "teacher_loss": 0.29274511337280273 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.6800805330276489, "learning_rate": 2.9782513137226914e-05, "loss": 0.302, "step": 8043, "teacher_loss": 0.2600025534629822 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.15397977828979492, "learning_rate": 2.9782127604055205e-05, "loss": 0.2564, "step": 8044, "teacher_loss": 0.2677498757839203 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.3411473035812378, "learning_rate": 2.9781741731973627e-05, "loss": 0.2552, "step": 8045, "teacher_loss": 0.24565398693084717 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.5428531765937805, "learning_rate": 2.9781355520991026e-05, "loss": 0.2793, "step": 8046, "teacher_loss": 0.25003957748413086 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.30829495191574097, "learning_rate": 2.978096897111626e-05, "loss": 0.2833, "step": 8047, "teacher_loss": 0.2805267572402954 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.18221524357795715, "learning_rate": 2.9780582082358194e-05, "loss": 0.1702, "step": 8048, "teacher_loss": 0.16888168454170227 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.15790969133377075, "learning_rate": 2.9780194854725694e-05, "loss": 0.1699, "step": 8049, "teacher_loss": 0.17128121852874756 }, { "compression_loss": 0.0, "epoch": 1.45, "label_loss": 0.31405192613601685, "learning_rate": 2.9779807288227638e-05, "loss": 0.2155, "step": 8050, "teacher_loss": 0.20452788472175598 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.41470450162887573, "learning_rate": 2.977941938287292e-05, "loss": 0.213, "step": 8051, "teacher_loss": 0.1905650496482849 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.18562032282352448, "learning_rate": 2.977903113867042e-05, "loss": 0.1772, "step": 8052, "teacher_loss": 0.17628324031829834 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.25806769728660583, "learning_rate": 2.9778642555629044e-05, "loss": 0.2223, "step": 8053, "teacher_loss": 0.21837911009788513 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.37394577264785767, "learning_rate": 2.977825363375771e-05, "loss": 0.2505, "step": 8054, "teacher_loss": 0.2368081510066986 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.23483368754386902, "learning_rate": 2.977786437306532e-05, "loss": 0.1828, "step": 8055, "teacher_loss": 0.1770137995481491 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.9039406776428223, "learning_rate": 2.977747477356081e-05, "loss": 0.5028, "step": 8056, "teacher_loss": 0.45827025175094604 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.40942177176475525, "learning_rate": 2.9777084835253107e-05, "loss": 0.2495, "step": 8057, "teacher_loss": 0.23175999522209167 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.24341200292110443, "learning_rate": 2.9776694558151154e-05, "loss": 0.3094, "step": 8058, "teacher_loss": 0.3167455494403839 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3338346481323242, "learning_rate": 2.97763039422639e-05, "loss": 0.181, "step": 8059, "teacher_loss": 0.16400977969169617 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5458928942680359, "learning_rate": 2.9775912987600294e-05, "loss": 0.2939, "step": 8060, "teacher_loss": 0.26589053869247437 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.43793976306915283, "learning_rate": 2.9775521694169305e-05, "loss": 0.2587, "step": 8061, "teacher_loss": 0.23878324031829834 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.45369377732276917, "learning_rate": 2.9775130061979906e-05, "loss": 0.277, "step": 8062, "teacher_loss": 0.25732752680778503 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.28746169805526733, "learning_rate": 2.977473809104107e-05, "loss": 0.2161, "step": 8063, "teacher_loss": 0.2081322968006134 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.48836231231689453, "learning_rate": 2.9774345781361785e-05, "loss": 0.2358, "step": 8064, "teacher_loss": 0.2077031433582306 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.6063824892044067, "learning_rate": 2.977395313295105e-05, "loss": 0.2857, "step": 8065, "teacher_loss": 0.2500288188457489 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.9142793416976929, "learning_rate": 2.9773560145817863e-05, "loss": 0.4095, "step": 8066, "teacher_loss": 0.3534258008003235 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.241891011595726, "learning_rate": 2.9773166819971236e-05, "loss": 0.2059, "step": 8067, "teacher_loss": 0.20194904506206512 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5905392169952393, "learning_rate": 2.9772773155420185e-05, "loss": 0.3083, "step": 8068, "teacher_loss": 0.2769727408885956 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.4622535705566406, "learning_rate": 2.9772379152173737e-05, "loss": 0.2504, "step": 8069, "teacher_loss": 0.2268151491880417 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3854491710662842, "learning_rate": 2.977198481024092e-05, "loss": 0.3092, "step": 8070, "teacher_loss": 0.30074018239974976 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3202867805957794, "learning_rate": 2.9771590129630787e-05, "loss": 0.2392, "step": 8071, "teacher_loss": 0.23018880188465118 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3210011124610901, "learning_rate": 2.977119511035238e-05, "loss": 0.2447, "step": 8072, "teacher_loss": 0.2362537682056427 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3466269373893738, "learning_rate": 2.9770799752414753e-05, "loss": 0.2107, "step": 8073, "teacher_loss": 0.19565042853355408 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.6781039237976074, "learning_rate": 2.9770404055826972e-05, "loss": 0.2954, "step": 8074, "teacher_loss": 0.2528509795665741 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.38710817694664, "learning_rate": 2.9770008020598113e-05, "loss": 0.4723, "step": 8075, "teacher_loss": 0.4817160367965698 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5930625796318054, "learning_rate": 2.9769611646737252e-05, "loss": 0.27, "step": 8076, "teacher_loss": 0.2340492457151413 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.942062497138977, "learning_rate": 2.9769214934253476e-05, "loss": 0.5507, "step": 8077, "teacher_loss": 0.5071681141853333 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3186991214752197, "learning_rate": 2.9768817883155882e-05, "loss": 0.2878, "step": 8078, "teacher_loss": 0.28431880474090576 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5798431634902954, "learning_rate": 2.9768420493453574e-05, "loss": 0.3197, "step": 8079, "teacher_loss": 0.2908373773097992 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5199610590934753, "learning_rate": 2.976802276515566e-05, "loss": 0.2087, "step": 8080, "teacher_loss": 0.17414042353630066 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.4656398892402649, "learning_rate": 2.9767624698271266e-05, "loss": 0.3194, "step": 8081, "teacher_loss": 0.3031614422798157 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.7253479957580566, "learning_rate": 2.9767226292809508e-05, "loss": 0.2893, "step": 8082, "teacher_loss": 0.24085929989814758 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.7313870191574097, "learning_rate": 2.9766827548779526e-05, "loss": 0.3463, "step": 8083, "teacher_loss": 0.30356746912002563 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.44095784425735474, "learning_rate": 2.976642846619046e-05, "loss": 0.3108, "step": 8084, "teacher_loss": 0.296287477016449 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.21467992663383484, "learning_rate": 2.976602904505147e-05, "loss": 0.1758, "step": 8085, "teacher_loss": 0.17149582505226135 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5262818336486816, "learning_rate": 2.97656292853717e-05, "loss": 0.4367, "step": 8086, "teacher_loss": 0.42671999335289 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.2502528727054596, "learning_rate": 2.9765229187160316e-05, "loss": 0.236, "step": 8087, "teacher_loss": 0.23438510298728943 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.6763976812362671, "learning_rate": 2.97648287504265e-05, "loss": 0.3472, "step": 8088, "teacher_loss": 0.31064045429229736 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.18361902236938477, "learning_rate": 2.9764427975179423e-05, "loss": 0.1687, "step": 8089, "teacher_loss": 0.16703499853610992 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.4738154411315918, "learning_rate": 2.9764026861428282e-05, "loss": 0.3118, "step": 8090, "teacher_loss": 0.29374629259109497 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 1.1757898330688477, "learning_rate": 2.976362540918227e-05, "loss": 0.3953, "step": 8091, "teacher_loss": 0.30856889486312866 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.18476903438568115, "learning_rate": 2.976322361845059e-05, "loss": 0.2344, "step": 8092, "teacher_loss": 0.23991002142429352 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.7978906631469727, "learning_rate": 2.976282148924246e-05, "loss": 0.3944, "step": 8093, "teacher_loss": 0.34960877895355225 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.4356808662414551, "learning_rate": 2.976241902156709e-05, "loss": 0.629, "step": 8094, "teacher_loss": 0.6504708528518677 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.26538991928100586, "learning_rate": 2.976201621543371e-05, "loss": 0.2495, "step": 8095, "teacher_loss": 0.2477041780948639 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 1.0113760232925415, "learning_rate": 2.976161307085156e-05, "loss": 0.4517, "step": 8096, "teacher_loss": 0.38949209451675415 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5069959163665771, "learning_rate": 2.9761209587829878e-05, "loss": 0.2981, "step": 8097, "teacher_loss": 0.2749285101890564 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5862365365028381, "learning_rate": 2.9760805766377916e-05, "loss": 0.3175, "step": 8098, "teacher_loss": 0.28762370347976685 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.7402926683425903, "learning_rate": 2.9760401606504938e-05, "loss": 0.7622, "step": 8099, "teacher_loss": 0.7646390199661255 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.14903312921524048, "learning_rate": 2.9759997108220197e-05, "loss": 0.1663, "step": 8100, "teacher_loss": 0.16824749112129211 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.19166401028633118, "learning_rate": 2.9759592271532986e-05, "loss": 0.1904, "step": 8101, "teacher_loss": 0.19031283259391785 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.5603183507919312, "learning_rate": 2.9759187096452566e-05, "loss": 0.3008, "step": 8102, "teacher_loss": 0.2719833254814148 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3085275888442993, "learning_rate": 2.975878158298824e-05, "loss": 0.1975, "step": 8103, "teacher_loss": 0.1851949691772461 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.7284263372421265, "learning_rate": 2.97583757311493e-05, "loss": 0.2791, "step": 8104, "teacher_loss": 0.22922734916210175 }, { "compression_loss": 0.0, "epoch": 1.46, "label_loss": 0.3495258390903473, "learning_rate": 2.9757969540945056e-05, "loss": 0.2671, "step": 8105, "teacher_loss": 0.257892906665802 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.2265910655260086, "learning_rate": 2.9757563012384814e-05, "loss": 0.2201, "step": 8106, "teacher_loss": 0.21936696767807007 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5853474140167236, "learning_rate": 2.97571561454779e-05, "loss": 0.2899, "step": 8107, "teacher_loss": 0.25712692737579346 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.4107155203819275, "learning_rate": 2.9756748940233636e-05, "loss": 0.2454, "step": 8108, "teacher_loss": 0.22698035836219788 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.8423839807510376, "learning_rate": 2.9756341396661366e-05, "loss": 0.3601, "step": 8109, "teacher_loss": 0.3064751923084259 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5393986701965332, "learning_rate": 2.9755933514770428e-05, "loss": 0.2565, "step": 8110, "teacher_loss": 0.22503072023391724 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 1.1462527513504028, "learning_rate": 2.9755525294570173e-05, "loss": 0.3114, "step": 8111, "teacher_loss": 0.21864129602909088 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.12924696505069733, "learning_rate": 2.9755116736069964e-05, "loss": 0.1881, "step": 8112, "teacher_loss": 0.19465124607086182 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.3674677610397339, "learning_rate": 2.9754707839279164e-05, "loss": 0.2111, "step": 8113, "teacher_loss": 0.19370847940444946 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.9885168075561523, "learning_rate": 2.9754298604207157e-05, "loss": 0.3503, "step": 8114, "teacher_loss": 0.27939724922180176 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.26287201046943665, "learning_rate": 2.9753889030863312e-05, "loss": 0.2467, "step": 8115, "teacher_loss": 0.24495232105255127 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.3919248580932617, "learning_rate": 2.975347911925703e-05, "loss": 0.275, "step": 8116, "teacher_loss": 0.26203691959381104 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.28359806537628174, "learning_rate": 2.97530688693977e-05, "loss": 0.2217, "step": 8117, "teacher_loss": 0.21486003696918488 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.35373297333717346, "learning_rate": 2.9752658281294735e-05, "loss": 0.2697, "step": 8118, "teacher_loss": 0.26036337018013 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.6003345847129822, "learning_rate": 2.975224735495755e-05, "loss": 0.3378, "step": 8119, "teacher_loss": 0.30861300230026245 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.26357048749923706, "learning_rate": 2.975183609039556e-05, "loss": 0.1683, "step": 8120, "teacher_loss": 0.15775799751281738 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.4701172709465027, "learning_rate": 2.9751424487618196e-05, "loss": 0.2958, "step": 8121, "teacher_loss": 0.2764098048210144 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.423725962638855, "learning_rate": 2.9751012546634898e-05, "loss": 0.3679, "step": 8122, "teacher_loss": 0.36166954040527344 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.37218284606933594, "learning_rate": 2.9750600267455104e-05, "loss": 0.1884, "step": 8123, "teacher_loss": 0.1679789125919342 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.28768423199653625, "learning_rate": 2.9750187650088276e-05, "loss": 0.2168, "step": 8124, "teacher_loss": 0.20887160301208496 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.6872828602790833, "learning_rate": 2.9749774694543862e-05, "loss": 0.327, "step": 8125, "teacher_loss": 0.2869381010532379 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.32114049792289734, "learning_rate": 2.9749361400831342e-05, "loss": 0.3068, "step": 8126, "teacher_loss": 0.30523765087127686 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.35265010595321655, "learning_rate": 2.9748947768960183e-05, "loss": 0.2821, "step": 8127, "teacher_loss": 0.27425771951675415 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5579265356063843, "learning_rate": 2.9748533798939872e-05, "loss": 0.2598, "step": 8128, "teacher_loss": 0.22672465443611145 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.15026262402534485, "learning_rate": 2.9748119490779898e-05, "loss": 0.189, "step": 8129, "teacher_loss": 0.1933566927909851 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.4982910752296448, "learning_rate": 2.9747704844489762e-05, "loss": 0.2855, "step": 8130, "teacher_loss": 0.26184138655662537 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.29063814878463745, "learning_rate": 2.9747289860078975e-05, "loss": 0.3159, "step": 8131, "teacher_loss": 0.31875449419021606 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5790538191795349, "learning_rate": 2.974687453755704e-05, "loss": 0.2737, "step": 8132, "teacher_loss": 0.23976552486419678 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5124067068099976, "learning_rate": 2.9746458876933487e-05, "loss": 0.2397, "step": 8133, "teacher_loss": 0.20935088396072388 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.2579716444015503, "learning_rate": 2.9746042878217845e-05, "loss": 0.2058, "step": 8134, "teacher_loss": 0.20004823803901672 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.29261016845703125, "learning_rate": 2.9745626541419648e-05, "loss": 0.2903, "step": 8135, "teacher_loss": 0.2900722324848175 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.35633546113967896, "learning_rate": 2.974520986654845e-05, "loss": 0.1845, "step": 8136, "teacher_loss": 0.16537439823150635 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.33052337169647217, "learning_rate": 2.9744792853613792e-05, "loss": 0.2629, "step": 8137, "teacher_loss": 0.25535738468170166 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.24920472502708435, "learning_rate": 2.9744375502625242e-05, "loss": 0.2341, "step": 8138, "teacher_loss": 0.23240327835083008 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.423315167427063, "learning_rate": 2.974395781359237e-05, "loss": 0.39, "step": 8139, "teacher_loss": 0.3863159418106079 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.30177706480026245, "learning_rate": 2.9743539786524746e-05, "loss": 0.2877, "step": 8140, "teacher_loss": 0.2861484885215759 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5094630718231201, "learning_rate": 2.9743121421431963e-05, "loss": 0.3482, "step": 8141, "teacher_loss": 0.3303123116493225 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5499067306518555, "learning_rate": 2.9742702718323605e-05, "loss": 0.2394, "step": 8142, "teacher_loss": 0.20485320687294006 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.5517386794090271, "learning_rate": 2.9742283677209277e-05, "loss": 0.2111, "step": 8143, "teacher_loss": 0.17326796054840088 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.48349255323410034, "learning_rate": 2.9741864298098584e-05, "loss": 0.3237, "step": 8144, "teacher_loss": 0.3059951066970825 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.25946253538131714, "learning_rate": 2.9741444581001135e-05, "loss": 0.1857, "step": 8145, "teacher_loss": 0.17752626538276672 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.8467333316802979, "learning_rate": 2.9741024525926566e-05, "loss": 0.7973, "step": 8146, "teacher_loss": 0.7918380498886108 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.21400555968284607, "learning_rate": 2.97406041328845e-05, "loss": 0.1724, "step": 8147, "teacher_loss": 0.16774117946624756 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.8075433969497681, "learning_rate": 2.974018340188457e-05, "loss": 0.4414, "step": 8148, "teacher_loss": 0.4006918668746948 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.1271267980337143, "learning_rate": 2.9739762332936433e-05, "loss": 0.1604, "step": 8149, "teacher_loss": 0.16413989663124084 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.41097894310951233, "learning_rate": 2.9739340926049738e-05, "loss": 0.3176, "step": 8150, "teacher_loss": 0.30727115273475647 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.7365285158157349, "learning_rate": 2.9738919181234144e-05, "loss": 0.8734, "step": 8151, "teacher_loss": 0.8885741233825684 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.6470263600349426, "learning_rate": 2.9738497098499325e-05, "loss": 0.405, "step": 8152, "teacher_loss": 0.3781614303588867 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.470409631729126, "learning_rate": 2.9738074677854956e-05, "loss": 0.2903, "step": 8153, "teacher_loss": 0.2702961564064026 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.4736995995044708, "learning_rate": 2.9737651919310717e-05, "loss": 0.2373, "step": 8154, "teacher_loss": 0.21105676889419556 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.4125184118747711, "learning_rate": 2.973722882287631e-05, "loss": 0.2899, "step": 8155, "teacher_loss": 0.2762352228164673 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.6273418664932251, "learning_rate": 2.973680538856143e-05, "loss": 0.3912, "step": 8156, "teacher_loss": 0.3649166226387024 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.3285972476005554, "learning_rate": 2.9736381616375783e-05, "loss": 0.3322, "step": 8157, "teacher_loss": 0.3325648903846741 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.16893085837364197, "learning_rate": 2.973595750632909e-05, "loss": 0.2054, "step": 8158, "teacher_loss": 0.20939815044403076 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.3801194727420807, "learning_rate": 2.9735533058431074e-05, "loss": 0.3053, "step": 8159, "teacher_loss": 0.29703962802886963 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.3367338478565216, "learning_rate": 2.973510827269146e-05, "loss": 0.1613, "step": 8160, "teacher_loss": 0.14180812239646912 }, { "compression_loss": 0.0, "epoch": 1.47, "label_loss": 0.30335402488708496, "learning_rate": 2.973468314911999e-05, "loss": 0.1941, "step": 8161, "teacher_loss": 0.18200674653053284 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3989580273628235, "learning_rate": 2.9734257687726416e-05, "loss": 0.246, "step": 8162, "teacher_loss": 0.22901788353919983 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.7661243677139282, "learning_rate": 2.973383188852049e-05, "loss": 0.4453, "step": 8163, "teacher_loss": 0.4096333980560303 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3543039858341217, "learning_rate": 2.9733405751511974e-05, "loss": 0.3041, "step": 8164, "teacher_loss": 0.29856154322624207 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.722306489944458, "learning_rate": 2.973297927671063e-05, "loss": 0.304, "step": 8165, "teacher_loss": 0.2574799060821533 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3803054392337799, "learning_rate": 2.973255246412625e-05, "loss": 0.145, "step": 8166, "teacher_loss": 0.11884692311286926 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.776046633720398, "learning_rate": 2.9732125313768608e-05, "loss": 0.283, "step": 8167, "teacher_loss": 0.22817113995552063 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.4325186014175415, "learning_rate": 2.9731697825647504e-05, "loss": 0.266, "step": 8168, "teacher_loss": 0.24755185842514038 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.14022308588027954, "learning_rate": 2.9731269999772737e-05, "loss": 0.1851, "step": 8169, "teacher_loss": 0.190038800239563 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3932662010192871, "learning_rate": 2.9730841836154116e-05, "loss": 0.338, "step": 8170, "teacher_loss": 0.33188700675964355 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.5001851916313171, "learning_rate": 2.9730413334801456e-05, "loss": 0.2834, "step": 8171, "teacher_loss": 0.25927141308784485 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6095743179321289, "learning_rate": 2.972998449572458e-05, "loss": 0.3354, "step": 8172, "teacher_loss": 0.3048837184906006 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.7760735154151917, "learning_rate": 2.9729555318933327e-05, "loss": 0.5297, "step": 8173, "teacher_loss": 0.502333402633667 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.4768903851509094, "learning_rate": 2.9729125804437532e-05, "loss": 0.2944, "step": 8174, "teacher_loss": 0.27413681149482727 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.39413008093833923, "learning_rate": 2.9728695952247035e-05, "loss": 0.257, "step": 8175, "teacher_loss": 0.24179138243198395 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.13885346055030823, "learning_rate": 2.9728265762371708e-05, "loss": 0.1566, "step": 8176, "teacher_loss": 0.15854905545711517 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.9335932731628418, "learning_rate": 2.97278352348214e-05, "loss": 0.4126, "step": 8177, "teacher_loss": 0.35468918085098267 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6343482732772827, "learning_rate": 2.9727404369605985e-05, "loss": 0.2856, "step": 8178, "teacher_loss": 0.24679480493068695 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2928655743598938, "learning_rate": 2.9726973166735347e-05, "loss": 0.1676, "step": 8179, "teacher_loss": 0.1536298543214798 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2646465599536896, "learning_rate": 2.9726541626219365e-05, "loss": 0.2151, "step": 8180, "teacher_loss": 0.2095642387866974 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.216688334941864, "learning_rate": 2.9726109748067936e-05, "loss": 0.2873, "step": 8181, "teacher_loss": 0.29512155055999756 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6062241792678833, "learning_rate": 2.9725677532290964e-05, "loss": 0.2437, "step": 8182, "teacher_loss": 0.20343399047851562 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.20176056027412415, "learning_rate": 2.9725244978898354e-05, "loss": 0.2332, "step": 8183, "teacher_loss": 0.23671194911003113 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.5101134777069092, "learning_rate": 2.9724812087900024e-05, "loss": 0.3013, "step": 8184, "teacher_loss": 0.278059184551239 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6626385450363159, "learning_rate": 2.9724378859305905e-05, "loss": 0.4386, "step": 8185, "teacher_loss": 0.41375815868377686 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.34106212854385376, "learning_rate": 2.972394529312592e-05, "loss": 0.2231, "step": 8186, "teacher_loss": 0.20995593070983887 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.7625394463539124, "learning_rate": 2.9723511389370015e-05, "loss": 0.5808, "step": 8187, "teacher_loss": 0.5606253147125244 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.39818793535232544, "learning_rate": 2.9723077148048143e-05, "loss": 0.2098, "step": 8188, "teacher_loss": 0.18884649872779846 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.703392505645752, "learning_rate": 2.9722642569170244e-05, "loss": 0.2159, "step": 8189, "teacher_loss": 0.16175755858421326 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2669304609298706, "learning_rate": 2.9722207652746297e-05, "loss": 0.1991, "step": 8190, "teacher_loss": 0.1916126012802124 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.4417065680027008, "learning_rate": 2.972177239878627e-05, "loss": 0.2649, "step": 8191, "teacher_loss": 0.2452913224697113 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.12884913384914398, "learning_rate": 2.9721336807300134e-05, "loss": 0.1785, "step": 8192, "teacher_loss": 0.18400683999061584 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.8325263261795044, "learning_rate": 2.9720900878297887e-05, "loss": 0.6302, "step": 8193, "teacher_loss": 0.6077010631561279 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.9836317896842957, "learning_rate": 2.972046461178952e-05, "loss": 0.366, "step": 8194, "teacher_loss": 0.29736989736557007 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.37794429063796997, "learning_rate": 2.972002800778503e-05, "loss": 0.2983, "step": 8195, "teacher_loss": 0.2894938588142395 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.539686381816864, "learning_rate": 2.971959106629443e-05, "loss": 0.2719, "step": 8196, "teacher_loss": 0.24218055605888367 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2979428768157959, "learning_rate": 2.971915378732774e-05, "loss": 0.2508, "step": 8197, "teacher_loss": 0.2455427348613739 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.8227146863937378, "learning_rate": 2.9718716170894987e-05, "loss": 0.3418, "step": 8198, "teacher_loss": 0.28837132453918457 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3547306954860687, "learning_rate": 2.97182782170062e-05, "loss": 0.2315, "step": 8199, "teacher_loss": 0.21777455508708954 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3757646083831787, "learning_rate": 2.9717839925671414e-05, "loss": 0.3293, "step": 8200, "teacher_loss": 0.3241514265537262 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6775994300842285, "learning_rate": 2.971740129690069e-05, "loss": 0.271, "step": 8201, "teacher_loss": 0.22585608065128326 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.693859338760376, "learning_rate": 2.971696233070408e-05, "loss": 0.3591, "step": 8202, "teacher_loss": 0.32188284397125244 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.4581086039543152, "learning_rate": 2.971652302709165e-05, "loss": 0.2462, "step": 8203, "teacher_loss": 0.22261425852775574 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.4841664135456085, "learning_rate": 2.9716083386073465e-05, "loss": 0.3256, "step": 8204, "teacher_loss": 0.3080095052719116 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.30393919348716736, "learning_rate": 2.971564340765961e-05, "loss": 0.2104, "step": 8205, "teacher_loss": 0.2000046968460083 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2805207073688507, "learning_rate": 2.9715203091860172e-05, "loss": 0.1824, "step": 8206, "teacher_loss": 0.1715065836906433 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.3695563077926636, "learning_rate": 2.9714762438685246e-05, "loss": 0.2731, "step": 8207, "teacher_loss": 0.26233798265457153 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6023653745651245, "learning_rate": 2.9714321448144934e-05, "loss": 0.4234, "step": 8208, "teacher_loss": 0.40354806184768677 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6006008386611938, "learning_rate": 2.9713880120249346e-05, "loss": 0.4396, "step": 8209, "teacher_loss": 0.421748548746109 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.5437072515487671, "learning_rate": 2.9713438455008603e-05, "loss": 0.2636, "step": 8210, "teacher_loss": 0.23248618841171265 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.6870304346084595, "learning_rate": 2.9712996452432827e-05, "loss": 0.5096, "step": 8211, "teacher_loss": 0.48989659547805786 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.2999982237815857, "learning_rate": 2.9712554112532157e-05, "loss": 0.2418, "step": 8212, "teacher_loss": 0.23538298904895782 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.639070451259613, "learning_rate": 2.971211143531673e-05, "loss": 0.2903, "step": 8213, "teacher_loss": 0.2515646815299988 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.38200056552886963, "learning_rate": 2.97116684207967e-05, "loss": 0.2586, "step": 8214, "teacher_loss": 0.24486488103866577 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.21200186014175415, "learning_rate": 2.9711225068982217e-05, "loss": 0.1455, "step": 8215, "teacher_loss": 0.1380728930234909 }, { "compression_loss": 0.0, "epoch": 1.48, "label_loss": 0.5590644478797913, "learning_rate": 2.971078137988345e-05, "loss": 0.2253, "step": 8216, "teacher_loss": 0.18820244073867798 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2474207580089569, "learning_rate": 2.9710337353510573e-05, "loss": 0.1786, "step": 8217, "teacher_loss": 0.17090237140655518 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.7600335478782654, "learning_rate": 2.9709892989873764e-05, "loss": 0.2962, "step": 8218, "teacher_loss": 0.2446936070919037 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3686853051185608, "learning_rate": 2.970944828898321e-05, "loss": 0.318, "step": 8219, "teacher_loss": 0.31234419345855713 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.42755141854286194, "learning_rate": 2.970900325084911e-05, "loss": 0.2396, "step": 8220, "teacher_loss": 0.21868109703063965 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.31360459327697754, "learning_rate": 2.9708557875481665e-05, "loss": 0.2491, "step": 8221, "teacher_loss": 0.24194639921188354 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.4301757216453552, "learning_rate": 2.9708112162891084e-05, "loss": 0.3686, "step": 8222, "teacher_loss": 0.3617165982723236 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.8100927472114563, "learning_rate": 2.9707666113087593e-05, "loss": 1.001, "step": 8223, "teacher_loss": 1.0221672058105469 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5292739868164062, "learning_rate": 2.970721972608141e-05, "loss": 0.3204, "step": 8224, "teacher_loss": 0.2972298860549927 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.8262772560119629, "learning_rate": 2.9706773001882772e-05, "loss": 0.3199, "step": 8225, "teacher_loss": 0.26363474130630493 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3406209349632263, "learning_rate": 2.9706325940501927e-05, "loss": 0.1973, "step": 8226, "teacher_loss": 0.18135693669319153 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.7358806133270264, "learning_rate": 2.9705878541949118e-05, "loss": 0.3657, "step": 8227, "teacher_loss": 0.32459670305252075 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.8401494026184082, "learning_rate": 2.9705430806234603e-05, "loss": 0.4209, "step": 8228, "teacher_loss": 0.37436389923095703 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.18290647864341736, "learning_rate": 2.970498273336865e-05, "loss": 0.2203, "step": 8229, "teacher_loss": 0.22450795769691467 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.41541701555252075, "learning_rate": 2.970453432336153e-05, "loss": 0.2077, "step": 8230, "teacher_loss": 0.18463781476020813 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.766947865486145, "learning_rate": 2.970408557622353e-05, "loss": 0.5848, "step": 8231, "teacher_loss": 0.5645679235458374 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.7842399477958679, "learning_rate": 2.9703636491964925e-05, "loss": 0.3656, "step": 8232, "teacher_loss": 0.31905221939086914 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2675301134586334, "learning_rate": 2.970318707059602e-05, "loss": 0.2196, "step": 8233, "teacher_loss": 0.21426743268966675 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.4327450096607208, "learning_rate": 2.970273731212712e-05, "loss": 0.2917, "step": 8234, "teacher_loss": 0.27598896622657776 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.38992100954055786, "learning_rate": 2.9702287216568537e-05, "loss": 0.3075, "step": 8235, "teacher_loss": 0.2983550727367401 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6753556132316589, "learning_rate": 2.9701836783930584e-05, "loss": 0.2496, "step": 8236, "teacher_loss": 0.20231623947620392 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.9020670652389526, "learning_rate": 2.9701386014223592e-05, "loss": 0.325, "step": 8237, "teacher_loss": 0.26086899638175964 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3401253819465637, "learning_rate": 2.9700934907457898e-05, "loss": 0.2299, "step": 8238, "teacher_loss": 0.21763408184051514 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.18903255462646484, "learning_rate": 2.9700483463643838e-05, "loss": 0.2491, "step": 8239, "teacher_loss": 0.2557646334171295 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.38801127672195435, "learning_rate": 2.970003168279177e-05, "loss": 0.2433, "step": 8240, "teacher_loss": 0.22721263766288757 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5279019474983215, "learning_rate": 2.9699579564912048e-05, "loss": 0.2758, "step": 8241, "teacher_loss": 0.2477797120809555 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.4817383587360382, "learning_rate": 2.969912711001504e-05, "loss": 0.228, "step": 8242, "teacher_loss": 0.19977101683616638 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.544009804725647, "learning_rate": 2.9698674318111117e-05, "loss": 0.2635, "step": 8243, "teacher_loss": 0.23233303427696228 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2647302448749542, "learning_rate": 2.969822118921066e-05, "loss": 0.2289, "step": 8244, "teacher_loss": 0.2248658835887909 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 1.121783971786499, "learning_rate": 2.9697767723324058e-05, "loss": 0.2478, "step": 8245, "teacher_loss": 0.1507301926612854 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6131390333175659, "learning_rate": 2.9697313920461708e-05, "loss": 0.3032, "step": 8246, "teacher_loss": 0.26878905296325684 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3602389395236969, "learning_rate": 2.9696859780634016e-05, "loss": 0.1962, "step": 8247, "teacher_loss": 0.1779191493988037 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.8005020022392273, "learning_rate": 2.9696405303851392e-05, "loss": 0.3767, "step": 8248, "teacher_loss": 0.32958823442459106 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2803199589252472, "learning_rate": 2.9695950490124256e-05, "loss": 0.2144, "step": 8249, "teacher_loss": 0.20709499716758728 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 1.0980370044708252, "learning_rate": 2.9695495339463036e-05, "loss": 1.0336, "step": 8250, "teacher_loss": 1.0264418125152588 }, { "epoch": 1.49, "eval_exact_match": 79.13907284768212, "eval_f1": 86.71594304916012, "step": 8250 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2720036804676056, "learning_rate": 2.969503985187817e-05, "loss": 0.219, "step": 8251, "teacher_loss": 0.21315529942512512 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3008033037185669, "learning_rate": 2.9694584027380094e-05, "loss": 0.2526, "step": 8252, "teacher_loss": 0.2472873032093048 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.48980453610420227, "learning_rate": 2.9694127865979266e-05, "loss": 0.3265, "step": 8253, "teacher_loss": 0.3083362877368927 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6193699836730957, "learning_rate": 2.969367136768614e-05, "loss": 0.3883, "step": 8254, "teacher_loss": 0.3626496493816376 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6130605936050415, "learning_rate": 2.9693214532511183e-05, "loss": 0.365, "step": 8255, "teacher_loss": 0.33739152550697327 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6840561628341675, "learning_rate": 2.969275736046487e-05, "loss": 0.3971, "step": 8256, "teacher_loss": 0.36526957154273987 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5802739262580872, "learning_rate": 2.9692299851557686e-05, "loss": 0.2526, "step": 8257, "teacher_loss": 0.2161685675382614 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.11517634987831116, "learning_rate": 2.9691842005800113e-05, "loss": 0.1811, "step": 8258, "teacher_loss": 0.18841925263404846 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5294514298439026, "learning_rate": 2.969138382320265e-05, "loss": 0.2351, "step": 8259, "teacher_loss": 0.20243753492832184 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.6079145669937134, "learning_rate": 2.9690925303775802e-05, "loss": 0.3257, "step": 8260, "teacher_loss": 0.2943679392337799 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.867706298828125, "learning_rate": 2.969046644753008e-05, "loss": 0.3728, "step": 8261, "teacher_loss": 0.31786417961120605 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.35639429092407227, "learning_rate": 2.9690007254476014e-05, "loss": 0.283, "step": 8262, "teacher_loss": 0.27480629086494446 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.8812310695648193, "learning_rate": 2.968954772462412e-05, "loss": 0.3739, "step": 8263, "teacher_loss": 0.31751763820648193 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.2417874038219452, "learning_rate": 2.9689087857984942e-05, "loss": 0.2625, "step": 8264, "teacher_loss": 0.2648100256919861 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.775303840637207, "learning_rate": 2.9688627654569013e-05, "loss": 0.312, "step": 8265, "teacher_loss": 0.2605516016483307 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5896140336990356, "learning_rate": 2.9688167114386898e-05, "loss": 0.3523, "step": 8266, "teacher_loss": 0.3259405195713043 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.3817254900932312, "learning_rate": 2.9687706237449143e-05, "loss": 0.2645, "step": 8267, "teacher_loss": 0.2514649033546448 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5383740663528442, "learning_rate": 2.968724502376632e-05, "loss": 0.2871, "step": 8268, "teacher_loss": 0.259149432182312 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.5733349919319153, "learning_rate": 2.9686783473349006e-05, "loss": 0.32, "step": 8269, "teacher_loss": 0.2918621599674225 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.52850741147995, "learning_rate": 2.968632158620778e-05, "loss": 0.2562, "step": 8270, "teacher_loss": 0.2259790599346161 }, { "compression_loss": 0.0, "epoch": 1.49, "label_loss": 0.29134970903396606, "learning_rate": 2.9685859362353235e-05, "loss": 0.2619, "step": 8271, "teacher_loss": 0.2586093544960022 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.19141846895217896, "learning_rate": 2.968539680179596e-05, "loss": 0.2085, "step": 8272, "teacher_loss": 0.2104235589504242 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5316446423530579, "learning_rate": 2.9684933904546564e-05, "loss": 0.2964, "step": 8273, "teacher_loss": 0.27026551961898804 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.28756818175315857, "learning_rate": 2.9684470670615667e-05, "loss": 0.3136, "step": 8274, "teacher_loss": 0.316439151763916 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.4492422044277191, "learning_rate": 2.968400710001388e-05, "loss": 0.3842, "step": 8275, "teacher_loss": 0.3769516944885254 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.6005218029022217, "learning_rate": 2.9683543192751836e-05, "loss": 0.2991, "step": 8276, "teacher_loss": 0.2656573951244354 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.47699397802352905, "learning_rate": 2.9683078948840175e-05, "loss": 0.24, "step": 8277, "teacher_loss": 0.21366241574287415 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5211970210075378, "learning_rate": 2.968261436828953e-05, "loss": 0.3075, "step": 8278, "teacher_loss": 0.28372249007225037 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.6410287618637085, "learning_rate": 2.968214945111056e-05, "loss": 0.2762, "step": 8279, "teacher_loss": 0.2356693595647812 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.676201343536377, "learning_rate": 2.968168419731392e-05, "loss": 0.3573, "step": 8280, "teacher_loss": 0.3219008445739746 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.26739197969436646, "learning_rate": 2.9681218606910283e-05, "loss": 0.1846, "step": 8281, "teacher_loss": 0.17536070942878723 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.49219387769699097, "learning_rate": 2.968075267991032e-05, "loss": 0.3952, "step": 8282, "teacher_loss": 0.38438600301742554 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.21525005996227264, "learning_rate": 2.968028641632471e-05, "loss": 0.2165, "step": 8283, "teacher_loss": 0.21665681898593903 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.27386558055877686, "learning_rate": 2.9679819816164148e-05, "loss": 0.2375, "step": 8284, "teacher_loss": 0.2335040271282196 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.4536552131175995, "learning_rate": 2.9679352879439328e-05, "loss": 0.2979, "step": 8285, "teacher_loss": 0.2805665135383606 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.2945255935192108, "learning_rate": 2.9678885606160962e-05, "loss": 0.2119, "step": 8286, "teacher_loss": 0.20273634791374207 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.3294672966003418, "learning_rate": 2.9678417996339757e-05, "loss": 0.2745, "step": 8287, "teacher_loss": 0.26834189891815186 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.40236687660217285, "learning_rate": 2.9677950049986432e-05, "loss": 0.2546, "step": 8288, "teacher_loss": 0.23822659254074097 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.9912687540054321, "learning_rate": 2.967748176711172e-05, "loss": 0.37, "step": 8289, "teacher_loss": 0.30093270540237427 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.4055962860584259, "learning_rate": 2.967701314772636e-05, "loss": 0.2984, "step": 8290, "teacher_loss": 0.2865150272846222 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5629176497459412, "learning_rate": 2.9676544191841094e-05, "loss": 0.3079, "step": 8291, "teacher_loss": 0.2795999050140381 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.44685113430023193, "learning_rate": 2.9676074899466666e-05, "loss": 0.2393, "step": 8292, "teacher_loss": 0.21621333062648773 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.46933919191360474, "learning_rate": 2.9675605270613845e-05, "loss": 0.3014, "step": 8293, "teacher_loss": 0.28272271156311035 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.8725112676620483, "learning_rate": 2.9675135305293394e-05, "loss": 0.7238, "step": 8294, "teacher_loss": 0.7072983980178833 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.39136794209480286, "learning_rate": 2.967466500351609e-05, "loss": 0.2694, "step": 8295, "teacher_loss": 0.25588706135749817 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.6513285636901855, "learning_rate": 2.967419436529271e-05, "loss": 0.2742, "step": 8296, "teacher_loss": 0.23228520154953003 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.38436412811279297, "learning_rate": 2.9673723390634055e-05, "loss": 0.2534, "step": 8297, "teacher_loss": 0.23882606625556946 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.4502195715904236, "learning_rate": 2.9673252079550917e-05, "loss": 0.5762, "step": 8298, "teacher_loss": 0.5901873707771301 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5212042331695557, "learning_rate": 2.9672780432054095e-05, "loss": 0.2499, "step": 8299, "teacher_loss": 0.21971049904823303 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.7054161429405212, "learning_rate": 2.9672308448154415e-05, "loss": 0.3212, "step": 8300, "teacher_loss": 0.278546005487442 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.3220503032207489, "learning_rate": 2.967183612786269e-05, "loss": 0.2564, "step": 8301, "teacher_loss": 0.24906158447265625 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.35697615146636963, "learning_rate": 2.967136347118975e-05, "loss": 0.2163, "step": 8302, "teacher_loss": 0.20071209967136383 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.7200969457626343, "learning_rate": 2.967089047814643e-05, "loss": 0.3181, "step": 8303, "teacher_loss": 0.273413747549057 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.44575589895248413, "learning_rate": 2.9670417148743586e-05, "loss": 0.2111, "step": 8304, "teacher_loss": 0.1850753277540207 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.7856945991516113, "learning_rate": 2.966994348299205e-05, "loss": 0.403, "step": 8305, "teacher_loss": 0.36050018668174744 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.3099052309989929, "learning_rate": 2.9669469480902698e-05, "loss": 0.2279, "step": 8306, "teacher_loss": 0.21882228553295135 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.35392987728118896, "learning_rate": 2.966899514248639e-05, "loss": 0.2893, "step": 8307, "teacher_loss": 0.28210657835006714 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.30253785848617554, "learning_rate": 2.966852046775401e-05, "loss": 0.2339, "step": 8308, "teacher_loss": 0.22630849480628967 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.29873746633529663, "learning_rate": 2.966804545671643e-05, "loss": 0.2763, "step": 8309, "teacher_loss": 0.27378422021865845 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.7076412439346313, "learning_rate": 2.9667570109384546e-05, "loss": 0.2791, "step": 8310, "teacher_loss": 0.23151251673698425 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.8246315717697144, "learning_rate": 2.9667094425769252e-05, "loss": 0.2958, "step": 8311, "teacher_loss": 0.23702660202980042 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.8957650661468506, "learning_rate": 2.966661840588146e-05, "loss": 0.3596, "step": 8312, "teacher_loss": 0.29998862743377686 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.1701478660106659, "learning_rate": 2.9666142049732078e-05, "loss": 0.1866, "step": 8313, "teacher_loss": 0.18845608830451965 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.78484708070755, "learning_rate": 2.966566535733203e-05, "loss": 0.421, "step": 8314, "teacher_loss": 0.3805864155292511 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.19726255536079407, "learning_rate": 2.9665188328692245e-05, "loss": 0.2192, "step": 8315, "teacher_loss": 0.22168636322021484 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.4940336048603058, "learning_rate": 2.9664710963823664e-05, "loss": 0.2325, "step": 8316, "teacher_loss": 0.20341017842292786 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.44547832012176514, "learning_rate": 2.9664233262737223e-05, "loss": 0.2189, "step": 8317, "teacher_loss": 0.19375723600387573 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.2453044056892395, "learning_rate": 2.9663755225443883e-05, "loss": 0.1812, "step": 8318, "teacher_loss": 0.1740460991859436 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.2473374307155609, "learning_rate": 2.96632768519546e-05, "loss": 0.2587, "step": 8319, "teacher_loss": 0.2599189579486847 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.6977126002311707, "learning_rate": 2.9662798142280337e-05, "loss": 0.3048, "step": 8320, "teacher_loss": 0.2611473798751831 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5293257832527161, "learning_rate": 2.966231909643208e-05, "loss": 0.2695, "step": 8321, "teacher_loss": 0.24065130949020386 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.1446615606546402, "learning_rate": 2.9661839714420803e-05, "loss": 0.1859, "step": 8322, "teacher_loss": 0.1905125081539154 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.5196651220321655, "learning_rate": 2.9661359996257498e-05, "loss": 0.3741, "step": 8323, "teacher_loss": 0.3579312264919281 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.3940209746360779, "learning_rate": 2.966087994195317e-05, "loss": 0.2549, "step": 8324, "teacher_loss": 0.2394266426563263 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.29434457421302795, "learning_rate": 2.9660399551518814e-05, "loss": 0.2261, "step": 8325, "teacher_loss": 0.21848604083061218 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.2795717716217041, "learning_rate": 2.965991882496546e-05, "loss": 0.2701, "step": 8326, "teacher_loss": 0.2690196633338928 }, { "compression_loss": 0.0, "epoch": 1.5, "label_loss": 0.2920002341270447, "learning_rate": 2.965943776230411e-05, "loss": 0.2613, "step": 8327, "teacher_loss": 0.2578977346420288 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5741807818412781, "learning_rate": 2.9658956363545807e-05, "loss": 0.3677, "step": 8328, "teacher_loss": 0.34473544359207153 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5791088342666626, "learning_rate": 2.9658474628701585e-05, "loss": 0.3515, "step": 8329, "teacher_loss": 0.3262072801589966 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.19352833926677704, "learning_rate": 2.965799255778249e-05, "loss": 0.2056, "step": 8330, "teacher_loss": 0.20698420703411102 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.2579488456249237, "learning_rate": 2.965751015079957e-05, "loss": 0.1972, "step": 8331, "teacher_loss": 0.1904098093509674 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.624376118183136, "learning_rate": 2.965702740776389e-05, "loss": 0.2744, "step": 8332, "teacher_loss": 0.23547488451004028 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5111129283905029, "learning_rate": 2.965654432868651e-05, "loss": 0.3857, "step": 8333, "teacher_loss": 0.3717600703239441 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3470556139945984, "learning_rate": 2.9656060913578518e-05, "loss": 0.2462, "step": 8334, "teacher_loss": 0.23500967025756836 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5388896465301514, "learning_rate": 2.9655577162450987e-05, "loss": 0.4477, "step": 8335, "teacher_loss": 0.4375278949737549 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.40195631980895996, "learning_rate": 2.9655093075315013e-05, "loss": 0.2458, "step": 8336, "teacher_loss": 0.22844737768173218 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.37421172857284546, "learning_rate": 2.9654608652181692e-05, "loss": 0.1919, "step": 8337, "teacher_loss": 0.1716611534357071 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.4291038513183594, "learning_rate": 2.9654123893062128e-05, "loss": 0.2376, "step": 8338, "teacher_loss": 0.21633939445018768 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6496659517288208, "learning_rate": 2.9653638797967443e-05, "loss": 0.3195, "step": 8339, "teacher_loss": 0.2828552722930908 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.4038991332054138, "learning_rate": 2.965315336690875e-05, "loss": 0.3394, "step": 8340, "teacher_loss": 0.33224791288375854 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5648466944694519, "learning_rate": 2.9652667599897188e-05, "loss": 0.2966, "step": 8341, "teacher_loss": 0.2668426036834717 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5127744674682617, "learning_rate": 2.9652181496943888e-05, "loss": 0.2889, "step": 8342, "teacher_loss": 0.26402172446250916 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.768584132194519, "learning_rate": 2.9651695058059994e-05, "loss": 0.4148, "step": 8343, "teacher_loss": 0.37549591064453125 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.47389882802963257, "learning_rate": 2.965120828325666e-05, "loss": 0.2331, "step": 8344, "teacher_loss": 0.2063218057155609 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.2705867290496826, "learning_rate": 2.9650721172545048e-05, "loss": 0.2408, "step": 8345, "teacher_loss": 0.2375270128250122 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5164929032325745, "learning_rate": 2.9650233725936323e-05, "loss": 0.3, "step": 8346, "teacher_loss": 0.27595674991607666 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3479204773902893, "learning_rate": 2.964974594344167e-05, "loss": 0.3135, "step": 8347, "teacher_loss": 0.30964529514312744 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.44368255138397217, "learning_rate": 2.9649257825072256e-05, "loss": 0.2324, "step": 8348, "teacher_loss": 0.20887935161590576 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3144627511501312, "learning_rate": 2.9648769370839283e-05, "loss": 0.3674, "step": 8349, "teacher_loss": 0.3732324242591858 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6156946420669556, "learning_rate": 2.9648280580753945e-05, "loss": 0.2452, "step": 8350, "teacher_loss": 0.2040124386548996 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.17413514852523804, "learning_rate": 2.9647791454827456e-05, "loss": 0.2295, "step": 8351, "teacher_loss": 0.235652357339859 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.8269870281219482, "learning_rate": 2.9647301993071022e-05, "loss": 0.49, "step": 8352, "teacher_loss": 0.4525560736656189 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6094679236412048, "learning_rate": 2.964681219549587e-05, "loss": 0.2795, "step": 8353, "teacher_loss": 0.24284467101097107 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.28075844049453735, "learning_rate": 2.9646322062113225e-05, "loss": 0.2488, "step": 8354, "teacher_loss": 0.24528086185455322 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5051464438438416, "learning_rate": 2.964583159293433e-05, "loss": 0.3247, "step": 8355, "teacher_loss": 0.3046274185180664 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6898150444030762, "learning_rate": 2.9645340787970426e-05, "loss": 0.3297, "step": 8356, "teacher_loss": 0.2896538972854614 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5490769147872925, "learning_rate": 2.964484964723277e-05, "loss": 0.3293, "step": 8357, "teacher_loss": 0.30491000413894653 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3549392819404602, "learning_rate": 2.964435817073261e-05, "loss": 0.2043, "step": 8358, "teacher_loss": 0.18755456805229187 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.20744769275188446, "learning_rate": 2.9643866358481236e-05, "loss": 0.2266, "step": 8359, "teacher_loss": 0.22875502705574036 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.47783827781677246, "learning_rate": 2.9643374210489904e-05, "loss": 0.2722, "step": 8360, "teacher_loss": 0.24934379756450653 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6166070699691772, "learning_rate": 2.9642881726769903e-05, "loss": 0.2426, "step": 8361, "teacher_loss": 0.20106589794158936 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.2669008672237396, "learning_rate": 2.9642388907332525e-05, "loss": 0.1978, "step": 8362, "teacher_loss": 0.19013383984565735 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3201882243156433, "learning_rate": 2.9641895752189074e-05, "loss": 0.1609, "step": 8363, "teacher_loss": 0.14325255155563354 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5220135450363159, "learning_rate": 2.964140226135085e-05, "loss": 0.2941, "step": 8364, "teacher_loss": 0.26881182193756104 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5362215042114258, "learning_rate": 2.964090843482917e-05, "loss": 0.3421, "step": 8365, "teacher_loss": 0.32055777311325073 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.47233107686042786, "learning_rate": 2.9640414272635352e-05, "loss": 0.2496, "step": 8366, "teacher_loss": 0.22484159469604492 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.33339595794677734, "learning_rate": 2.963991977478073e-05, "loss": 0.2078, "step": 8367, "teacher_loss": 0.19383983314037323 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.40793848037719727, "learning_rate": 2.9639424941276647e-05, "loss": 0.3011, "step": 8368, "teacher_loss": 0.2892279624938965 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.8922539949417114, "learning_rate": 2.9638929772134433e-05, "loss": 0.2839, "step": 8369, "teacher_loss": 0.21629515290260315 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.40253862738609314, "learning_rate": 2.9638434267365456e-05, "loss": 0.2682, "step": 8370, "teacher_loss": 0.2532370090484619 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.43802812695503235, "learning_rate": 2.9637938426981062e-05, "loss": 0.2464, "step": 8371, "teacher_loss": 0.22511237859725952 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5935195684432983, "learning_rate": 2.963744225099263e-05, "loss": 0.3076, "step": 8372, "teacher_loss": 0.27581462264060974 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.3677457273006439, "learning_rate": 2.9636945739411533e-05, "loss": 0.2266, "step": 8373, "teacher_loss": 0.21091234683990479 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.8131536841392517, "learning_rate": 2.9636448892249156e-05, "loss": 0.4475, "step": 8374, "teacher_loss": 0.40686312317848206 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.18671363592147827, "learning_rate": 2.963595170951689e-05, "loss": 0.178, "step": 8375, "teacher_loss": 0.17702318727970123 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.24322748184204102, "learning_rate": 2.9635454191226123e-05, "loss": 0.3136, "step": 8376, "teacher_loss": 0.321449339389801 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.21783912181854248, "learning_rate": 2.963495633738828e-05, "loss": 0.2514, "step": 8377, "teacher_loss": 0.25513410568237305 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.6475422978401184, "learning_rate": 2.963445814801476e-05, "loss": 0.319, "step": 8378, "teacher_loss": 0.2825337052345276 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.5407238006591797, "learning_rate": 2.9633959623116995e-05, "loss": 0.3031, "step": 8379, "teacher_loss": 0.276676744222641 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.4231138527393341, "learning_rate": 2.963346076270641e-05, "loss": 0.2607, "step": 8380, "teacher_loss": 0.24264803528785706 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.4908568263053894, "learning_rate": 2.9632961566794444e-05, "loss": 0.2757, "step": 8381, "teacher_loss": 0.2517714500427246 }, { "compression_loss": 0.0, "epoch": 1.51, "label_loss": 0.8232316970825195, "learning_rate": 2.9632462035392537e-05, "loss": 0.4056, "step": 8382, "teacher_loss": 0.3591885268688202 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.19213512539863586, "learning_rate": 2.9631962168512146e-05, "loss": 0.2697, "step": 8383, "teacher_loss": 0.2783271372318268 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.2417571246623993, "learning_rate": 2.9631461966164735e-05, "loss": 0.2084, "step": 8384, "teacher_loss": 0.2046608179807663 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.27906641364097595, "learning_rate": 2.9630961428361768e-05, "loss": 0.264, "step": 8385, "teacher_loss": 0.2623758018016815 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.3609544038772583, "learning_rate": 2.963046055511472e-05, "loss": 0.2347, "step": 8386, "teacher_loss": 0.22070875763893127 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5492177605628967, "learning_rate": 2.9629959346435077e-05, "loss": 0.2443, "step": 8387, "teacher_loss": 0.21047580242156982 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.33009809255599976, "learning_rate": 2.962945780233433e-05, "loss": 0.2253, "step": 8388, "teacher_loss": 0.2136225700378418 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4573343098163605, "learning_rate": 2.9628955922823972e-05, "loss": 0.2763, "step": 8389, "teacher_loss": 0.25623732805252075 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5541173219680786, "learning_rate": 2.9628453707915518e-05, "loss": 0.3137, "step": 8390, "teacher_loss": 0.28696388006210327 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6047319173812866, "learning_rate": 2.962795115762048e-05, "loss": 0.2803, "step": 8391, "teacher_loss": 0.24419817328453064 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.32866793870925903, "learning_rate": 2.962744827195038e-05, "loss": 0.2106, "step": 8392, "teacher_loss": 0.19745652377605438 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5538060069084167, "learning_rate": 2.9626945050916745e-05, "loss": 0.2181, "step": 8393, "teacher_loss": 0.18079423904418945 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.3148215711116791, "learning_rate": 2.962644149453111e-05, "loss": 0.1793, "step": 8394, "teacher_loss": 0.1642608344554901 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.31869810819625854, "learning_rate": 2.962593760280503e-05, "loss": 0.1841, "step": 8395, "teacher_loss": 0.16911497712135315 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5265036821365356, "learning_rate": 2.962543337575005e-05, "loss": 0.38, "step": 8396, "teacher_loss": 0.3637319505214691 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.3908854126930237, "learning_rate": 2.9624928813377724e-05, "loss": 0.2611, "step": 8397, "teacher_loss": 0.24672654271125793 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.28283023834228516, "learning_rate": 2.962442391569964e-05, "loss": 0.2506, "step": 8398, "teacher_loss": 0.24703805148601532 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6269611120223999, "learning_rate": 2.9623918682727355e-05, "loss": 0.3314, "step": 8399, "teacher_loss": 0.2985045313835144 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.25144824385643005, "learning_rate": 2.9623413114472458e-05, "loss": 0.241, "step": 8400, "teacher_loss": 0.2398124784231186 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.19570410251617432, "learning_rate": 2.962290721094655e-05, "loss": 0.2497, "step": 8401, "teacher_loss": 0.25573664903640747 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.7429532408714294, "learning_rate": 2.9622400972161214e-05, "loss": 0.3798, "step": 8402, "teacher_loss": 0.3395037055015564 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6496878862380981, "learning_rate": 2.9621894398128066e-05, "loss": 0.2914, "step": 8403, "teacher_loss": 0.2515791356563568 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5783128142356873, "learning_rate": 2.9621387488858715e-05, "loss": 0.3202, "step": 8404, "teacher_loss": 0.29154735803604126 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4161297678947449, "learning_rate": 2.9620880244364792e-05, "loss": 0.3433, "step": 8405, "teacher_loss": 0.3352524936199188 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4336242079734802, "learning_rate": 2.9620372664657916e-05, "loss": 0.2697, "step": 8406, "teacher_loss": 0.25147441029548645 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.22214964032173157, "learning_rate": 2.961986474974973e-05, "loss": 0.1863, "step": 8407, "teacher_loss": 0.18227848410606384 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.12293720245361328, "learning_rate": 2.961935649965188e-05, "loss": 0.1328, "step": 8408, "teacher_loss": 0.13388575613498688 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.9924032092094421, "learning_rate": 2.9618847914376014e-05, "loss": 0.3607, "step": 8409, "teacher_loss": 0.29051482677459717 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.3416539430618286, "learning_rate": 2.96183389939338e-05, "loss": 0.2569, "step": 8410, "teacher_loss": 0.24749934673309326 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4048881530761719, "learning_rate": 2.9617829738336894e-05, "loss": 0.3805, "step": 8411, "teacher_loss": 0.3777827024459839 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4789392650127411, "learning_rate": 2.9617320147596982e-05, "loss": 0.2999, "step": 8412, "teacher_loss": 0.2800263464450836 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4117765426635742, "learning_rate": 2.9616810221725743e-05, "loss": 0.2911, "step": 8413, "teacher_loss": 0.2777364253997803 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.3812519907951355, "learning_rate": 2.9616299960734873e-05, "loss": 0.2731, "step": 8414, "teacher_loss": 0.26104792952537537 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.7033828496932983, "learning_rate": 2.961578936463606e-05, "loss": 0.3322, "step": 8415, "teacher_loss": 0.29092758893966675 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.28939089179039, "learning_rate": 2.9615278433441024e-05, "loss": 0.2216, "step": 8416, "teacher_loss": 0.214043527841568 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.7313584089279175, "learning_rate": 2.9614767167161467e-05, "loss": 0.2734, "step": 8417, "teacher_loss": 0.22255347669124603 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5938645601272583, "learning_rate": 2.9614255565809117e-05, "loss": 0.342, "step": 8418, "teacher_loss": 0.31401240825653076 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4795907735824585, "learning_rate": 2.9613743629395707e-05, "loss": 0.3109, "step": 8419, "teacher_loss": 0.2921314835548401 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.9788291454315186, "learning_rate": 2.9613231357932967e-05, "loss": 0.3996, "step": 8420, "teacher_loss": 0.3352123498916626 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6550199389457703, "learning_rate": 2.9612718751432646e-05, "loss": 0.355, "step": 8421, "teacher_loss": 0.32168450951576233 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.35970908403396606, "learning_rate": 2.9612205809906495e-05, "loss": 0.3148, "step": 8422, "teacher_loss": 0.3098025918006897 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.48537376523017883, "learning_rate": 2.961169253336627e-05, "loss": 0.3056, "step": 8423, "teacher_loss": 0.2856075167655945 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6003162860870361, "learning_rate": 2.9611178921823746e-05, "loss": 0.2748, "step": 8424, "teacher_loss": 0.23865661025047302 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6882692575454712, "learning_rate": 2.96106649752907e-05, "loss": 0.3216, "step": 8425, "teacher_loss": 0.2808833122253418 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.39154383540153503, "learning_rate": 2.9610150693778907e-05, "loss": 0.2728, "step": 8426, "teacher_loss": 0.25956955552101135 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4956713914871216, "learning_rate": 2.9609636077300163e-05, "loss": 0.3618, "step": 8427, "teacher_loss": 0.3469092845916748 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6076470613479614, "learning_rate": 2.960912112586627e-05, "loss": 0.2896, "step": 8428, "teacher_loss": 0.2542904019355774 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6040419340133667, "learning_rate": 2.960860583948902e-05, "loss": 0.4315, "step": 8429, "teacher_loss": 0.4123265743255615 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.28677159547805786, "learning_rate": 2.9608090218180245e-05, "loss": 0.1555, "step": 8430, "teacher_loss": 0.14092107117176056 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.6993134021759033, "learning_rate": 2.9607574261951756e-05, "loss": 0.2685, "step": 8431, "teacher_loss": 0.22059330344200134 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.5418664216995239, "learning_rate": 2.9607057970815387e-05, "loss": 0.2752, "step": 8432, "teacher_loss": 0.24559202790260315 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.2492949366569519, "learning_rate": 2.960654134478297e-05, "loss": 0.2395, "step": 8433, "teacher_loss": 0.23839811980724335 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.34120193123817444, "learning_rate": 2.960602438386635e-05, "loss": 0.3824, "step": 8434, "teacher_loss": 0.38692623376846313 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.579842746257782, "learning_rate": 2.9605507088077385e-05, "loss": 0.3205, "step": 8435, "teacher_loss": 0.2916521430015564 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.4096500873565674, "learning_rate": 2.9604989457427934e-05, "loss": 0.366, "step": 8436, "teacher_loss": 0.3611446022987366 }, { "compression_loss": 0.0, "epoch": 1.52, "label_loss": 0.41288426518440247, "learning_rate": 2.9604471491929863e-05, "loss": 0.2535, "step": 8437, "teacher_loss": 0.23582306504249573 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.3749391436576843, "learning_rate": 2.9603953191595046e-05, "loss": 0.2738, "step": 8438, "teacher_loss": 0.26256895065307617 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.2884719967842102, "learning_rate": 2.9603434556435365e-05, "loss": 0.3065, "step": 8439, "teacher_loss": 0.30855679512023926 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.45339787006378174, "learning_rate": 2.9602915586462718e-05, "loss": 0.2386, "step": 8440, "teacher_loss": 0.21468129754066467 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.9239440560340881, "learning_rate": 2.9602396281688994e-05, "loss": 0.2757, "step": 8441, "teacher_loss": 0.203645259141922 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.4192727208137512, "learning_rate": 2.9601876642126105e-05, "loss": 0.2139, "step": 8442, "teacher_loss": 0.19105207920074463 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.8145973682403564, "learning_rate": 2.9601356667785963e-05, "loss": 0.4099, "step": 8443, "teacher_loss": 0.3649258613586426 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.30364200472831726, "learning_rate": 2.960083635868049e-05, "loss": 0.2535, "step": 8444, "teacher_loss": 0.2479751706123352 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.7078992128372192, "learning_rate": 2.9600315714821617e-05, "loss": 0.2779, "step": 8445, "teacher_loss": 0.23008784651756287 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.4667588472366333, "learning_rate": 2.9599794736221276e-05, "loss": 0.2413, "step": 8446, "teacher_loss": 0.21621307730674744 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.41662049293518066, "learning_rate": 2.9599273422891413e-05, "loss": 0.2877, "step": 8447, "teacher_loss": 0.27338707447052 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.4033508896827698, "learning_rate": 2.9598751774843986e-05, "loss": 0.2376, "step": 8448, "teacher_loss": 0.21923065185546875 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.8118084669113159, "learning_rate": 2.9598229792090948e-05, "loss": 0.3252, "step": 8449, "teacher_loss": 0.27113327383995056 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.6977510452270508, "learning_rate": 2.9597707474644265e-05, "loss": 0.2627, "step": 8450, "teacher_loss": 0.2143951952457428 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.11370211839675903, "learning_rate": 2.9597184822515915e-05, "loss": 0.1815, "step": 8451, "teacher_loss": 0.18902304768562317 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.29347214102745056, "learning_rate": 2.959666183571789e-05, "loss": 0.2508, "step": 8452, "teacher_loss": 0.2460668832063675 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.7737855911254883, "learning_rate": 2.9596138514262166e-05, "loss": 0.3101, "step": 8453, "teacher_loss": 0.2585286796092987 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.31305283308029175, "learning_rate": 2.959561485816075e-05, "loss": 0.2311, "step": 8454, "teacher_loss": 0.2220320701599121 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.2743084132671356, "learning_rate": 2.959509086742564e-05, "loss": 0.2088, "step": 8455, "teacher_loss": 0.20153355598449707 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.7727459669113159, "learning_rate": 2.9594566542068865e-05, "loss": 0.3668, "step": 8456, "teacher_loss": 0.3217070400714874 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.44734567403793335, "learning_rate": 2.9594041882102426e-05, "loss": 0.3409, "step": 8457, "teacher_loss": 0.3290509283542633 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.49452483654022217, "learning_rate": 2.9593516887538365e-05, "loss": 0.2615, "step": 8458, "teacher_loss": 0.23566311597824097 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.5902209877967834, "learning_rate": 2.9592991558388715e-05, "loss": 0.31, "step": 8459, "teacher_loss": 0.2788805365562439 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.5106395483016968, "learning_rate": 2.9592465894665526e-05, "loss": 0.4375, "step": 8460, "teacher_loss": 0.4294174611568451 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.2021930068731308, "learning_rate": 2.9591939896380836e-05, "loss": 0.2524, "step": 8461, "teacher_loss": 0.25796014070510864 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.3023834228515625, "learning_rate": 2.9591413563546722e-05, "loss": 0.2313, "step": 8462, "teacher_loss": 0.2233705222606659 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 1.266172170639038, "learning_rate": 2.9590886896175234e-05, "loss": 0.3023, "step": 8463, "teacher_loss": 0.19525475800037384 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.3610774874687195, "learning_rate": 2.9590359894278458e-05, "loss": 0.2568, "step": 8464, "teacher_loss": 0.24524801969528198 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.8323571681976318, "learning_rate": 2.9589832557868476e-05, "loss": 0.3361, "step": 8465, "teacher_loss": 0.28099486231803894 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.30585744976997375, "learning_rate": 2.9589304886957373e-05, "loss": 0.2587, "step": 8466, "teacher_loss": 0.25347477197647095 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.39869269728660583, "learning_rate": 2.9588776881557253e-05, "loss": 0.2135, "step": 8467, "teacher_loss": 0.1929522156715393 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.29899516701698303, "learning_rate": 2.9588248541680216e-05, "loss": 0.2565, "step": 8468, "teacher_loss": 0.2518104016780853 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.6047368049621582, "learning_rate": 2.958771986733838e-05, "loss": 0.2896, "step": 8469, "teacher_loss": 0.25462162494659424 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 1.1784284114837646, "learning_rate": 2.9587190858543864e-05, "loss": 0.3237, "step": 8470, "teacher_loss": 0.22872406244277954 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.7156506776809692, "learning_rate": 2.9586661515308793e-05, "loss": 0.3385, "step": 8471, "teacher_loss": 0.2965621054172516 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.11129346489906311, "learning_rate": 2.9586131837645308e-05, "loss": 0.1733, "step": 8472, "teacher_loss": 0.18018320202827454 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.39405733346939087, "learning_rate": 2.9585601825565554e-05, "loss": 0.3225, "step": 8473, "teacher_loss": 0.3145187497138977 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.47947466373443604, "learning_rate": 2.9585071479081675e-05, "loss": 0.2253, "step": 8474, "teacher_loss": 0.19707363843917847 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.2032071352005005, "learning_rate": 2.958454079820584e-05, "loss": 0.146, "step": 8475, "teacher_loss": 0.13969644904136658 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.30624833703041077, "learning_rate": 2.958400978295021e-05, "loss": 0.2666, "step": 8476, "teacher_loss": 0.2622242271900177 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 1.054046869277954, "learning_rate": 2.958347843332696e-05, "loss": 0.3451, "step": 8477, "teacher_loss": 0.2663172483444214 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.5247926115989685, "learning_rate": 2.9582946749348273e-05, "loss": 0.3117, "step": 8478, "teacher_loss": 0.2880529761314392 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.6808921098709106, "learning_rate": 2.958241473102634e-05, "loss": 0.3854, "step": 8479, "teacher_loss": 0.3525335490703583 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.2587246596813202, "learning_rate": 2.9581882378373354e-05, "loss": 0.2725, "step": 8480, "teacher_loss": 0.27407407760620117 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.43125879764556885, "learning_rate": 2.9581349691401527e-05, "loss": 0.2876, "step": 8481, "teacher_loss": 0.27168384194374084 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.6107392907142639, "learning_rate": 2.958081667012307e-05, "loss": 0.3078, "step": 8482, "teacher_loss": 0.27408719062805176 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.4485079050064087, "learning_rate": 2.95802833145502e-05, "loss": 0.2621, "step": 8483, "teacher_loss": 0.24143168330192566 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.4252525269985199, "learning_rate": 2.957974962469515e-05, "loss": 0.2547, "step": 8484, "teacher_loss": 0.2357458472251892 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 1.4223284721374512, "learning_rate": 2.9579215600570152e-05, "loss": 0.3845, "step": 8485, "teacher_loss": 0.2691500186920166 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.5256362557411194, "learning_rate": 2.9578681242187448e-05, "loss": 0.2397, "step": 8486, "teacher_loss": 0.20793354511260986 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.34112221002578735, "learning_rate": 2.9578146549559296e-05, "loss": 0.1687, "step": 8487, "teacher_loss": 0.14957356452941895 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.20230218768119812, "learning_rate": 2.9577611522697946e-05, "loss": 0.2023, "step": 8488, "teacher_loss": 0.20225536823272705 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.28243154287338257, "learning_rate": 2.9577076161615675e-05, "loss": 0.2195, "step": 8489, "teacher_loss": 0.21251662075519562 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.3175084888935089, "learning_rate": 2.9576540466324753e-05, "loss": 0.2757, "step": 8490, "teacher_loss": 0.27106496691703796 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.7194525599479675, "learning_rate": 2.957600443683746e-05, "loss": 0.2751, "step": 8491, "teacher_loss": 0.22571350634098053 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.3478257656097412, "learning_rate": 2.957546807316608e-05, "loss": 0.265, "step": 8492, "teacher_loss": 0.2557602524757385 }, { "compression_loss": 0.0, "epoch": 1.53, "label_loss": 0.37659671902656555, "learning_rate": 2.9574931375322927e-05, "loss": 0.293, "step": 8493, "teacher_loss": 0.28370219469070435 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.39596056938171387, "learning_rate": 2.9574394343320285e-05, "loss": 0.3361, "step": 8494, "teacher_loss": 0.32943981885910034 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.2551983594894409, "learning_rate": 2.9573856977170484e-05, "loss": 0.281, "step": 8495, "teacher_loss": 0.28387588262557983 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.22610679268836975, "learning_rate": 2.9573319276885837e-05, "loss": 0.1834, "step": 8496, "teacher_loss": 0.1787102222442627 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5078145265579224, "learning_rate": 2.957278124247867e-05, "loss": 0.35, "step": 8497, "teacher_loss": 0.3324176073074341 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.9952711462974548, "learning_rate": 2.957224287396132e-05, "loss": 0.4073, "step": 8498, "teacher_loss": 0.34200337529182434 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.6877197027206421, "learning_rate": 2.957170417134613e-05, "loss": 0.3768, "step": 8499, "teacher_loss": 0.34229886531829834 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.37241944670677185, "learning_rate": 2.9571165134645455e-05, "loss": 0.2671, "step": 8500, "teacher_loss": 0.2553884983062744 }, { "epoch": 1.54, "eval_exact_match": 79.6972563859981, "eval_f1": 87.18427986193834, "step": 8500 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.7474992275238037, "learning_rate": 2.957062576387165e-05, "loss": 0.4629, "step": 8501, "teacher_loss": 0.4313093423843384 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.44652196764945984, "learning_rate": 2.9570086059037077e-05, "loss": 0.216, "step": 8502, "teacher_loss": 0.19041018187999725 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.25524282455444336, "learning_rate": 2.956954602015412e-05, "loss": 0.2797, "step": 8503, "teacher_loss": 0.28237634897232056 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.582446277141571, "learning_rate": 2.956900564723515e-05, "loss": 0.2829, "step": 8504, "teacher_loss": 0.24963778257369995 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.2064216136932373, "learning_rate": 2.956846494029256e-05, "loss": 0.1592, "step": 8505, "teacher_loss": 0.15399357676506042 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3322484791278839, "learning_rate": 2.9567923899338748e-05, "loss": 0.2651, "step": 8506, "teacher_loss": 0.2576685845851898 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.6278872489929199, "learning_rate": 2.9567382524386122e-05, "loss": 0.2171, "step": 8507, "teacher_loss": 0.1714574247598648 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.17011165618896484, "learning_rate": 2.9566840815447088e-05, "loss": 0.1861, "step": 8508, "teacher_loss": 0.1878291368484497 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5332197546958923, "learning_rate": 2.9566298772534065e-05, "loss": 0.2753, "step": 8509, "teacher_loss": 0.24668832123279572 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.28810805082321167, "learning_rate": 2.9565756395659485e-05, "loss": 0.3266, "step": 8510, "teacher_loss": 0.3309100866317749 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3236047029495239, "learning_rate": 2.9565213684835782e-05, "loss": 0.2578, "step": 8511, "teacher_loss": 0.25046277046203613 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.6556010842323303, "learning_rate": 2.9564670640075395e-05, "loss": 0.299, "step": 8512, "teacher_loss": 0.25935590267181396 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5609636902809143, "learning_rate": 2.956412726139078e-05, "loss": 0.4056, "step": 8513, "teacher_loss": 0.3883693218231201 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.6695277690887451, "learning_rate": 2.9563583548794394e-05, "loss": 0.2479, "step": 8514, "teacher_loss": 0.2010360062122345 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.9909501075744629, "learning_rate": 2.9563039502298698e-05, "loss": 0.3945, "step": 8515, "teacher_loss": 0.32819777727127075 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.543024480342865, "learning_rate": 2.956249512191617e-05, "loss": 0.2847, "step": 8516, "teacher_loss": 0.2560148239135742 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5923643112182617, "learning_rate": 2.9561950407659287e-05, "loss": 0.2285, "step": 8517, "teacher_loss": 0.1880713403224945 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3293316960334778, "learning_rate": 2.9561405359540544e-05, "loss": 0.3, "step": 8518, "teacher_loss": 0.29673877358436584 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.42155328392982483, "learning_rate": 2.956085997757243e-05, "loss": 0.2727, "step": 8519, "teacher_loss": 0.2562018036842346 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.22038277983665466, "learning_rate": 2.956031426176746e-05, "loss": 0.2658, "step": 8520, "teacher_loss": 0.27079641819000244 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 1.7243285179138184, "learning_rate": 2.9559768212138128e-05, "loss": 0.5099, "step": 8521, "teacher_loss": 0.3749206066131592 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5286412239074707, "learning_rate": 2.9559221828696968e-05, "loss": 0.2426, "step": 8522, "teacher_loss": 0.2108011543750763 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.4789571762084961, "learning_rate": 2.9558675111456502e-05, "loss": 0.2901, "step": 8523, "teacher_loss": 0.26915454864501953 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3270968496799469, "learning_rate": 2.9558128060429262e-05, "loss": 0.1653, "step": 8524, "teacher_loss": 0.14727026224136353 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3637726902961731, "learning_rate": 2.95575806756278e-05, "loss": 0.2587, "step": 8525, "teacher_loss": 0.246974378824234 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.6164846420288086, "learning_rate": 2.9557032957064654e-05, "loss": 0.3489, "step": 8526, "teacher_loss": 0.31919431686401367 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3185744881629944, "learning_rate": 2.9556484904752388e-05, "loss": 0.21, "step": 8527, "teacher_loss": 0.19795897603034973 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.35134902596473694, "learning_rate": 2.9555936518703564e-05, "loss": 0.2579, "step": 8528, "teacher_loss": 0.2475559115409851 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 1.3769758939743042, "learning_rate": 2.955538779893076e-05, "loss": 0.5702, "step": 8529, "teacher_loss": 0.4805184304714203 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5651233792304993, "learning_rate": 2.9554838745446545e-05, "loss": 0.3449, "step": 8530, "teacher_loss": 0.32047462463378906 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 1.7869309186935425, "learning_rate": 2.9554289358263528e-05, "loss": 0.3986, "step": 8531, "teacher_loss": 0.2443714290857315 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.8655029535293579, "learning_rate": 2.9553739637394283e-05, "loss": 0.3367, "step": 8532, "teacher_loss": 0.2779577374458313 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3091878890991211, "learning_rate": 2.955318958285142e-05, "loss": 0.2513, "step": 8533, "teacher_loss": 0.24482083320617676 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3840416967868805, "learning_rate": 2.955263919464756e-05, "loss": 0.2422, "step": 8534, "teacher_loss": 0.22649219632148743 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5083248615264893, "learning_rate": 2.9552088472795314e-05, "loss": 0.2554, "step": 8535, "teacher_loss": 0.2272791862487793 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.45723170042037964, "learning_rate": 2.9551537417307307e-05, "loss": 0.3238, "step": 8536, "teacher_loss": 0.30900412797927856 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.4478493332862854, "learning_rate": 2.955098602819617e-05, "loss": 0.2271, "step": 8537, "teacher_loss": 0.20261050760746002 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.30162131786346436, "learning_rate": 2.9550434305474556e-05, "loss": 0.2749, "step": 8538, "teacher_loss": 0.27192869782447815 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.7538201212882996, "learning_rate": 2.95498822491551e-05, "loss": 0.3638, "step": 8539, "teacher_loss": 0.3205137252807617 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.4037252962589264, "learning_rate": 2.9549329859250477e-05, "loss": 0.3005, "step": 8540, "teacher_loss": 0.28901785612106323 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.2924758791923523, "learning_rate": 2.9548777135773338e-05, "loss": 0.2976, "step": 8541, "teacher_loss": 0.29822444915771484 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3618459105491638, "learning_rate": 2.9548224078736356e-05, "loss": 0.2886, "step": 8542, "teacher_loss": 0.28047671914100647 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.3465486466884613, "learning_rate": 2.9547670688152212e-05, "loss": 0.2236, "step": 8543, "teacher_loss": 0.20996011793613434 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.49834638833999634, "learning_rate": 2.9547116964033602e-05, "loss": 0.3097, "step": 8544, "teacher_loss": 0.2887871265411377 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.496936559677124, "learning_rate": 2.9546562906393208e-05, "loss": 0.3024, "step": 8545, "teacher_loss": 0.28076890110969543 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5599715709686279, "learning_rate": 2.9546008515243745e-05, "loss": 0.2679, "step": 8546, "teacher_loss": 0.23540370166301727 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.8040772676467896, "learning_rate": 2.9545453790597915e-05, "loss": 0.5235, "step": 8547, "teacher_loss": 0.4922882318496704 }, { "compression_loss": 0.0, "epoch": 1.54, "label_loss": 0.5149890780448914, "learning_rate": 2.9544898732468438e-05, "loss": 0.2335, "step": 8548, "teacher_loss": 0.2022087424993515 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6115249395370483, "learning_rate": 2.9544343340868042e-05, "loss": 0.3089, "step": 8549, "teacher_loss": 0.2752223312854767 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.7015501260757446, "learning_rate": 2.954378761580946e-05, "loss": 0.2938, "step": 8550, "teacher_loss": 0.24852949380874634 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5815811157226562, "learning_rate": 2.9543231557305436e-05, "loss": 0.2919, "step": 8551, "teacher_loss": 0.259745717048645 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.22638264298439026, "learning_rate": 2.9542675165368708e-05, "loss": 0.1974, "step": 8552, "teacher_loss": 0.19422048330307007 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.2468913048505783, "learning_rate": 2.9542118440012043e-05, "loss": 0.1948, "step": 8553, "teacher_loss": 0.1889914572238922 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.7018353343009949, "learning_rate": 2.9541561381248203e-05, "loss": 0.2781, "step": 8554, "teacher_loss": 0.23104478418827057 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6007068753242493, "learning_rate": 2.9541003989089956e-05, "loss": 0.2717, "step": 8555, "teacher_loss": 0.23517441749572754 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5439329147338867, "learning_rate": 2.9540446263550085e-05, "loss": 0.2358, "step": 8556, "teacher_loss": 0.20154231786727905 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.8904017806053162, "learning_rate": 2.9539888204641377e-05, "loss": 0.3434, "step": 8557, "teacher_loss": 0.2826574146747589 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3239039480686188, "learning_rate": 2.9539329812376624e-05, "loss": 0.2546, "step": 8558, "teacher_loss": 0.2469239979982376 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3068544864654541, "learning_rate": 2.953877108676863e-05, "loss": 0.2504, "step": 8559, "teacher_loss": 0.24412289261817932 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.2580702304840088, "learning_rate": 2.9538212027830203e-05, "loss": 0.3587, "step": 8560, "teacher_loss": 0.36991024017333984 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.44764161109924316, "learning_rate": 2.9537652635574162e-05, "loss": 0.1909, "step": 8561, "teacher_loss": 0.16238421201705933 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4043823778629303, "learning_rate": 2.9537092910013334e-05, "loss": 0.3002, "step": 8562, "teacher_loss": 0.2885870337486267 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5871374607086182, "learning_rate": 2.953653285116055e-05, "loss": 0.3009, "step": 8563, "teacher_loss": 0.2691356837749481 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.26736289262771606, "learning_rate": 2.9535972459028648e-05, "loss": 0.27, "step": 8564, "teacher_loss": 0.2703286111354828 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.1722472906112671, "learning_rate": 2.953541173363048e-05, "loss": 0.2205, "step": 8565, "teacher_loss": 0.22588962316513062 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4597053825855255, "learning_rate": 2.9534850674978903e-05, "loss": 0.2865, "step": 8566, "teacher_loss": 0.26726406812667847 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.2582359313964844, "learning_rate": 2.9534289283086776e-05, "loss": 0.2335, "step": 8567, "teacher_loss": 0.23078656196594238 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.24019068479537964, "learning_rate": 2.953372755796697e-05, "loss": 0.1791, "step": 8568, "teacher_loss": 0.17230644822120667 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.25019025802612305, "learning_rate": 2.9533165499632367e-05, "loss": 0.203, "step": 8569, "teacher_loss": 0.1977461278438568 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5243014097213745, "learning_rate": 2.9532603108095855e-05, "loss": 0.25, "step": 8570, "teacher_loss": 0.21947431564331055 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6994187235832214, "learning_rate": 2.953204038337032e-05, "loss": 0.4322, "step": 8571, "teacher_loss": 0.4024886190891266 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.458823025226593, "learning_rate": 2.953147732546867e-05, "loss": 0.4275, "step": 8572, "teacher_loss": 0.42399123311042786 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5695011615753174, "learning_rate": 2.953091393440381e-05, "loss": 0.3098, "step": 8573, "teacher_loss": 0.28099048137664795 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3701797127723694, "learning_rate": 2.9530350210188662e-05, "loss": 0.1993, "step": 8574, "teacher_loss": 0.1803111433982849 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5282086730003357, "learning_rate": 2.952978615283615e-05, "loss": 0.243, "step": 8575, "teacher_loss": 0.21136143803596497 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.27152663469314575, "learning_rate": 2.9529221762359203e-05, "loss": 0.1898, "step": 8576, "teacher_loss": 0.18074044585227966 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4015180468559265, "learning_rate": 2.9528657038770758e-05, "loss": 0.1714, "step": 8577, "teacher_loss": 0.1458512842655182 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 1.4023849964141846, "learning_rate": 2.952809198208377e-05, "loss": 0.4972, "step": 8578, "teacher_loss": 0.3966098725795746 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.15761852264404297, "learning_rate": 2.952752659231119e-05, "loss": 0.1654, "step": 8579, "teacher_loss": 0.16629338264465332 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4133000373840332, "learning_rate": 2.952696086946598e-05, "loss": 0.3466, "step": 8580, "teacher_loss": 0.3392438292503357 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.42583203315734863, "learning_rate": 2.952639481356111e-05, "loss": 0.3473, "step": 8581, "teacher_loss": 0.3385217785835266 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3133378028869629, "learning_rate": 2.952582842460956e-05, "loss": 0.2694, "step": 8582, "teacher_loss": 0.26452380418777466 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3498862683773041, "learning_rate": 2.9525261702624316e-05, "loss": 0.1953, "step": 8583, "teacher_loss": 0.1781367063522339 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6899369955062866, "learning_rate": 2.952469464761837e-05, "loss": 0.2645, "step": 8584, "teacher_loss": 0.21727266907691956 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3239683508872986, "learning_rate": 2.9524127259604724e-05, "loss": 0.2319, "step": 8585, "teacher_loss": 0.22169393301010132 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4004390835762024, "learning_rate": 2.9523559538596383e-05, "loss": 0.2269, "step": 8586, "teacher_loss": 0.20760062336921692 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6449419260025024, "learning_rate": 2.952299148460637e-05, "loss": 0.2802, "step": 8587, "teacher_loss": 0.2396230399608612 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.1920221447944641, "learning_rate": 2.9522423097647696e-05, "loss": 0.2104, "step": 8588, "teacher_loss": 0.21244044601917267 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.36759212613105774, "learning_rate": 2.9521854377733408e-05, "loss": 0.1968, "step": 8589, "teacher_loss": 0.17787227034568787 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.43617868423461914, "learning_rate": 2.952128532487654e-05, "loss": 0.2641, "step": 8590, "teacher_loss": 0.24499253928661346 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5856689214706421, "learning_rate": 2.9520715939090132e-05, "loss": 0.355, "step": 8591, "teacher_loss": 0.329356849193573 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.2470080554485321, "learning_rate": 2.9520146220387244e-05, "loss": 0.2621, "step": 8592, "teacher_loss": 0.2637979984283447 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.3483297824859619, "learning_rate": 2.9519576168780936e-05, "loss": 0.183, "step": 8593, "teacher_loss": 0.1646513044834137 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.6476221680641174, "learning_rate": 2.951900578428428e-05, "loss": 0.3424, "step": 8594, "teacher_loss": 0.3084837794303894 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4343809485435486, "learning_rate": 2.951843506691035e-05, "loss": 0.2517, "step": 8595, "teacher_loss": 0.23135775327682495 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4594653844833374, "learning_rate": 2.9517864016672233e-05, "loss": 0.2655, "step": 8596, "teacher_loss": 0.24399425089359283 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.7300125360488892, "learning_rate": 2.951729263358302e-05, "loss": 0.3612, "step": 8597, "teacher_loss": 0.3201755881309509 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5028871297836304, "learning_rate": 2.9516720917655813e-05, "loss": 0.3218, "step": 8598, "teacher_loss": 0.3016747832298279 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.36113494634628296, "learning_rate": 2.951614886890372e-05, "loss": 0.2666, "step": 8599, "teacher_loss": 0.2560886740684509 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.12891221046447754, "learning_rate": 2.9515576487339854e-05, "loss": 0.1842, "step": 8600, "teacher_loss": 0.19030898809432983 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.8552432060241699, "learning_rate": 2.9515003772977337e-05, "loss": 0.3132, "step": 8601, "teacher_loss": 0.25296127796173096 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.4133296012878418, "learning_rate": 2.9514430725829304e-05, "loss": 0.2334, "step": 8602, "teacher_loss": 0.2134297788143158 }, { "compression_loss": 0.0, "epoch": 1.55, "label_loss": 0.5385497808456421, "learning_rate": 2.951385734590889e-05, "loss": 0.3226, "step": 8603, "teacher_loss": 0.29856711626052856 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.35052624344825745, "learning_rate": 2.9513283633229245e-05, "loss": 0.2253, "step": 8604, "teacher_loss": 0.2113742083311081 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7931466102600098, "learning_rate": 2.9512709587803515e-05, "loss": 0.4882, "step": 8605, "teacher_loss": 0.4543372392654419 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7294799089431763, "learning_rate": 2.9512135209644867e-05, "loss": 0.4756, "step": 8606, "teacher_loss": 0.4474000334739685 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3064729571342468, "learning_rate": 2.951156049876647e-05, "loss": 0.2051, "step": 8607, "teacher_loss": 0.19379834830760956 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3581099510192871, "learning_rate": 2.9510985455181497e-05, "loss": 0.292, "step": 8608, "teacher_loss": 0.28469568490982056 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5751882195472717, "learning_rate": 2.9510410078903134e-05, "loss": 0.3178, "step": 8609, "teacher_loss": 0.28923600912094116 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7669243812561035, "learning_rate": 2.950983436994457e-05, "loss": 0.4255, "step": 8610, "teacher_loss": 0.38756227493286133 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.38386228680610657, "learning_rate": 2.950925832831901e-05, "loss": 0.2894, "step": 8611, "teacher_loss": 0.27894073724746704 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7692005634307861, "learning_rate": 2.950868195403966e-05, "loss": 0.3097, "step": 8612, "teacher_loss": 0.25859326124191284 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5795905590057373, "learning_rate": 2.9508105247119728e-05, "loss": 0.2738, "step": 8613, "teacher_loss": 0.23978012800216675 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3799271285533905, "learning_rate": 2.950752820757244e-05, "loss": 0.2276, "step": 8614, "teacher_loss": 0.21062520146369934 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.4844506084918976, "learning_rate": 2.950695083541103e-05, "loss": 0.2722, "step": 8615, "teacher_loss": 0.2486034631729126 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.4205459654331207, "learning_rate": 2.9506373130648725e-05, "loss": 0.1791, "step": 8616, "teacher_loss": 0.15226785838603973 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5086053013801575, "learning_rate": 2.950579509329878e-05, "loss": 0.2174, "step": 8617, "teacher_loss": 0.18498878180980682 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.41754621267318726, "learning_rate": 2.9505216723374442e-05, "loss": 0.4361, "step": 8618, "teacher_loss": 0.4382110834121704 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.6171455383300781, "learning_rate": 2.9504638020888977e-05, "loss": 0.2053, "step": 8619, "teacher_loss": 0.15951156616210938 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.33646854758262634, "learning_rate": 2.950405898585565e-05, "loss": 0.2962, "step": 8620, "teacher_loss": 0.29176414012908936 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.41980230808258057, "learning_rate": 2.950347961828773e-05, "loss": 0.3017, "step": 8621, "teacher_loss": 0.2885623276233673 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.37618187069892883, "learning_rate": 2.950289991819851e-05, "loss": 0.2436, "step": 8622, "teacher_loss": 0.22891265153884888 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.9587826728820801, "learning_rate": 2.9502319885601277e-05, "loss": 0.4785, "step": 8623, "teacher_loss": 0.42515599727630615 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.458151638507843, "learning_rate": 2.9501739520509328e-05, "loss": 0.2018, "step": 8624, "teacher_loss": 0.17326241731643677 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.767421543598175, "learning_rate": 2.950115882293597e-05, "loss": 0.4138, "step": 8625, "teacher_loss": 0.37455683946609497 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7027378082275391, "learning_rate": 2.950057779289452e-05, "loss": 0.2745, "step": 8626, "teacher_loss": 0.2269049882888794 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5277151465415955, "learning_rate": 2.9499996430398296e-05, "loss": 0.185, "step": 8627, "teacher_loss": 0.14696623384952545 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.732877790927887, "learning_rate": 2.9499414735460625e-05, "loss": 0.7453, "step": 8628, "teacher_loss": 0.7467131614685059 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.6541773080825806, "learning_rate": 2.9498832708094845e-05, "loss": 0.3327, "step": 8629, "teacher_loss": 0.29699188470840454 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3356805145740509, "learning_rate": 2.9498250348314302e-05, "loss": 0.1882, "step": 8630, "teacher_loss": 0.17180192470550537 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5871535539627075, "learning_rate": 2.9497667656132345e-05, "loss": 0.2289, "step": 8631, "teacher_loss": 0.18904179334640503 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.33741694688796997, "learning_rate": 2.9497084631562333e-05, "loss": 0.2673, "step": 8632, "teacher_loss": 0.25950896739959717 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.2990621030330658, "learning_rate": 2.949650127461764e-05, "loss": 0.2196, "step": 8633, "teacher_loss": 0.21081838011741638 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.34680747985839844, "learning_rate": 2.9495917585311635e-05, "loss": 0.1839, "step": 8634, "teacher_loss": 0.1658465564250946 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.2762005925178528, "learning_rate": 2.9495333563657698e-05, "loss": 0.2558, "step": 8635, "teacher_loss": 0.253488153219223 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.29490700364112854, "learning_rate": 2.949474920966922e-05, "loss": 0.2907, "step": 8636, "teacher_loss": 0.29023826122283936 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.24283906817436218, "learning_rate": 2.9494164523359606e-05, "loss": 0.3005, "step": 8637, "teacher_loss": 0.3069021999835968 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.2504567503929138, "learning_rate": 2.949357950474225e-05, "loss": 0.3505, "step": 8638, "teacher_loss": 0.3616335093975067 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3886566162109375, "learning_rate": 2.9492994153830576e-05, "loss": 0.3128, "step": 8639, "teacher_loss": 0.3043721616268158 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5396826267242432, "learning_rate": 2.949240847063799e-05, "loss": 0.3215, "step": 8640, "teacher_loss": 0.2972286343574524 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.7378908395767212, "learning_rate": 2.9491822455177936e-05, "loss": 0.3676, "step": 8641, "teacher_loss": 0.3265003263950348 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.46372678875923157, "learning_rate": 2.9491236107463837e-05, "loss": 0.3135, "step": 8642, "teacher_loss": 0.29681527614593506 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.6142773628234863, "learning_rate": 2.9490649427509142e-05, "loss": 0.3107, "step": 8643, "teacher_loss": 0.27697986364364624 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.15381619334220886, "learning_rate": 2.9490062415327305e-05, "loss": 0.2601, "step": 8644, "teacher_loss": 0.27186357975006104 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.2013280689716339, "learning_rate": 2.9489475070931777e-05, "loss": 0.1788, "step": 8645, "teacher_loss": 0.17626135051250458 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.21912944316864014, "learning_rate": 2.9488887394336025e-05, "loss": 0.2175, "step": 8646, "teacher_loss": 0.21733978390693665 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.31355148553848267, "learning_rate": 2.9488299385553525e-05, "loss": 0.2795, "step": 8647, "teacher_loss": 0.2756844758987427 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.504398763179779, "learning_rate": 2.9487711044597764e-05, "loss": 0.3061, "step": 8648, "teacher_loss": 0.28407424688339233 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.2495500147342682, "learning_rate": 2.9487122371482218e-05, "loss": 0.2214, "step": 8649, "teacher_loss": 0.21832308173179626 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.41466641426086426, "learning_rate": 2.9486533366220394e-05, "loss": 0.2543, "step": 8650, "teacher_loss": 0.236490860581398 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5795063972473145, "learning_rate": 2.9485944028825794e-05, "loss": 0.2645, "step": 8651, "teacher_loss": 0.22944766283035278 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.48607945442199707, "learning_rate": 2.9485354359311927e-05, "loss": 0.2294, "step": 8652, "teacher_loss": 0.20084503293037415 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.5190906524658203, "learning_rate": 2.9484764357692318e-05, "loss": 0.3855, "step": 8653, "teacher_loss": 0.3707018196582794 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.235230952501297, "learning_rate": 2.9484174023980482e-05, "loss": 0.233, "step": 8654, "teacher_loss": 0.23275862634181976 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.25842565298080444, "learning_rate": 2.948358335818997e-05, "loss": 0.1882, "step": 8655, "teacher_loss": 0.18038642406463623 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.39177829027175903, "learning_rate": 2.948299236033431e-05, "loss": 0.2503, "step": 8656, "teacher_loss": 0.2345782071352005 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.3635066747665405, "learning_rate": 2.9482401030427057e-05, "loss": 0.2271, "step": 8657, "teacher_loss": 0.21193882822990417 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.953407883644104, "learning_rate": 2.9481809368481772e-05, "loss": 0.4145, "step": 8658, "teacher_loss": 0.3545989990234375 }, { "compression_loss": 0.0, "epoch": 1.56, "label_loss": 0.44426149129867554, "learning_rate": 2.9481217374512014e-05, "loss": 0.2011, "step": 8659, "teacher_loss": 0.17405922710895538 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7031523585319519, "learning_rate": 2.948062504853136e-05, "loss": 0.4002, "step": 8660, "teacher_loss": 0.3665161728858948 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.8205397725105286, "learning_rate": 2.948003239055339e-05, "loss": 0.3741, "step": 8661, "teacher_loss": 0.3244902491569519 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.2737279534339905, "learning_rate": 2.947943940059169e-05, "loss": 0.2798, "step": 8662, "teacher_loss": 0.2804777920246124 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.515887975692749, "learning_rate": 2.9478846078659856e-05, "loss": 0.3231, "step": 8663, "teacher_loss": 0.301708847284317 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7884516716003418, "learning_rate": 2.947825242477149e-05, "loss": 0.4601, "step": 8664, "teacher_loss": 0.42365455627441406 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.6113412380218506, "learning_rate": 2.9477658438940204e-05, "loss": 0.3117, "step": 8665, "teacher_loss": 0.2784503102302551 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.3069119453430176, "learning_rate": 2.9477064121179618e-05, "loss": 0.2648, "step": 8666, "teacher_loss": 0.26009976863861084 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7723685503005981, "learning_rate": 2.9476469471503357e-05, "loss": 0.3094, "step": 8667, "teacher_loss": 0.25794416666030884 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.390103280544281, "learning_rate": 2.9475874489925052e-05, "loss": 0.2345, "step": 8668, "teacher_loss": 0.21724724769592285 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.3343648314476013, "learning_rate": 2.9475279176458343e-05, "loss": 0.2503, "step": 8669, "teacher_loss": 0.24100151658058167 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.27346912026405334, "learning_rate": 2.9474683531116886e-05, "loss": 0.2282, "step": 8670, "teacher_loss": 0.22317636013031006 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.43901297450065613, "learning_rate": 2.947408755391433e-05, "loss": 0.3392, "step": 8671, "teacher_loss": 0.32808756828308105 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.4289487600326538, "learning_rate": 2.9473491244864347e-05, "loss": 0.2616, "step": 8672, "teacher_loss": 0.2430020123720169 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.695624053478241, "learning_rate": 2.94728946039806e-05, "loss": 0.4484, "step": 8673, "teacher_loss": 0.420968234539032 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.2031000256538391, "learning_rate": 2.947229763127677e-05, "loss": 0.2004, "step": 8674, "teacher_loss": 0.2000676393508911 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.47121721506118774, "learning_rate": 2.947170032676655e-05, "loss": 0.3252, "step": 8675, "teacher_loss": 0.30897989869117737 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 1.1334383487701416, "learning_rate": 2.9471102690463625e-05, "loss": 0.8145, "step": 8676, "teacher_loss": 0.7790335416793823 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.8293523788452148, "learning_rate": 2.94705047223817e-05, "loss": 0.3816, "step": 8677, "teacher_loss": 0.33182045817375183 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.509623110294342, "learning_rate": 2.9469906422534495e-05, "loss": 0.3289, "step": 8678, "teacher_loss": 0.3087852895259857 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7502414584159851, "learning_rate": 2.9469307790935712e-05, "loss": 0.3968, "step": 8679, "teacher_loss": 0.35755759477615356 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.4372464120388031, "learning_rate": 2.9468708827599087e-05, "loss": 0.2328, "step": 8680, "teacher_loss": 0.21011799573898315 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7151311635971069, "learning_rate": 2.9468109532538346e-05, "loss": 0.2722, "step": 8681, "teacher_loss": 0.2229982167482376 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.6397551894187927, "learning_rate": 2.9467509905767228e-05, "loss": 0.2399, "step": 8682, "teacher_loss": 0.19542112946510315 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 1.0269132852554321, "learning_rate": 2.946690994729949e-05, "loss": 1.0594, "step": 8683, "teacher_loss": 1.06304931640625 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.49443960189819336, "learning_rate": 2.9466309657148876e-05, "loss": 0.3232, "step": 8684, "teacher_loss": 0.3041972517967224 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.48493123054504395, "learning_rate": 2.9465709035329157e-05, "loss": 0.4258, "step": 8685, "teacher_loss": 0.41920238733291626 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.37493348121643066, "learning_rate": 2.9465108081854098e-05, "loss": 0.2559, "step": 8686, "teacher_loss": 0.24265140295028687 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.22569403052330017, "learning_rate": 2.9464506796737478e-05, "loss": 0.2537, "step": 8687, "teacher_loss": 0.2567977011203766 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.6183544397354126, "learning_rate": 2.9463905179993086e-05, "loss": 0.2689, "step": 8688, "teacher_loss": 0.23011058568954468 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.6737602353096008, "learning_rate": 2.9463303231634717e-05, "loss": 0.4056, "step": 8689, "teacher_loss": 0.37575221061706543 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.27392086386680603, "learning_rate": 2.946270095167616e-05, "loss": 0.2009, "step": 8690, "teacher_loss": 0.1928154081106186 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.43287795782089233, "learning_rate": 2.9462098340131238e-05, "loss": 0.194, "step": 8691, "teacher_loss": 0.16751116514205933 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.674911618232727, "learning_rate": 2.9461495397013757e-05, "loss": 0.3927, "step": 8692, "teacher_loss": 0.36130642890930176 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 1.1586272716522217, "learning_rate": 2.946089212233755e-05, "loss": 0.9276, "step": 8693, "teacher_loss": 0.9018793702125549 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.5047844052314758, "learning_rate": 2.9460288516116437e-05, "loss": 0.2565, "step": 8694, "teacher_loss": 0.22893774509429932 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.9601619839668274, "learning_rate": 2.9459684578364262e-05, "loss": 0.811, "step": 8695, "teacher_loss": 0.7943848371505737 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.16960570216178894, "learning_rate": 2.9459080309094878e-05, "loss": 0.1898, "step": 8696, "teacher_loss": 0.19198976457118988 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.361625075340271, "learning_rate": 2.9458475708322128e-05, "loss": 0.2546, "step": 8697, "teacher_loss": 0.24269789457321167 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.3513258099555969, "learning_rate": 2.945787077605988e-05, "loss": 0.2637, "step": 8698, "teacher_loss": 0.2539609968662262 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.3751927614212036, "learning_rate": 2.9457265512322004e-05, "loss": 0.2585, "step": 8699, "teacher_loss": 0.2455640733242035 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.17260484397411346, "learning_rate": 2.9456659917122372e-05, "loss": 0.1817, "step": 8700, "teacher_loss": 0.1826876401901245 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.33278852701187134, "learning_rate": 2.9456053990474872e-05, "loss": 0.2075, "step": 8701, "teacher_loss": 0.19360914826393127 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.18315088748931885, "learning_rate": 2.9455447732393395e-05, "loss": 0.2115, "step": 8702, "teacher_loss": 0.21463073790073395 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.6013479232788086, "learning_rate": 2.9454841142891846e-05, "loss": 0.2516, "step": 8703, "teacher_loss": 0.21268539130687714 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.2560952603816986, "learning_rate": 2.9454234221984123e-05, "loss": 0.1683, "step": 8704, "teacher_loss": 0.15859296917915344 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.4482208788394928, "learning_rate": 2.9453626969684145e-05, "loss": 0.2242, "step": 8705, "teacher_loss": 0.1993495523929596 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.5013689994812012, "learning_rate": 2.9453019386005836e-05, "loss": 0.2748, "step": 8706, "teacher_loss": 0.24963030219078064 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.7967034578323364, "learning_rate": 2.9452411470963122e-05, "loss": 0.6523, "step": 8707, "teacher_loss": 0.6362035274505615 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.43818965554237366, "learning_rate": 2.9451803224569952e-05, "loss": 0.2722, "step": 8708, "teacher_loss": 0.2537902295589447 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.26041287183761597, "learning_rate": 2.9451194646840255e-05, "loss": 0.1915, "step": 8709, "teacher_loss": 0.1838574856519699 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.8815737962722778, "learning_rate": 2.9450585737787996e-05, "loss": 0.3609, "step": 8710, "teacher_loss": 0.30306780338287354 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 1.0208110809326172, "learning_rate": 2.9449976497427127e-05, "loss": 0.5073, "step": 8711, "teacher_loss": 0.45028334856033325 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.5190050601959229, "learning_rate": 2.9449366925771622e-05, "loss": 0.3359, "step": 8712, "teacher_loss": 0.31552836298942566 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.501338541507721, "learning_rate": 2.9448757022835453e-05, "loss": 0.2748, "step": 8713, "teacher_loss": 0.24967384338378906 }, { "compression_loss": 0.0, "epoch": 1.57, "label_loss": 0.5927071571350098, "learning_rate": 2.9448146788632607e-05, "loss": 0.2387, "step": 8714, "teacher_loss": 0.19934552907943726 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.4255698025226593, "learning_rate": 2.9447536223177072e-05, "loss": 0.3575, "step": 8715, "teacher_loss": 0.34990426898002625 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.376946359872818, "learning_rate": 2.944692532648285e-05, "loss": 0.3571, "step": 8716, "teacher_loss": 0.35493841767311096 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.22233924269676208, "learning_rate": 2.944631409856394e-05, "loss": 0.2113, "step": 8717, "teacher_loss": 0.210092693567276 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.2584851384162903, "learning_rate": 2.9445702539434363e-05, "loss": 0.3611, "step": 8718, "teacher_loss": 0.3724518418312073 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 1.0913103818893433, "learning_rate": 2.9445090649108137e-05, "loss": 0.3637, "step": 8719, "teacher_loss": 0.2828175127506256 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6493569612503052, "learning_rate": 2.9444478427599293e-05, "loss": 0.5667, "step": 8720, "teacher_loss": 0.5574923753738403 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.42301419377326965, "learning_rate": 2.9443865874921865e-05, "loss": 0.3224, "step": 8721, "teacher_loss": 0.3112179636955261 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6200509071350098, "learning_rate": 2.9443252991089892e-05, "loss": 0.3029, "step": 8722, "teacher_loss": 0.26768988370895386 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.38496601581573486, "learning_rate": 2.9442639776117436e-05, "loss": 0.314, "step": 8723, "teacher_loss": 0.30615168809890747 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.489969402551651, "learning_rate": 2.9442026230018554e-05, "loss": 0.3517, "step": 8724, "teacher_loss": 0.3362851142883301 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.26782041788101196, "learning_rate": 2.9441412352807304e-05, "loss": 0.2607, "step": 8725, "teacher_loss": 0.2599547207355499 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.5229651927947998, "learning_rate": 2.944079814449777e-05, "loss": 0.2216, "step": 8726, "teacher_loss": 0.18807940185070038 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.617690920829773, "learning_rate": 2.9440183605104027e-05, "loss": 0.4033, "step": 8727, "teacher_loss": 0.3794995844364166 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.5342797636985779, "learning_rate": 2.943956873464017e-05, "loss": 0.3394, "step": 8728, "teacher_loss": 0.3177253007888794 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.37761110067367554, "learning_rate": 2.9438953533120293e-05, "loss": 0.2774, "step": 8729, "teacher_loss": 0.266218900680542 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.35491907596588135, "learning_rate": 2.9438338000558503e-05, "loss": 0.2331, "step": 8730, "teacher_loss": 0.2195407748222351 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3313908874988556, "learning_rate": 2.943772213696891e-05, "loss": 0.3137, "step": 8731, "teacher_loss": 0.3117339313030243 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.5769060254096985, "learning_rate": 2.943710594236563e-05, "loss": 0.3104, "step": 8732, "teacher_loss": 0.2808319330215454 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.523478090763092, "learning_rate": 2.9436489416762807e-05, "loss": 0.2641, "step": 8733, "teacher_loss": 0.2352854609489441 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 1.2077131271362305, "learning_rate": 2.9435872560174557e-05, "loss": 0.8056, "step": 8734, "teacher_loss": 0.7609001398086548 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.21983544528484344, "learning_rate": 2.943525537261503e-05, "loss": 0.2328, "step": 8735, "teacher_loss": 0.23427045345306396 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3821295499801636, "learning_rate": 2.9434637854098377e-05, "loss": 0.2829, "step": 8736, "teacher_loss": 0.27187561988830566 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.41544967889785767, "learning_rate": 2.9434020004638757e-05, "loss": 0.1965, "step": 8737, "teacher_loss": 0.17214302718639374 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6012518405914307, "learning_rate": 2.9433401824250334e-05, "loss": 0.2176, "step": 8738, "teacher_loss": 0.1749608814716339 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3901960849761963, "learning_rate": 2.943278331294728e-05, "loss": 0.2576, "step": 8739, "teacher_loss": 0.24285341799259186 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3115026652812958, "learning_rate": 2.9432164470743776e-05, "loss": 0.1959, "step": 8740, "teacher_loss": 0.1830420047044754 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.10500791668891907, "learning_rate": 2.943154529765401e-05, "loss": 0.2311, "step": 8741, "teacher_loss": 0.24511590600013733 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3917456865310669, "learning_rate": 2.9430925793692177e-05, "loss": 0.1838, "step": 8742, "teacher_loss": 0.16064569354057312 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3375360667705536, "learning_rate": 2.9430305958872483e-05, "loss": 0.2094, "step": 8743, "teacher_loss": 0.19510847330093384 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.7353867292404175, "learning_rate": 2.942968579320914e-05, "loss": 0.2872, "step": 8744, "teacher_loss": 0.23743407428264618 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.45641446113586426, "learning_rate": 2.9429065296716363e-05, "loss": 0.2682, "step": 8745, "teacher_loss": 0.2472875863313675 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.5614002346992493, "learning_rate": 2.9428444469408375e-05, "loss": 0.2358, "step": 8746, "teacher_loss": 0.1995747834444046 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.17120251059532166, "learning_rate": 2.942782331129942e-05, "loss": 0.2027, "step": 8747, "teacher_loss": 0.2061944305896759 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.28657540678977966, "learning_rate": 2.9427201822403734e-05, "loss": 0.2689, "step": 8748, "teacher_loss": 0.26695016026496887 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.26850274205207825, "learning_rate": 2.942658000273556e-05, "loss": 0.2922, "step": 8749, "teacher_loss": 0.2947887182235718 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.40771299600601196, "learning_rate": 2.942595785230916e-05, "loss": 0.2184, "step": 8750, "teacher_loss": 0.19737279415130615 }, { "epoch": 1.58, "eval_exact_match": 79.29990539262063, "eval_f1": 86.90499520309895, "step": 8750 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6159347295761108, "learning_rate": 2.9425335371138802e-05, "loss": 0.2751, "step": 8751, "teacher_loss": 0.23721975088119507 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.41514527797698975, "learning_rate": 2.942471255923875e-05, "loss": 0.2699, "step": 8752, "teacher_loss": 0.2537892460823059 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.4908739924430847, "learning_rate": 2.942408941662329e-05, "loss": 0.2353, "step": 8753, "teacher_loss": 0.20689064264297485 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.483184814453125, "learning_rate": 2.9423465943306703e-05, "loss": 0.2318, "step": 8754, "teacher_loss": 0.2039155662059784 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6774777770042419, "learning_rate": 2.9422842139303283e-05, "loss": 0.223, "step": 8755, "teacher_loss": 0.172515869140625 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.29104936122894287, "learning_rate": 2.9422218004627338e-05, "loss": 0.2397, "step": 8756, "teacher_loss": 0.23404385149478912 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.4680928885936737, "learning_rate": 2.9421593539293173e-05, "loss": 0.2057, "step": 8757, "teacher_loss": 0.1765434741973877 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.4866985082626343, "learning_rate": 2.942096874331511e-05, "loss": 0.2151, "step": 8758, "teacher_loss": 0.18488512933254242 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6421576738357544, "learning_rate": 2.9420343616707465e-05, "loss": 0.3877, "step": 8759, "teacher_loss": 0.3594168424606323 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.41616418957710266, "learning_rate": 2.941971815948458e-05, "loss": 0.259, "step": 8760, "teacher_loss": 0.2415849268436432 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6059359312057495, "learning_rate": 2.9419092371660784e-05, "loss": 0.5688, "step": 8761, "teacher_loss": 0.5646283626556396 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.6384348273277283, "learning_rate": 2.9418466253250434e-05, "loss": 0.3895, "step": 8762, "teacher_loss": 0.36187058687210083 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.13635368645191193, "learning_rate": 2.941783980426788e-05, "loss": 0.1427, "step": 8763, "teacher_loss": 0.14343036711215973 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.5273498296737671, "learning_rate": 2.941721302472749e-05, "loss": 0.4556, "step": 8764, "teacher_loss": 0.4476405382156372 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3698505163192749, "learning_rate": 2.9416585914643627e-05, "loss": 0.2754, "step": 8765, "teacher_loss": 0.26494157314300537 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.23717889189720154, "learning_rate": 2.9415958474030675e-05, "loss": 0.2968, "step": 8766, "teacher_loss": 0.303442120552063 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.21350878477096558, "learning_rate": 2.9415330702903015e-05, "loss": 0.1652, "step": 8767, "teacher_loss": 0.15985023975372314 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.3323150873184204, "learning_rate": 2.941470260127504e-05, "loss": 0.2343, "step": 8768, "teacher_loss": 0.22344408929347992 }, { "compression_loss": 0.0, "epoch": 1.58, "label_loss": 0.29647210240364075, "learning_rate": 2.9414074169161152e-05, "loss": 0.2225, "step": 8769, "teacher_loss": 0.21433106064796448 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.378919780254364, "learning_rate": 2.9413445406575762e-05, "loss": 0.3768, "step": 8770, "teacher_loss": 0.37657803297042847 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.17018666863441467, "learning_rate": 2.941281631353328e-05, "loss": 0.1868, "step": 8771, "teacher_loss": 0.18868331611156464 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.21196800470352173, "learning_rate": 2.941218689004813e-05, "loss": 0.3198, "step": 8772, "teacher_loss": 0.33178332448005676 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.5520363450050354, "learning_rate": 2.941155713613475e-05, "loss": 0.338, "step": 8773, "teacher_loss": 0.3142227232456207 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.31486591696739197, "learning_rate": 2.9410927051807568e-05, "loss": 0.3105, "step": 8774, "teacher_loss": 0.3100135326385498 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.45442333817481995, "learning_rate": 2.9410296637081036e-05, "loss": 0.2527, "step": 8775, "teacher_loss": 0.23032473027706146 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.8308318257331848, "learning_rate": 2.9409665891969612e-05, "loss": 0.32, "step": 8776, "teacher_loss": 0.26328548789024353 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.7619255781173706, "learning_rate": 2.9409034816487745e-05, "loss": 0.3055, "step": 8777, "teacher_loss": 0.25475484132766724 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.46176600456237793, "learning_rate": 2.940840341064991e-05, "loss": 0.2804, "step": 8778, "teacher_loss": 0.2602023780345917 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.29514408111572266, "learning_rate": 2.9407771674470585e-05, "loss": 0.2513, "step": 8779, "teacher_loss": 0.24644513428211212 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.38248908519744873, "learning_rate": 2.9407139607964256e-05, "loss": 0.2957, "step": 8780, "teacher_loss": 0.28605979681015015 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.29497385025024414, "learning_rate": 2.9406507211145405e-05, "loss": 0.2376, "step": 8781, "teacher_loss": 0.23122447729110718 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.3572332262992859, "learning_rate": 2.9405874484028536e-05, "loss": 0.3327, "step": 8782, "teacher_loss": 0.3300093412399292 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.3291904330253601, "learning_rate": 2.940524142662816e-05, "loss": 0.2485, "step": 8783, "teacher_loss": 0.23957999050617218 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.8445284366607666, "learning_rate": 2.940460803895879e-05, "loss": 0.362, "step": 8784, "teacher_loss": 0.3083879351615906 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.946256160736084, "learning_rate": 2.9403974321034937e-05, "loss": 0.5835, "step": 8785, "teacher_loss": 0.5431619882583618 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.40027478337287903, "learning_rate": 2.9403340272871142e-05, "loss": 0.305, "step": 8786, "teacher_loss": 0.2943701148033142 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.4661625921726227, "learning_rate": 2.940270589448194e-05, "loss": 0.2734, "step": 8787, "teacher_loss": 0.2520264685153961 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.8481431603431702, "learning_rate": 2.940207118588187e-05, "loss": 0.5574, "step": 8788, "teacher_loss": 0.5250433683395386 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.4612758159637451, "learning_rate": 2.940143614708549e-05, "loss": 0.2386, "step": 8789, "teacher_loss": 0.21381625533103943 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.6391294002532959, "learning_rate": 2.9400800778107357e-05, "loss": 0.3985, "step": 8790, "teacher_loss": 0.3717923164367676 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.5397456288337708, "learning_rate": 2.9400165078962035e-05, "loss": 0.2853, "step": 8791, "teacher_loss": 0.2570365071296692 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.44662588834762573, "learning_rate": 2.9399529049664104e-05, "loss": 0.3602, "step": 8792, "teacher_loss": 0.3506399095058441 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.2950704097747803, "learning_rate": 2.9398892690228147e-05, "loss": 0.1975, "step": 8793, "teacher_loss": 0.1866893470287323 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.2504015564918518, "learning_rate": 2.9398256000668745e-05, "loss": 0.1922, "step": 8794, "teacher_loss": 0.1857699751853943 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.30887988209724426, "learning_rate": 2.9397618981000502e-05, "loss": 0.2637, "step": 8795, "teacher_loss": 0.25866103172302246 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.12059152126312256, "learning_rate": 2.9396981631238024e-05, "loss": 0.1736, "step": 8796, "teacher_loss": 0.179483100771904 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.6224748492240906, "learning_rate": 2.9396343951395923e-05, "loss": 0.3494, "step": 8797, "teacher_loss": 0.3190913200378418 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.13256776332855225, "learning_rate": 2.9395705941488814e-05, "loss": 0.1653, "step": 8798, "teacher_loss": 0.1689566671848297 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.35983216762542725, "learning_rate": 2.939506760153133e-05, "loss": 0.2239, "step": 8799, "teacher_loss": 0.20875108242034912 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.4718207120895386, "learning_rate": 2.93944289315381e-05, "loss": 0.4454, "step": 8800, "teacher_loss": 0.44250959157943726 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.21875980496406555, "learning_rate": 2.939378993152378e-05, "loss": 0.2277, "step": 8801, "teacher_loss": 0.22866126894950867 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.6279621124267578, "learning_rate": 2.939315060150301e-05, "loss": 0.3313, "step": 8802, "teacher_loss": 0.2983255088329315 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.21201640367507935, "learning_rate": 2.9392510941490444e-05, "loss": 0.1893, "step": 8803, "teacher_loss": 0.1867557317018509 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.26410508155822754, "learning_rate": 2.9391870951500757e-05, "loss": 0.2153, "step": 8804, "teacher_loss": 0.20991788804531097 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.326290100812912, "learning_rate": 2.939123063154862e-05, "loss": 0.2197, "step": 8805, "teacher_loss": 0.2078983634710312 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.332580029964447, "learning_rate": 2.9390589981648708e-05, "loss": 0.3575, "step": 8806, "teacher_loss": 0.36025679111480713 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.527552604675293, "learning_rate": 2.9389949001815712e-05, "loss": 0.1968, "step": 8807, "teacher_loss": 0.16002824902534485 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.5263674259185791, "learning_rate": 2.938930769206433e-05, "loss": 0.3668, "step": 8808, "teacher_loss": 0.3490758538246155 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.3165118098258972, "learning_rate": 2.9388666052409268e-05, "loss": 0.1799, "step": 8809, "teacher_loss": 0.16470378637313843 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.31869256496429443, "learning_rate": 2.938802408286523e-05, "loss": 0.2042, "step": 8810, "teacher_loss": 0.19147758185863495 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.4055965542793274, "learning_rate": 2.9387381783446937e-05, "loss": 0.2503, "step": 8811, "teacher_loss": 0.23299652338027954 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.5883247256278992, "learning_rate": 2.9386739154169115e-05, "loss": 0.2993, "step": 8812, "teacher_loss": 0.267169713973999 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.621792733669281, "learning_rate": 2.9386096195046496e-05, "loss": 0.3092, "step": 8813, "teacher_loss": 0.2744479179382324 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.17292936146259308, "learning_rate": 2.9385452906093828e-05, "loss": 0.2174, "step": 8814, "teacher_loss": 0.2223585844039917 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.7421229481697083, "learning_rate": 2.938480928732585e-05, "loss": 0.3258, "step": 8815, "teacher_loss": 0.2794947624206543 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.5212373733520508, "learning_rate": 2.938416533875733e-05, "loss": 0.3001, "step": 8816, "teacher_loss": 0.27547842264175415 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.44826480746269226, "learning_rate": 2.9383521060403012e-05, "loss": 0.2851, "step": 8817, "teacher_loss": 0.26698511838912964 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.4052944779396057, "learning_rate": 2.9382876452277688e-05, "loss": 0.2459, "step": 8818, "teacher_loss": 0.22823883593082428 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.3844138979911804, "learning_rate": 2.938223151439613e-05, "loss": 0.1753, "step": 8819, "teacher_loss": 0.1520141065120697 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.3801078796386719, "learning_rate": 2.9381586246773124e-05, "loss": 0.1691, "step": 8820, "teacher_loss": 0.14569216966629028 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.566926896572113, "learning_rate": 2.9380940649423462e-05, "loss": 0.2326, "step": 8821, "teacher_loss": 0.1954750120639801 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.11366164684295654, "learning_rate": 2.9380294722361943e-05, "loss": 0.1464, "step": 8822, "teacher_loss": 0.15003632009029388 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 1.1682119369506836, "learning_rate": 2.937964846560339e-05, "loss": 0.3679, "step": 8823, "teacher_loss": 0.27901265025138855 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 0.38171201944351196, "learning_rate": 2.93790018791626e-05, "loss": 0.3226, "step": 8824, "teacher_loss": 0.3160545825958252 }, { "compression_loss": 0.0, "epoch": 1.59, "label_loss": 1.2082462310791016, "learning_rate": 2.9378354963054412e-05, "loss": 0.5746, "step": 8825, "teacher_loss": 0.5041833519935608 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.3155685067176819, "learning_rate": 2.9377707717293657e-05, "loss": 0.2753, "step": 8826, "teacher_loss": 0.27086973190307617 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5268188118934631, "learning_rate": 2.9377060141895167e-05, "loss": 0.2459, "step": 8827, "teacher_loss": 0.2146688848733902 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6934584379196167, "learning_rate": 2.9376412236873792e-05, "loss": 0.2195, "step": 8828, "teacher_loss": 0.16678529977798462 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5331879258155823, "learning_rate": 2.9375764002244386e-05, "loss": 0.3056, "step": 8829, "teacher_loss": 0.2802583575248718 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.23903435468673706, "learning_rate": 2.9375115438021815e-05, "loss": 0.2087, "step": 8830, "teacher_loss": 0.2053188532590866 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5347850322723389, "learning_rate": 2.9374466544220947e-05, "loss": 0.3201, "step": 8831, "teacher_loss": 0.29626569151878357 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.56703782081604, "learning_rate": 2.937381732085665e-05, "loss": 0.3143, "step": 8832, "teacher_loss": 0.2862128019332886 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6218544244766235, "learning_rate": 2.9373167767943826e-05, "loss": 0.2971, "step": 8833, "teacher_loss": 0.26097577810287476 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.27498817443847656, "learning_rate": 2.9372517885497357e-05, "loss": 0.3458, "step": 8834, "teacher_loss": 0.353631854057312 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.3907712697982788, "learning_rate": 2.937186767353214e-05, "loss": 0.3253, "step": 8835, "teacher_loss": 0.31804704666137695 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6646755337715149, "learning_rate": 2.9371217132063086e-05, "loss": 0.2978, "step": 8836, "teacher_loss": 0.2570681869983673 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6205227375030518, "learning_rate": 2.9370566261105113e-05, "loss": 0.2011, "step": 8837, "teacher_loss": 0.1545419991016388 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.18541185557842255, "learning_rate": 2.936991506067314e-05, "loss": 0.2024, "step": 8838, "teacher_loss": 0.2042592465877533 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.8314276933670044, "learning_rate": 2.9369263530782096e-05, "loss": 0.4272, "step": 8839, "teacher_loss": 0.38223615288734436 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.32398173213005066, "learning_rate": 2.9368611671446923e-05, "loss": 0.2614, "step": 8840, "teacher_loss": 0.25439882278442383 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5677398443222046, "learning_rate": 2.9367959482682564e-05, "loss": 0.3202, "step": 8841, "teacher_loss": 0.29271644353866577 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.49968668818473816, "learning_rate": 2.9367306964503968e-05, "loss": 0.3073, "step": 8842, "teacher_loss": 0.28591781854629517 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.502788245677948, "learning_rate": 2.93666541169261e-05, "loss": 0.3987, "step": 8843, "teacher_loss": 0.38716983795166016 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.662237286567688, "learning_rate": 2.936600093996393e-05, "loss": 0.5754, "step": 8844, "teacher_loss": 0.5657215118408203 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6181904077529907, "learning_rate": 2.936534743363243e-05, "loss": 0.3062, "step": 8845, "teacher_loss": 0.27153658866882324 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.8824832439422607, "learning_rate": 2.9364693597946583e-05, "loss": 0.3706, "step": 8846, "teacher_loss": 0.31376832723617554 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.8539354205131531, "learning_rate": 2.9364039432921374e-05, "loss": 0.3004, "step": 8847, "teacher_loss": 0.23885369300842285 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.583228349685669, "learning_rate": 2.936338493857181e-05, "loss": 0.2084, "step": 8848, "teacher_loss": 0.16680654883384705 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.41357293725013733, "learning_rate": 2.9362730114912892e-05, "loss": 0.243, "step": 8849, "teacher_loss": 0.2240189164876938 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.21565717458724976, "learning_rate": 2.9362074961959635e-05, "loss": 0.2025, "step": 8850, "teacher_loss": 0.2010529637336731 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6318279504776001, "learning_rate": 2.936141947972706e-05, "loss": 0.2638, "step": 8851, "teacher_loss": 0.22289103269577026 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.4330080449581146, "learning_rate": 2.936076366823019e-05, "loss": 0.3472, "step": 8852, "teacher_loss": 0.33768126368522644 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.43628689646720886, "learning_rate": 2.936010752748407e-05, "loss": 0.3422, "step": 8853, "teacher_loss": 0.3317187428474426 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5459315776824951, "learning_rate": 2.9359451057503734e-05, "loss": 0.2907, "step": 8854, "teacher_loss": 0.262288361787796 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 1.1229536533355713, "learning_rate": 2.9358794258304237e-05, "loss": 0.3669, "step": 8855, "teacher_loss": 0.2828805446624756 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5496934056282043, "learning_rate": 2.9358137129900638e-05, "loss": 0.4022, "step": 8856, "teacher_loss": 0.38576555252075195 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6932355761528015, "learning_rate": 2.9357479672308005e-05, "loss": 0.323, "step": 8857, "teacher_loss": 0.2818843126296997 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.22394317388534546, "learning_rate": 2.935682188554141e-05, "loss": 0.2305, "step": 8858, "teacher_loss": 0.23123431205749512 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.30514782667160034, "learning_rate": 2.935616376961593e-05, "loss": 0.2555, "step": 8859, "teacher_loss": 0.24992826581001282 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.8922151327133179, "learning_rate": 2.9355505324546658e-05, "loss": 0.3217, "step": 8860, "teacher_loss": 0.2582681179046631 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.8770116567611694, "learning_rate": 2.9354846550348685e-05, "loss": 0.7195, "step": 8861, "teacher_loss": 0.7020071744918823 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.823535680770874, "learning_rate": 2.9354187447037124e-05, "loss": 0.3124, "step": 8862, "teacher_loss": 0.25558415055274963 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.6106618046760559, "learning_rate": 2.935352801462708e-05, "loss": 0.2767, "step": 8863, "teacher_loss": 0.23955821990966797 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5355809330940247, "learning_rate": 2.9352868253133676e-05, "loss": 0.2556, "step": 8864, "teacher_loss": 0.22453457117080688 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5119600296020508, "learning_rate": 2.9352208162572028e-05, "loss": 0.3949, "step": 8865, "teacher_loss": 0.3818776309490204 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.24631869792938232, "learning_rate": 2.9351547742957284e-05, "loss": 0.2712, "step": 8866, "teacher_loss": 0.27391621470451355 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.1506839394569397, "learning_rate": 2.9350886994304572e-05, "loss": 0.2215, "step": 8867, "teacher_loss": 0.22936293482780457 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5472217202186584, "learning_rate": 2.935022591662905e-05, "loss": 0.2448, "step": 8868, "teacher_loss": 0.21124999225139618 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.23334720730781555, "learning_rate": 2.9349564509945875e-05, "loss": 0.3058, "step": 8869, "teacher_loss": 0.31389400362968445 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5410623550415039, "learning_rate": 2.9348902774270203e-05, "loss": 0.3162, "step": 8870, "teacher_loss": 0.29125505685806274 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.5699049830436707, "learning_rate": 2.934824070961721e-05, "loss": 0.3406, "step": 8871, "teacher_loss": 0.31508421897888184 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.38524335622787476, "learning_rate": 2.934757831600208e-05, "loss": 0.3268, "step": 8872, "teacher_loss": 0.3203461766242981 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.7102386951446533, "learning_rate": 2.934691559343999e-05, "loss": 0.3387, "step": 8873, "teacher_loss": 0.2974720895290375 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.4853948950767517, "learning_rate": 2.934625254194615e-05, "loss": 0.2747, "step": 8874, "teacher_loss": 0.2513313293457031 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.7584381699562073, "learning_rate": 2.9345589161535745e-05, "loss": 0.3005, "step": 8875, "teacher_loss": 0.24961210787296295 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.27399343252182007, "learning_rate": 2.9344925452223988e-05, "loss": 0.2716, "step": 8876, "teacher_loss": 0.2713875472545624 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.4122808277606964, "learning_rate": 2.9344261414026103e-05, "loss": 0.2756, "step": 8877, "teacher_loss": 0.26036953926086426 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.37050020694732666, "learning_rate": 2.9343597046957308e-05, "loss": 0.2512, "step": 8878, "teacher_loss": 0.2379709780216217 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.45558881759643555, "learning_rate": 2.934293235103284e-05, "loss": 0.2352, "step": 8879, "teacher_loss": 0.2106691598892212 }, { "compression_loss": 0.0, "epoch": 1.6, "label_loss": 0.357721209526062, "learning_rate": 2.934226732626793e-05, "loss": 0.2594, "step": 8880, "teacher_loss": 0.2484402060508728 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.38612765073776245, "learning_rate": 2.9341601972677833e-05, "loss": 0.2535, "step": 8881, "teacher_loss": 0.23880280554294586 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.1699083298444748, "learning_rate": 2.9340936290277802e-05, "loss": 0.1927, "step": 8882, "teacher_loss": 0.19521009922027588 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.7220228314399719, "learning_rate": 2.93402702790831e-05, "loss": 0.6361, "step": 8883, "teacher_loss": 0.6265135407447815 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.748738169670105, "learning_rate": 2.9339603939108994e-05, "loss": 0.4078, "step": 8884, "teacher_loss": 0.36989831924438477 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.512102484703064, "learning_rate": 2.933893727037076e-05, "loss": 0.3096, "step": 8885, "teacher_loss": 0.2870524823665619 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.18286707997322083, "learning_rate": 2.9338270272883686e-05, "loss": 0.1973, "step": 8886, "teacher_loss": 0.19886897504329681 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.5202264785766602, "learning_rate": 2.9337602946663064e-05, "loss": 0.26, "step": 8887, "teacher_loss": 0.23103934526443481 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.40252432227134705, "learning_rate": 2.933693529172419e-05, "loss": 0.2885, "step": 8888, "teacher_loss": 0.2757876217365265 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.5223032832145691, "learning_rate": 2.9336267308082375e-05, "loss": 0.2745, "step": 8889, "teacher_loss": 0.24700312316417694 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.4332813024520874, "learning_rate": 2.9335598995752934e-05, "loss": 0.1739, "step": 8890, "teacher_loss": 0.14508303999900818 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.7154121994972229, "learning_rate": 2.933493035475119e-05, "loss": 0.7203, "step": 8891, "teacher_loss": 0.7208747267723083 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.7543063759803772, "learning_rate": 2.9334261385092472e-05, "loss": 0.2971, "step": 8892, "teacher_loss": 0.24628636240959167 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.6075509190559387, "learning_rate": 2.9333592086792113e-05, "loss": 0.4389, "step": 8893, "teacher_loss": 0.4201599657535553 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.1301405131816864, "learning_rate": 2.933292245986546e-05, "loss": 0.2355, "step": 8894, "teacher_loss": 0.24723877012729645 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.446560263633728, "learning_rate": 2.9332252504327875e-05, "loss": 0.2835, "step": 8895, "teacher_loss": 0.26541048288345337 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.24469871819019318, "learning_rate": 2.9331582220194705e-05, "loss": 0.3367, "step": 8896, "teacher_loss": 0.346964955329895 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.24737313389778137, "learning_rate": 2.9330911607481324e-05, "loss": 0.2348, "step": 8897, "teacher_loss": 0.23338279128074646 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.497220516204834, "learning_rate": 2.9330240666203104e-05, "loss": 0.3217, "step": 8898, "teacher_loss": 0.3021583557128906 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.4921434819698334, "learning_rate": 2.932956939637543e-05, "loss": 0.3439, "step": 8899, "teacher_loss": 0.327475368976593 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.38183653354644775, "learning_rate": 2.9328897798013695e-05, "loss": 0.1723, "step": 8900, "teacher_loss": 0.1489918828010559 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.29097434878349304, "learning_rate": 2.932822587113329e-05, "loss": 0.1939, "step": 8901, "teacher_loss": 0.1830824315547943 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.26875928044319153, "learning_rate": 2.932755361574963e-05, "loss": 0.2025, "step": 8902, "teacher_loss": 0.19511710107326508 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.723259687423706, "learning_rate": 2.932688103187812e-05, "loss": 0.2979, "step": 8903, "teacher_loss": 0.25059446692466736 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.6912726759910583, "learning_rate": 2.932620811953418e-05, "loss": 0.3043, "step": 8904, "teacher_loss": 0.2613466680049896 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2814003825187683, "learning_rate": 2.932553487873324e-05, "loss": 0.2199, "step": 8905, "teacher_loss": 0.21309661865234375 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.4984525144100189, "learning_rate": 2.9324861309490736e-05, "loss": 0.2367, "step": 8906, "teacher_loss": 0.2076694667339325 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.7079552412033081, "learning_rate": 2.932418741182211e-05, "loss": 0.3026, "step": 8907, "teacher_loss": 0.25761228799819946 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.30388885736465454, "learning_rate": 2.9323513185742813e-05, "loss": 0.2059, "step": 8908, "teacher_loss": 0.1949751079082489 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.3619818687438965, "learning_rate": 2.9322838631268303e-05, "loss": 0.1825, "step": 8909, "teacher_loss": 0.16257047653198242 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2845340371131897, "learning_rate": 2.9322163748414044e-05, "loss": 0.2222, "step": 8910, "teacher_loss": 0.215323805809021 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2997760474681854, "learning_rate": 2.9321488537195513e-05, "loss": 0.1715, "step": 8911, "teacher_loss": 0.15723916888237 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.6677510738372803, "learning_rate": 2.9320812997628184e-05, "loss": 0.3061, "step": 8912, "teacher_loss": 0.2659376859664917 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.5900148749351501, "learning_rate": 2.9320137129727553e-05, "loss": 0.3644, "step": 8913, "teacher_loss": 0.3392926752567291 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.7351362109184265, "learning_rate": 2.931946093350911e-05, "loss": 0.3577, "step": 8914, "teacher_loss": 0.31580817699432373 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.29090964794158936, "learning_rate": 2.931878440898836e-05, "loss": 0.1732, "step": 8915, "teacher_loss": 0.16013041138648987 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2587795853614807, "learning_rate": 2.9318107556180812e-05, "loss": 0.218, "step": 8916, "teacher_loss": 0.21343928575515747 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.47493237257003784, "learning_rate": 2.9317430375101985e-05, "loss": 0.2281, "step": 8917, "teacher_loss": 0.20071488618850708 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2877231538295746, "learning_rate": 2.931675286576741e-05, "loss": 0.1851, "step": 8918, "teacher_loss": 0.17369705438613892 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.3706057071685791, "learning_rate": 2.931607502819261e-05, "loss": 0.3822, "step": 8919, "teacher_loss": 0.3835172653198242 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.5336982011795044, "learning_rate": 2.9315396862393137e-05, "loss": 0.2459, "step": 8920, "teacher_loss": 0.21395309269428253 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 1.0213711261749268, "learning_rate": 2.931471836838453e-05, "loss": 0.447, "step": 8921, "teacher_loss": 0.3831454813480377 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.4956536293029785, "learning_rate": 2.9314039546182353e-05, "loss": 0.2365, "step": 8922, "teacher_loss": 0.20772519707679749 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.35153013467788696, "learning_rate": 2.931336039580216e-05, "loss": 0.3114, "step": 8923, "teacher_loss": 0.3069232106208801 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.8284546136856079, "learning_rate": 2.9312680917259527e-05, "loss": 0.3213, "step": 8924, "teacher_loss": 0.26498955488204956 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.38693302869796753, "learning_rate": 2.9312001110570034e-05, "loss": 0.4074, "step": 8925, "teacher_loss": 0.409656286239624 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.6033519506454468, "learning_rate": 2.9311320975749263e-05, "loss": 0.2808, "step": 8926, "teacher_loss": 0.24496988952159882 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.773811936378479, "learning_rate": 2.931064051281281e-05, "loss": 0.3445, "step": 8927, "teacher_loss": 0.2968422770500183 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.528587818145752, "learning_rate": 2.930995972177628e-05, "loss": 0.3317, "step": 8928, "teacher_loss": 0.30982518196105957 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.38173651695251465, "learning_rate": 2.9309278602655272e-05, "loss": 0.4563, "step": 8929, "teacher_loss": 0.46462106704711914 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.2777811288833618, "learning_rate": 2.930859715546541e-05, "loss": 0.2323, "step": 8930, "teacher_loss": 0.2272282838821411 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.22249789535999298, "learning_rate": 2.930791538022231e-05, "loss": 0.2225, "step": 8931, "teacher_loss": 0.2224869430065155 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.6952388286590576, "learning_rate": 2.9307233276941613e-05, "loss": 0.5399, "step": 8932, "teacher_loss": 0.5226446390151978 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.8608593940734863, "learning_rate": 2.9306550845638953e-05, "loss": 0.2905, "step": 8933, "teacher_loss": 0.227136492729187 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.42483726143836975, "learning_rate": 2.930586808632997e-05, "loss": 0.2453, "step": 8934, "teacher_loss": 0.2253992259502411 }, { "compression_loss": 0.0, "epoch": 1.61, "label_loss": 0.44151902198791504, "learning_rate": 2.9305184999030324e-05, "loss": 0.3266, "step": 8935, "teacher_loss": 0.31384968757629395 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.16030415892601013, "learning_rate": 2.930450158375568e-05, "loss": 0.1363, "step": 8936, "teacher_loss": 0.13366684317588806 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.2851826548576355, "learning_rate": 2.93038178405217e-05, "loss": 0.2697, "step": 8937, "teacher_loss": 0.26794740557670593 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6795215010643005, "learning_rate": 2.930313376934406e-05, "loss": 0.3172, "step": 8938, "teacher_loss": 0.27696341276168823 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3784475326538086, "learning_rate": 2.9302449370238447e-05, "loss": 0.2441, "step": 8939, "teacher_loss": 0.22915582358837128 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.7547372579574585, "learning_rate": 2.9301764643220553e-05, "loss": 0.4154, "step": 8940, "teacher_loss": 0.37764161825180054 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3426719009876251, "learning_rate": 2.930107958830607e-05, "loss": 0.3605, "step": 8941, "teacher_loss": 0.3624359965324402 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.767907977104187, "learning_rate": 2.9300394205510713e-05, "loss": 0.2562, "step": 8942, "teacher_loss": 0.19929450750350952 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.41128912568092346, "learning_rate": 2.9299708494850185e-05, "loss": 0.2139, "step": 8943, "teacher_loss": 0.19191959500312805 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.29214543104171753, "learning_rate": 2.929902245634022e-05, "loss": 0.2764, "step": 8944, "teacher_loss": 0.274662584066391 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.567003071308136, "learning_rate": 2.9298336089996538e-05, "loss": 0.3016, "step": 8945, "teacher_loss": 0.27211514115333557 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.25821882486343384, "learning_rate": 2.929764939583488e-05, "loss": 0.2658, "step": 8946, "teacher_loss": 0.26661422848701477 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.38896602392196655, "learning_rate": 2.929696237387099e-05, "loss": 0.2485, "step": 8947, "teacher_loss": 0.23284614086151123 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4900261163711548, "learning_rate": 2.9296275024120616e-05, "loss": 0.2698, "step": 8948, "teacher_loss": 0.24528133869171143 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.7298829555511475, "learning_rate": 2.9295587346599515e-05, "loss": 0.4291, "step": 8949, "teacher_loss": 0.39565277099609375 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4621315896511078, "learning_rate": 2.9294899341323456e-05, "loss": 0.2236, "step": 8950, "teacher_loss": 0.1970585584640503 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.2600078880786896, "learning_rate": 2.929421100830822e-05, "loss": 0.2147, "step": 8951, "teacher_loss": 0.20970812439918518 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.16284877061843872, "learning_rate": 2.9293522347569575e-05, "loss": 0.2355, "step": 8952, "teacher_loss": 0.24357135593891144 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4467979669570923, "learning_rate": 2.929283335912332e-05, "loss": 0.3168, "step": 8953, "teacher_loss": 0.3023749887943268 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.5160260796546936, "learning_rate": 2.929214404298525e-05, "loss": 0.2287, "step": 8954, "teacher_loss": 0.196784108877182 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.19141361117362976, "learning_rate": 2.929145439917116e-05, "loss": 0.2554, "step": 8955, "teacher_loss": 0.2625039219856262 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3893775939941406, "learning_rate": 2.9290764427696875e-05, "loss": 0.2684, "step": 8956, "teacher_loss": 0.2549407482147217 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.18428722023963928, "learning_rate": 2.9290074128578207e-05, "loss": 0.1884, "step": 8957, "teacher_loss": 0.18889513611793518 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 1.0659804344177246, "learning_rate": 2.9289383501830983e-05, "loss": 0.6758, "step": 8958, "teacher_loss": 0.6324576139450073 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3802110254764557, "learning_rate": 2.9288692547471036e-05, "loss": 0.2067, "step": 8959, "teacher_loss": 0.18744316697120667 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.7226102948188782, "learning_rate": 2.9288001265514205e-05, "loss": 0.3208, "step": 8960, "teacher_loss": 0.2761363983154297 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.15415403246879578, "learning_rate": 2.928730965597635e-05, "loss": 0.2201, "step": 8961, "teacher_loss": 0.2274424135684967 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3802204132080078, "learning_rate": 2.9286617718873317e-05, "loss": 0.2865, "step": 8962, "teacher_loss": 0.27606001496315 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.21149007976055145, "learning_rate": 2.928592545422097e-05, "loss": 0.2358, "step": 8963, "teacher_loss": 0.2385154664516449 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6890534162521362, "learning_rate": 2.9285232862035188e-05, "loss": 0.2898, "step": 8964, "teacher_loss": 0.24545976519584656 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.46577465534210205, "learning_rate": 2.9284539942331845e-05, "loss": 0.3066, "step": 8965, "teacher_loss": 0.2888607978820801 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.8376703262329102, "learning_rate": 2.9283846695126826e-05, "loss": 0.6335, "step": 8966, "teacher_loss": 0.6108400821685791 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6768379807472229, "learning_rate": 2.928315312043603e-05, "loss": 0.3933, "step": 8967, "teacher_loss": 0.36178117990493774 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.48513320088386536, "learning_rate": 2.9282459218275357e-05, "loss": 0.2553, "step": 8968, "teacher_loss": 0.22979308664798737 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3122332692146301, "learning_rate": 2.928176498866071e-05, "loss": 0.2368, "step": 8969, "teacher_loss": 0.22839249670505524 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.23835495114326477, "learning_rate": 2.9281070431608018e-05, "loss": 0.1753, "step": 8970, "teacher_loss": 0.1682862937450409 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.27147623896598816, "learning_rate": 2.928037554713319e-05, "loss": 0.1924, "step": 8971, "teacher_loss": 0.18364593386650085 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.7178797721862793, "learning_rate": 2.9279680335252173e-05, "loss": 0.3122, "step": 8972, "teacher_loss": 0.26711034774780273 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4710690379142761, "learning_rate": 2.9278984795980898e-05, "loss": 0.2885, "step": 8973, "teacher_loss": 0.2681998908519745 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4934837818145752, "learning_rate": 2.9278288929335308e-05, "loss": 0.2292, "step": 8974, "teacher_loss": 0.1998106688261032 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4627881646156311, "learning_rate": 2.9277592735331362e-05, "loss": 0.2327, "step": 8975, "teacher_loss": 0.20712760090827942 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6366965174674988, "learning_rate": 2.9276896213985022e-05, "loss": 0.3038, "step": 8976, "teacher_loss": 0.2667694687843323 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.35123080015182495, "learning_rate": 2.9276199365312252e-05, "loss": 0.302, "step": 8977, "teacher_loss": 0.296527624130249 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.508888304233551, "learning_rate": 2.927550218932904e-05, "loss": 0.2136, "step": 8978, "teacher_loss": 0.18074581027030945 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.25468921661376953, "learning_rate": 2.9274804686051358e-05, "loss": 0.1636, "step": 8979, "teacher_loss": 0.15351982414722443 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.2571745812892914, "learning_rate": 2.9274106855495207e-05, "loss": 0.204, "step": 8980, "teacher_loss": 0.19806994497776031 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.4964093565940857, "learning_rate": 2.9273408697676578e-05, "loss": 0.2828, "step": 8981, "teacher_loss": 0.25908321142196655 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.5554239749908447, "learning_rate": 2.9272710212611483e-05, "loss": 0.3144, "step": 8982, "teacher_loss": 0.2876272201538086 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.38468390703201294, "learning_rate": 2.9272011400315937e-05, "loss": 0.2257, "step": 8983, "teacher_loss": 0.20807360112667084 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.415330708026886, "learning_rate": 2.9271312260805952e-05, "loss": 0.223, "step": 8984, "teacher_loss": 0.2016492486000061 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6870024800300598, "learning_rate": 2.9270612794097572e-05, "loss": 0.3938, "step": 8985, "teacher_loss": 0.36121055483818054 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.2554101049900055, "learning_rate": 2.9269913000206826e-05, "loss": 0.1735, "step": 8986, "teacher_loss": 0.1644374430179596 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.3364235460758209, "learning_rate": 2.9269212879149754e-05, "loss": 0.2214, "step": 8987, "teacher_loss": 0.20866690576076508 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.6370645761489868, "learning_rate": 2.9268512430942414e-05, "loss": 0.3021, "step": 8988, "teacher_loss": 0.26488587260246277 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.32711923122406006, "learning_rate": 2.926781165560086e-05, "loss": 0.2781, "step": 8989, "teacher_loss": 0.2726441025733948 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.9321675300598145, "learning_rate": 2.926711055314116e-05, "loss": 0.5197, "step": 8990, "teacher_loss": 0.4738415479660034 }, { "compression_loss": 0.0, "epoch": 1.62, "label_loss": 0.34943169355392456, "learning_rate": 2.9266409123579403e-05, "loss": 0.1656, "step": 8991, "teacher_loss": 0.14518259465694427 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.32197678089141846, "learning_rate": 2.9265707366931643e-05, "loss": 0.2938, "step": 8992, "teacher_loss": 0.2906179428100586 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 1.026017665863037, "learning_rate": 2.9265005283213996e-05, "loss": 0.7076, "step": 8993, "teacher_loss": 0.6722649335861206 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.4119094908237457, "learning_rate": 2.9264302872442542e-05, "loss": 0.2531, "step": 8994, "teacher_loss": 0.23541949689388275 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.37426674365997314, "learning_rate": 2.9263600134633385e-05, "loss": 0.2322, "step": 8995, "teacher_loss": 0.2164537012577057 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.2763417959213257, "learning_rate": 2.9262897069802643e-05, "loss": 0.2775, "step": 8996, "teacher_loss": 0.2775970697402954 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5463225245475769, "learning_rate": 2.926219367796644e-05, "loss": 0.2551, "step": 8997, "teacher_loss": 0.222771555185318 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.3921516537666321, "learning_rate": 2.926148995914089e-05, "loss": 0.27, "step": 8998, "teacher_loss": 0.2564300298690796 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6816219091415405, "learning_rate": 2.9260785913342134e-05, "loss": 0.4254, "step": 8999, "teacher_loss": 0.3969782888889313 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.13172490894794464, "learning_rate": 2.9260081540586316e-05, "loss": 0.152, "step": 9000, "teacher_loss": 0.1542419195175171 }, { "epoch": 1.63, "eval_exact_match": 79.49858088930937, "eval_f1": 86.98887453329, "step": 9000 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.2775450646877289, "learning_rate": 2.9259376840889577e-05, "loss": 0.2705, "step": 9001, "teacher_loss": 0.26972103118896484 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5461021661758423, "learning_rate": 2.9258671814268085e-05, "loss": 0.2745, "step": 9002, "teacher_loss": 0.2442985326051712 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6333494186401367, "learning_rate": 2.9257966460737995e-05, "loss": 0.343, "step": 9003, "teacher_loss": 0.31071579456329346 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.7762037515640259, "learning_rate": 2.9257260780315485e-05, "loss": 0.6891, "step": 9004, "teacher_loss": 0.6793792247772217 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.213931143283844, "learning_rate": 2.9256554773016726e-05, "loss": 0.2084, "step": 9005, "teacher_loss": 0.20781943202018738 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.45667171478271484, "learning_rate": 2.9255848438857914e-05, "loss": 0.2149, "step": 9006, "teacher_loss": 0.18809139728546143 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5289179682731628, "learning_rate": 2.9255141777855234e-05, "loss": 0.2499, "step": 9007, "teacher_loss": 0.21894359588623047 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.2061433345079422, "learning_rate": 2.9254434790024894e-05, "loss": 0.1528, "step": 9008, "teacher_loss": 0.14688506722450256 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.12677393853664398, "learning_rate": 2.92537274753831e-05, "loss": 0.2171, "step": 9009, "teacher_loss": 0.22715824842453003 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5902462005615234, "learning_rate": 2.925301983394607e-05, "loss": 0.2648, "step": 9010, "teacher_loss": 0.22865138947963715 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.30914685130119324, "learning_rate": 2.9252311865730032e-05, "loss": 0.2449, "step": 9011, "teacher_loss": 0.23771902918815613 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.23548491299152374, "learning_rate": 2.9251603570751208e-05, "loss": 0.3577, "step": 9012, "teacher_loss": 0.37123483419418335 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5109137892723083, "learning_rate": 2.9250894949025843e-05, "loss": 0.3313, "step": 9013, "teacher_loss": 0.31133121252059937 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.42207154631614685, "learning_rate": 2.925018600057019e-05, "loss": 0.2055, "step": 9014, "teacher_loss": 0.1813814640045166 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5506953001022339, "learning_rate": 2.924947672540049e-05, "loss": 0.2281, "step": 9015, "teacher_loss": 0.1923052966594696 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.4330574870109558, "learning_rate": 2.9248767123533013e-05, "loss": 0.2772, "step": 9016, "teacher_loss": 0.2598758041858673 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5268689393997192, "learning_rate": 2.9248057194984024e-05, "loss": 0.444, "step": 9017, "teacher_loss": 0.43482697010040283 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6163475513458252, "learning_rate": 2.9247346939769803e-05, "loss": 0.3993, "step": 9018, "teacher_loss": 0.3751649260520935 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6395128965377808, "learning_rate": 2.9246636357906634e-05, "loss": 0.2627, "step": 9019, "teacher_loss": 0.22086858749389648 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.13514599204063416, "learning_rate": 2.9245925449410802e-05, "loss": 0.2006, "step": 9020, "teacher_loss": 0.20785696804523468 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5912529230117798, "learning_rate": 2.9245214214298618e-05, "loss": 0.3054, "step": 9021, "teacher_loss": 0.2736865282058716 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6193641424179077, "learning_rate": 2.9244502652586376e-05, "loss": 0.2885, "step": 9022, "teacher_loss": 0.25176793336868286 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.10610171407461166, "learning_rate": 2.92437907642904e-05, "loss": 0.2476, "step": 9023, "teacher_loss": 0.26327043771743774 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.4786786139011383, "learning_rate": 2.9243078549427e-05, "loss": 0.2771, "step": 9024, "teacher_loss": 0.2546723484992981 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.20772261917591095, "learning_rate": 2.9242366008012515e-05, "loss": 0.1914, "step": 9025, "teacher_loss": 0.1895974576473236 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.36129048466682434, "learning_rate": 2.924165314006328e-05, "loss": 0.313, "step": 9026, "teacher_loss": 0.30766934156417847 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.31653621792793274, "learning_rate": 2.9240939945595635e-05, "loss": 0.3129, "step": 9027, "teacher_loss": 0.31248581409454346 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.565127968788147, "learning_rate": 2.9240226424625937e-05, "loss": 0.4714, "step": 9028, "teacher_loss": 0.46099215745925903 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.332736074924469, "learning_rate": 2.923951257717054e-05, "loss": 0.2423, "step": 9029, "teacher_loss": 0.2322634607553482 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.33455342054367065, "learning_rate": 2.9238798403245806e-05, "loss": 0.2115, "step": 9030, "teacher_loss": 0.19782695174217224 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.364576131105423, "learning_rate": 2.9238083902868123e-05, "loss": 0.2352, "step": 9031, "teacher_loss": 0.22077983617782593 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.41065308451652527, "learning_rate": 2.9237369076053863e-05, "loss": 0.3668, "step": 9032, "teacher_loss": 0.3619014620780945 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.57323157787323, "learning_rate": 2.9236653922819413e-05, "loss": 0.3006, "step": 9033, "teacher_loss": 0.27034929394721985 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.31630322337150574, "learning_rate": 2.9235938443181173e-05, "loss": 0.2455, "step": 9034, "teacher_loss": 0.23765721917152405 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.46365129947662354, "learning_rate": 2.9235222637155545e-05, "loss": 0.264, "step": 9035, "teacher_loss": 0.24181796610355377 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.6756209135055542, "learning_rate": 2.9234506504758947e-05, "loss": 0.4013, "step": 9036, "teacher_loss": 0.37087541818618774 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.40011829137802124, "learning_rate": 2.9233790046007788e-05, "loss": 0.2508, "step": 9037, "teacher_loss": 0.23416680097579956 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.8036366105079651, "learning_rate": 2.9233073260918497e-05, "loss": 0.3005, "step": 9038, "teacher_loss": 0.24463282525539398 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.7782121300697327, "learning_rate": 2.9232356149507506e-05, "loss": 0.3367, "step": 9039, "teacher_loss": 0.28761789202690125 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.3888051509857178, "learning_rate": 2.9231638711791266e-05, "loss": 0.2375, "step": 9040, "teacher_loss": 0.22067710757255554 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.2764420807361603, "learning_rate": 2.923092094778622e-05, "loss": 0.2418, "step": 9041, "teacher_loss": 0.23798778653144836 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.295362651348114, "learning_rate": 2.9230202857508816e-05, "loss": 0.2099, "step": 9042, "teacher_loss": 0.20044925808906555 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5992039442062378, "learning_rate": 2.922948444097553e-05, "loss": 0.3471, "step": 9043, "teacher_loss": 0.3191283941268921 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.28975823521614075, "learning_rate": 2.9228765698202826e-05, "loss": 0.1868, "step": 9044, "teacher_loss": 0.1753292679786682 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.3033132553100586, "learning_rate": 2.922804662920718e-05, "loss": 0.2184, "step": 9045, "teacher_loss": 0.20897182822227478 }, { "compression_loss": 0.0, "epoch": 1.63, "label_loss": 0.5455037355422974, "learning_rate": 2.922732723400509e-05, "loss": 0.247, "step": 9046, "teacher_loss": 0.21379461884498596 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.3931177854537964, "learning_rate": 2.9226607512613042e-05, "loss": 0.3164, "step": 9047, "teacher_loss": 0.30787965655326843 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.9656020998954773, "learning_rate": 2.9225887465047534e-05, "loss": 0.4078, "step": 9048, "teacher_loss": 0.34587740898132324 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.412752628326416, "learning_rate": 2.9225167091325082e-05, "loss": 0.3278, "step": 9049, "teacher_loss": 0.31834664940834045 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.37509581446647644, "learning_rate": 2.9224446391462192e-05, "loss": 0.3387, "step": 9050, "teacher_loss": 0.3346675634384155 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.3306233286857605, "learning_rate": 2.92237253654754e-05, "loss": 0.2769, "step": 9051, "teacher_loss": 0.2709203362464905 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.38647693395614624, "learning_rate": 2.9223004013381225e-05, "loss": 0.269, "step": 9052, "teacher_loss": 0.2559296786785126 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.7728319764137268, "learning_rate": 2.9222282335196213e-05, "loss": 0.3472, "step": 9053, "teacher_loss": 0.29990658164024353 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.3392825126647949, "learning_rate": 2.9221560330936906e-05, "loss": 0.1719, "step": 9054, "teacher_loss": 0.1533445566892624 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.36017727851867676, "learning_rate": 2.922083800061986e-05, "loss": 0.2298, "step": 9055, "teacher_loss": 0.21535304188728333 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2214267998933792, "learning_rate": 2.9220115344261636e-05, "loss": 0.1502, "step": 9056, "teacher_loss": 0.14225971698760986 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2785983979701996, "learning_rate": 2.92193923618788e-05, "loss": 0.2625, "step": 9057, "teacher_loss": 0.26066938042640686 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.4297206997871399, "learning_rate": 2.921866905348793e-05, "loss": 0.3632, "step": 9058, "teacher_loss": 0.35582199692726135 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.43686607480049133, "learning_rate": 2.9217945419105607e-05, "loss": 0.3753, "step": 9059, "teacher_loss": 0.3685005009174347 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 1.0970160961151123, "learning_rate": 2.9217221458748422e-05, "loss": 0.3261, "step": 9060, "teacher_loss": 0.2404937446117401 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.6919724941253662, "learning_rate": 2.9216497172432976e-05, "loss": 0.3807, "step": 9061, "teacher_loss": 0.34614098072052 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5340515971183777, "learning_rate": 2.9215772560175877e-05, "loss": 0.3082, "step": 9062, "teacher_loss": 0.2831352651119232 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 1.0165231227874756, "learning_rate": 2.9215047621993728e-05, "loss": 0.3067, "step": 9063, "teacher_loss": 0.22780652344226837 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2056713104248047, "learning_rate": 2.921432235790316e-05, "loss": 0.2499, "step": 9064, "teacher_loss": 0.25481289625167847 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.45500198006629944, "learning_rate": 2.9213596767920795e-05, "loss": 0.3414, "step": 9065, "teacher_loss": 0.3287578821182251 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.7360314130783081, "learning_rate": 2.9212870852063273e-05, "loss": 0.3475, "step": 9066, "teacher_loss": 0.30433177947998047 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.27344343066215515, "learning_rate": 2.921214461034723e-05, "loss": 0.3741, "step": 9067, "teacher_loss": 0.38526418805122375 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5669922828674316, "learning_rate": 2.9211418042789325e-05, "loss": 0.2746, "step": 9068, "teacher_loss": 0.2420617789030075 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.7302707433700562, "learning_rate": 2.9210691149406214e-05, "loss": 0.315, "step": 9069, "teacher_loss": 0.2688038945198059 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.9473024606704712, "learning_rate": 2.920996393021456e-05, "loss": 0.4028, "step": 9070, "teacher_loss": 0.3423303961753845 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2851428985595703, "learning_rate": 2.9209236385231035e-05, "loss": 0.2689, "step": 9071, "teacher_loss": 0.26709654927253723 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.4193482995033264, "learning_rate": 2.920850851447232e-05, "loss": 0.3216, "step": 9072, "teacher_loss": 0.3106866478919983 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2575805187225342, "learning_rate": 2.9207780317955105e-05, "loss": 0.3243, "step": 9073, "teacher_loss": 0.33174487948417664 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.8053159713745117, "learning_rate": 2.9207051795696086e-05, "loss": 0.5612, "step": 9074, "teacher_loss": 0.5340703725814819 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5010508298873901, "learning_rate": 2.9206322947711963e-05, "loss": 0.2438, "step": 9075, "teacher_loss": 0.21521982550621033 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.6863592267036438, "learning_rate": 2.9205593774019447e-05, "loss": 0.3695, "step": 9076, "teacher_loss": 0.33430618047714233 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2966417372226715, "learning_rate": 2.920486427463526e-05, "loss": 0.2333, "step": 9077, "teacher_loss": 0.22628529369831085 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2085174024105072, "learning_rate": 2.920413444957612e-05, "loss": 0.2124, "step": 9078, "teacher_loss": 0.21279451251029968 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.7356836795806885, "learning_rate": 2.9203404298858767e-05, "loss": 0.2772, "step": 9079, "teacher_loss": 0.2262895703315735 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.18428349494934082, "learning_rate": 2.9202673822499932e-05, "loss": 0.2197, "step": 9080, "teacher_loss": 0.22368015348911285 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.44111815094947815, "learning_rate": 2.920194302051637e-05, "loss": 0.508, "step": 9081, "teacher_loss": 0.5154268741607666 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.18327198922634125, "learning_rate": 2.9201211892924834e-05, "loss": 0.2523, "step": 9082, "teacher_loss": 0.2599583566188812 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.1522093415260315, "learning_rate": 2.9200480439742092e-05, "loss": 0.1954, "step": 9083, "teacher_loss": 0.20016592741012573 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.6587107181549072, "learning_rate": 2.91997486609849e-05, "loss": 0.4255, "step": 9084, "teacher_loss": 0.3995497226715088 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.8274158239364624, "learning_rate": 2.919901655667005e-05, "loss": 0.3695, "step": 9085, "teacher_loss": 0.3186189532279968 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5901550650596619, "learning_rate": 2.9198284126814318e-05, "loss": 0.274, "step": 9086, "teacher_loss": 0.23882898688316345 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.3918238878250122, "learning_rate": 2.9197551371434504e-05, "loss": 0.3561, "step": 9087, "teacher_loss": 0.35213425755500793 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.17997834086418152, "learning_rate": 2.9196818290547402e-05, "loss": 0.2195, "step": 9088, "teacher_loss": 0.22384393215179443 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2517014443874359, "learning_rate": 2.919608488416982e-05, "loss": 0.2997, "step": 9089, "teacher_loss": 0.3050664961338043 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.14812707901000977, "learning_rate": 2.919535115231857e-05, "loss": 0.1585, "step": 9090, "teacher_loss": 0.15967410802841187 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.4033532738685608, "learning_rate": 2.9194617095010483e-05, "loss": 0.2675, "step": 9091, "teacher_loss": 0.25237852334976196 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.495887815952301, "learning_rate": 2.9193882712262385e-05, "loss": 0.2796, "step": 9092, "teacher_loss": 0.2555280923843384 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.557904839515686, "learning_rate": 2.9193148004091106e-05, "loss": 0.2991, "step": 9093, "teacher_loss": 0.2703282833099365 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5590754151344299, "learning_rate": 2.9192412970513503e-05, "loss": 0.2339, "step": 9094, "teacher_loss": 0.19781169295310974 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5037227272987366, "learning_rate": 2.9191677611546418e-05, "loss": 0.3516, "step": 9095, "teacher_loss": 0.3346906304359436 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.6285238265991211, "learning_rate": 2.9190941927206714e-05, "loss": 0.2826, "step": 9096, "teacher_loss": 0.24419990181922913 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.2855442464351654, "learning_rate": 2.919020591751126e-05, "loss": 0.1945, "step": 9097, "teacher_loss": 0.1844368427991867 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.5357562303543091, "learning_rate": 2.9189469582476925e-05, "loss": 0.3168, "step": 9098, "teacher_loss": 0.2925136387348175 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.4319174885749817, "learning_rate": 2.9188732922120597e-05, "loss": 0.2324, "step": 9099, "teacher_loss": 0.2102731615304947 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 1.1863384246826172, "learning_rate": 2.9187995936459164e-05, "loss": 0.4169, "step": 9100, "teacher_loss": 0.33135542273521423 }, { "compression_loss": 0.0, "epoch": 1.64, "label_loss": 0.4632750451564789, "learning_rate": 2.9187258625509518e-05, "loss": 0.3712, "step": 9101, "teacher_loss": 0.36099544167518616 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.38158440589904785, "learning_rate": 2.9186520989288574e-05, "loss": 0.1905, "step": 9102, "teacher_loss": 0.1692637801170349 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.29963284730911255, "learning_rate": 2.9185783027813233e-05, "loss": 0.2409, "step": 9103, "teacher_loss": 0.23442545533180237 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.44087713956832886, "learning_rate": 2.918504474110042e-05, "loss": 0.3083, "step": 9104, "teacher_loss": 0.2935802936553955 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5815075635910034, "learning_rate": 2.918430612916706e-05, "loss": 0.2272, "step": 9105, "teacher_loss": 0.18782316148281097 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.3396179676055908, "learning_rate": 2.9183567192030087e-05, "loss": 0.2595, "step": 9106, "teacher_loss": 0.25058799982070923 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.2538607120513916, "learning_rate": 2.918282792970644e-05, "loss": 0.1486, "step": 9107, "teacher_loss": 0.1368706226348877 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6355059742927551, "learning_rate": 2.9182088342213074e-05, "loss": 0.3823, "step": 9108, "teacher_loss": 0.35412734746932983 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6335360407829285, "learning_rate": 2.9181348429566944e-05, "loss": 0.2409, "step": 9109, "teacher_loss": 0.19730710983276367 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.14662745594978333, "learning_rate": 2.9180608191785005e-05, "loss": 0.2052, "step": 9110, "teacher_loss": 0.21166807413101196 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.293609082698822, "learning_rate": 2.917986762888424e-05, "loss": 0.2686, "step": 9111, "teacher_loss": 0.2658686935901642 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.8239504098892212, "learning_rate": 2.9179126740881627e-05, "loss": 0.3773, "step": 9112, "teacher_loss": 0.3276805281639099 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.2325119972229004, "learning_rate": 2.9178385527794148e-05, "loss": 0.2388, "step": 9113, "teacher_loss": 0.23952540755271912 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.7991150617599487, "learning_rate": 2.9177643989638795e-05, "loss": 0.2831, "step": 9114, "teacher_loss": 0.22571980953216553 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.40664488077163696, "learning_rate": 2.9176902126432573e-05, "loss": 0.2077, "step": 9115, "teacher_loss": 0.185613214969635 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.20584745705127716, "learning_rate": 2.9176159938192488e-05, "loss": 0.2834, "step": 9116, "teacher_loss": 0.29207220673561096 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.41206902265548706, "learning_rate": 2.917541742493556e-05, "loss": 0.2526, "step": 9117, "teacher_loss": 0.23488499224185944 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.550771176815033, "learning_rate": 2.9174674586678817e-05, "loss": 0.2626, "step": 9118, "teacher_loss": 0.23062163591384888 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.2395303100347519, "learning_rate": 2.9173931423439275e-05, "loss": 0.2238, "step": 9119, "teacher_loss": 0.2221044898033142 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.48667821288108826, "learning_rate": 2.917318793523398e-05, "loss": 0.2558, "step": 9120, "teacher_loss": 0.23009879887104034 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.49438217282295227, "learning_rate": 2.9172444122079985e-05, "loss": 0.3426, "step": 9121, "teacher_loss": 0.32576125860214233 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.33360904455184937, "learning_rate": 2.9171699983994334e-05, "loss": 0.238, "step": 9122, "teacher_loss": 0.22737759351730347 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.4617688059806824, "learning_rate": 2.9170955520994095e-05, "loss": 0.3006, "step": 9123, "teacher_loss": 0.28264304995536804 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.9464321136474609, "learning_rate": 2.9170210733096328e-05, "loss": 0.3181, "step": 9124, "teacher_loss": 0.24831557273864746 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6949301958084106, "learning_rate": 2.9169465620318114e-05, "loss": 0.5848, "step": 9125, "teacher_loss": 0.5725321769714355 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.22288376092910767, "learning_rate": 2.9168720182676542e-05, "loss": 0.2984, "step": 9126, "teacher_loss": 0.30681461095809937 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 1.2912129163742065, "learning_rate": 2.916797442018869e-05, "loss": 0.3276, "step": 9127, "teacher_loss": 0.2204945832490921 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.4136827886104584, "learning_rate": 2.9167228332871664e-05, "loss": 0.2902, "step": 9128, "teacher_loss": 0.276501327753067 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5210403800010681, "learning_rate": 2.9166481920742565e-05, "loss": 0.33, "step": 9129, "teacher_loss": 0.3087441921234131 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.9094825983047485, "learning_rate": 2.916573518381851e-05, "loss": 0.6093, "step": 9130, "teacher_loss": 0.5759446620941162 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6826096773147583, "learning_rate": 2.916498812211662e-05, "loss": 0.3886, "step": 9131, "teacher_loss": 0.35588371753692627 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.820324182510376, "learning_rate": 2.9164240735654024e-05, "loss": 0.3699, "step": 9132, "teacher_loss": 0.3198615610599518 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.38956284523010254, "learning_rate": 2.9163493024447848e-05, "loss": 0.2129, "step": 9133, "teacher_loss": 0.19325563311576843 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6421723961830139, "learning_rate": 2.9162744988515244e-05, "loss": 0.2735, "step": 9134, "teacher_loss": 0.23250806331634521 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.40422120690345764, "learning_rate": 2.9161996627873362e-05, "loss": 0.227, "step": 9135, "teacher_loss": 0.20733872056007385 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.47665566205978394, "learning_rate": 2.9161247942539354e-05, "loss": 0.464, "step": 9136, "teacher_loss": 0.46256759762763977 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.2436458170413971, "learning_rate": 2.916049893253039e-05, "loss": 0.2476, "step": 9137, "teacher_loss": 0.2479856312274933 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5855885148048401, "learning_rate": 2.9159749597863642e-05, "loss": 0.2543, "step": 9138, "teacher_loss": 0.217438742518425 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6897940635681152, "learning_rate": 2.9158999938556287e-05, "loss": 0.2787, "step": 9139, "teacher_loss": 0.23306520283222198 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.3864523768424988, "learning_rate": 2.9158249954625514e-05, "loss": 0.3303, "step": 9140, "teacher_loss": 0.3241148889064789 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.8880658745765686, "learning_rate": 2.9157499646088516e-05, "loss": 0.3219, "step": 9141, "teacher_loss": 0.2589852213859558 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5536705255508423, "learning_rate": 2.9156749012962503e-05, "loss": 0.2034, "step": 9142, "teacher_loss": 0.16445383429527283 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.6476191282272339, "learning_rate": 2.9155998055264676e-05, "loss": 0.3762, "step": 9143, "teacher_loss": 0.34609195590019226 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.2706192135810852, "learning_rate": 2.915524677301226e-05, "loss": 0.1901, "step": 9144, "teacher_loss": 0.18110932409763336 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.4505302906036377, "learning_rate": 2.9154495166222467e-05, "loss": 0.2674, "step": 9145, "teacher_loss": 0.2470816969871521 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.4657009541988373, "learning_rate": 2.9153743234912542e-05, "loss": 0.3199, "step": 9146, "teacher_loss": 0.30373120307922363 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5785669088363647, "learning_rate": 2.9152990979099722e-05, "loss": 0.3287, "step": 9147, "teacher_loss": 0.3009897768497467 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5754903554916382, "learning_rate": 2.9152238398801247e-05, "loss": 0.2928, "step": 9148, "teacher_loss": 0.2613980174064636 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.41845351457595825, "learning_rate": 2.9151485494034375e-05, "loss": 0.2179, "step": 9149, "teacher_loss": 0.19566407799720764 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.9027433395385742, "learning_rate": 2.9150732264816373e-05, "loss": 0.2776, "step": 9150, "teacher_loss": 0.2081412672996521 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.29552245140075684, "learning_rate": 2.9149978711164506e-05, "loss": 0.2592, "step": 9151, "teacher_loss": 0.25517046451568604 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.3398802578449249, "learning_rate": 2.9149224833096048e-05, "loss": 0.2685, "step": 9152, "teacher_loss": 0.260617196559906 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.08707955479621887, "learning_rate": 2.914847063062829e-05, "loss": 0.1478, "step": 9153, "teacher_loss": 0.1544952392578125 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.29313355684280396, "learning_rate": 2.914771610377852e-05, "loss": 0.2187, "step": 9154, "teacher_loss": 0.210408017039299 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5280560255050659, "learning_rate": 2.9146961252564036e-05, "loss": 0.3287, "step": 9155, "teacher_loss": 0.30655890703201294 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.5884580612182617, "learning_rate": 2.9146206077002144e-05, "loss": 0.3101, "step": 9156, "teacher_loss": 0.2792201638221741 }, { "compression_loss": 0.0, "epoch": 1.65, "label_loss": 0.45135048031806946, "learning_rate": 2.9145450577110158e-05, "loss": 0.3613, "step": 9157, "teacher_loss": 0.3513393700122833 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.34703540802001953, "learning_rate": 2.91446947529054e-05, "loss": 0.1799, "step": 9158, "teacher_loss": 0.16128291189670563 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.28634852170944214, "learning_rate": 2.9143938604405202e-05, "loss": 0.2983, "step": 9159, "teacher_loss": 0.29964128136634827 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.1861371397972107, "learning_rate": 2.9143182131626894e-05, "loss": 0.2139, "step": 9160, "teacher_loss": 0.21699032187461853 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.43974313139915466, "learning_rate": 2.9142425334587832e-05, "loss": 0.2957, "step": 9161, "teacher_loss": 0.27972549200057983 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.37349262833595276, "learning_rate": 2.914166821330535e-05, "loss": 0.2139, "step": 9162, "teacher_loss": 0.19613327085971832 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.323818564414978, "learning_rate": 2.9140910767796815e-05, "loss": 0.2629, "step": 9163, "teacher_loss": 0.2561449408531189 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.32089412212371826, "learning_rate": 2.9140152998079594e-05, "loss": 0.407, "step": 9164, "teacher_loss": 0.41659343242645264 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.12852616608142853, "learning_rate": 2.9139394904171064e-05, "loss": 0.168, "step": 9165, "teacher_loss": 0.1724090278148651 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.2106059491634369, "learning_rate": 2.9138636486088598e-05, "loss": 0.1802, "step": 9166, "teacher_loss": 0.1767694503068924 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.9225035309791565, "learning_rate": 2.9137877743849584e-05, "loss": 0.3036, "step": 9167, "teacher_loss": 0.23487567901611328 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3759137690067291, "learning_rate": 2.9137118677471424e-05, "loss": 0.2648, "step": 9168, "teacher_loss": 0.2524906396865845 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6851135492324829, "learning_rate": 2.9136359286971516e-05, "loss": 0.2516, "step": 9169, "teacher_loss": 0.20341722667217255 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.35278797149658203, "learning_rate": 2.913559957236727e-05, "loss": 0.2856, "step": 9170, "teacher_loss": 0.2780858278274536 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.41789448261260986, "learning_rate": 2.9134839533676115e-05, "loss": 0.2301, "step": 9171, "teacher_loss": 0.20921780169010162 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3479253351688385, "learning_rate": 2.913407917091546e-05, "loss": 0.3206, "step": 9172, "teacher_loss": 0.3175867199897766 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.5016261339187622, "learning_rate": 2.913331848410275e-05, "loss": 0.2401, "step": 9173, "teacher_loss": 0.2110644280910492 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.1500146985054016, "learning_rate": 2.9132557473255423e-05, "loss": 0.1356, "step": 9174, "teacher_loss": 0.13395142555236816 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.4810018539428711, "learning_rate": 2.9131796138390916e-05, "loss": 0.2436, "step": 9175, "teacher_loss": 0.21721789240837097 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6518871188163757, "learning_rate": 2.91310344795267e-05, "loss": 0.583, "step": 9176, "teacher_loss": 0.5752996206283569 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.8711276054382324, "learning_rate": 2.913027249668023e-05, "loss": 0.3921, "step": 9177, "teacher_loss": 0.3388691544532776 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3561475872993469, "learning_rate": 2.9129510189868974e-05, "loss": 0.5587, "step": 9178, "teacher_loss": 0.581181526184082 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3938180208206177, "learning_rate": 2.9128747559110412e-05, "loss": 0.385, "step": 9179, "teacher_loss": 0.38401904702186584 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3391727805137634, "learning_rate": 2.9127984604422033e-05, "loss": 0.2212, "step": 9180, "teacher_loss": 0.20806317031383514 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.46113285422325134, "learning_rate": 2.9127221325821322e-05, "loss": 0.3613, "step": 9181, "teacher_loss": 0.35018473863601685 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.519561231136322, "learning_rate": 2.912645772332578e-05, "loss": 0.3047, "step": 9182, "teacher_loss": 0.2808586359024048 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.7171036005020142, "learning_rate": 2.9125693796952916e-05, "loss": 0.4206, "step": 9183, "teacher_loss": 0.38763055205345154 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.17597715556621552, "learning_rate": 2.9124929546720244e-05, "loss": 0.2244, "step": 9184, "teacher_loss": 0.22979843616485596 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3520622253417969, "learning_rate": 2.912416497264529e-05, "loss": 0.2158, "step": 9185, "teacher_loss": 0.20062606036663055 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.7940888404846191, "learning_rate": 2.9123400074745575e-05, "loss": 0.3042, "step": 9186, "teacher_loss": 0.24978914856910706 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.4371662139892578, "learning_rate": 2.9122634853038642e-05, "loss": 0.263, "step": 9187, "teacher_loss": 0.24369487166404724 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.4842766225337982, "learning_rate": 2.9121869307542035e-05, "loss": 0.1765, "step": 9188, "teacher_loss": 0.14227421581745148 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6293430328369141, "learning_rate": 2.9121103438273305e-05, "loss": 0.3706, "step": 9189, "teacher_loss": 0.34190118312835693 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.8457003831863403, "learning_rate": 2.912033724525001e-05, "loss": 0.256, "step": 9190, "teacher_loss": 0.19047951698303223 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.5857725143432617, "learning_rate": 2.9119570728489713e-05, "loss": 0.4017, "step": 9191, "teacher_loss": 0.38119202852249146 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.617472767829895, "learning_rate": 2.9118803888009994e-05, "loss": 0.2826, "step": 9192, "teacher_loss": 0.24534976482391357 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.540016770362854, "learning_rate": 2.9118036723828434e-05, "loss": 0.4679, "step": 9193, "teacher_loss": 0.45985740423202515 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6320154070854187, "learning_rate": 2.9117269235962615e-05, "loss": 0.3233, "step": 9194, "teacher_loss": 0.28901833295822144 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.7137870788574219, "learning_rate": 2.9116501424430145e-05, "loss": 0.3359, "step": 9195, "teacher_loss": 0.2938670814037323 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.5262972712516785, "learning_rate": 2.9115733289248616e-05, "loss": 0.2781, "step": 9196, "teacher_loss": 0.2505384385585785 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.24922776222229004, "learning_rate": 2.9114964830435648e-05, "loss": 0.1998, "step": 9197, "teacher_loss": 0.19428768754005432 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3508811593055725, "learning_rate": 2.911419604800885e-05, "loss": 0.2061, "step": 9198, "teacher_loss": 0.18997475504875183 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.4926711916923523, "learning_rate": 2.9113426941985858e-05, "loss": 0.2751, "step": 9199, "teacher_loss": 0.250963032245636 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.2966512441635132, "learning_rate": 2.9112657512384296e-05, "loss": 0.1693, "step": 9200, "teacher_loss": 0.15516717731952667 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.14558416604995728, "learning_rate": 2.9111887759221813e-05, "loss": 0.157, "step": 9201, "teacher_loss": 0.15823706984519958 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.3428613543510437, "learning_rate": 2.9111117682516055e-05, "loss": 0.252, "step": 9202, "teacher_loss": 0.24191418290138245 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.42056864500045776, "learning_rate": 2.9110347282284673e-05, "loss": 0.2424, "step": 9203, "teacher_loss": 0.22260910272598267 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.11226047575473785, "learning_rate": 2.910957655854533e-05, "loss": 0.1434, "step": 9204, "teacher_loss": 0.14691483974456787 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.5073035359382629, "learning_rate": 2.9108805511315706e-05, "loss": 0.2177, "step": 9205, "teacher_loss": 0.18552683293819427 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.2442966103553772, "learning_rate": 2.910803414061347e-05, "loss": 0.1605, "step": 9206, "teacher_loss": 0.15113897621631622 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.1659238487482071, "learning_rate": 2.9107262446456302e-05, "loss": 0.2879, "step": 9207, "teacher_loss": 0.3014804720878601 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6373984813690186, "learning_rate": 2.9106490428861908e-05, "loss": 0.2535, "step": 9208, "teacher_loss": 0.2108057737350464 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.26981639862060547, "learning_rate": 2.9105718087847983e-05, "loss": 0.2083, "step": 9209, "teacher_loss": 0.20146358013153076 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.7666878700256348, "learning_rate": 2.910494542343223e-05, "loss": 0.2924, "step": 9210, "teacher_loss": 0.23973098397254944 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.6488733291625977, "learning_rate": 2.910417243563237e-05, "loss": 0.3363, "step": 9211, "teacher_loss": 0.3016122877597809 }, { "compression_loss": 0.0, "epoch": 1.66, "label_loss": 0.4720398783683777, "learning_rate": 2.910339912446612e-05, "loss": 0.2593, "step": 9212, "teacher_loss": 0.23561860620975494 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.32900285720825195, "learning_rate": 2.9102625489951212e-05, "loss": 0.2193, "step": 9213, "teacher_loss": 0.20714855194091797 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.15560680627822876, "learning_rate": 2.910185153210539e-05, "loss": 0.205, "step": 9214, "teacher_loss": 0.21053211390972137 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4644492268562317, "learning_rate": 2.9101077250946388e-05, "loss": 0.3256, "step": 9215, "teacher_loss": 0.31014496088027954 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.38424551486968994, "learning_rate": 2.9100302646491953e-05, "loss": 0.2388, "step": 9216, "teacher_loss": 0.2226463258266449 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.23442524671554565, "learning_rate": 2.9099527718759865e-05, "loss": 0.1849, "step": 9217, "teacher_loss": 0.17943312227725983 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5474934577941895, "learning_rate": 2.9098752467767876e-05, "loss": 0.2807, "step": 9218, "teacher_loss": 0.2510283291339874 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.43334537744522095, "learning_rate": 2.909797689353376e-05, "loss": 0.2001, "step": 9219, "teacher_loss": 0.17415004968643188 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.38292402029037476, "learning_rate": 2.9097200996075303e-05, "loss": 0.2521, "step": 9220, "teacher_loss": 0.2376130223274231 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.3658839166164398, "learning_rate": 2.9096424775410295e-05, "loss": 0.2305, "step": 9221, "teacher_loss": 0.21548418700695038 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.28370532393455505, "learning_rate": 2.9095648231556525e-05, "loss": 0.1966, "step": 9222, "teacher_loss": 0.18696634471416473 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5835572481155396, "learning_rate": 2.909487136453181e-05, "loss": 0.4144, "step": 9223, "teacher_loss": 0.3955951929092407 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5398017764091492, "learning_rate": 2.909409417435395e-05, "loss": 0.2965, "step": 9224, "teacher_loss": 0.2695165276527405 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.6575205326080322, "learning_rate": 2.9093316661040763e-05, "loss": 0.3395, "step": 9225, "teacher_loss": 0.3041561245918274 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5120516419410706, "learning_rate": 2.9092538824610085e-05, "loss": 0.2579, "step": 9226, "teacher_loss": 0.22961165010929108 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4826245605945587, "learning_rate": 2.909176066507974e-05, "loss": 0.2436, "step": 9227, "teacher_loss": 0.21709519624710083 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.48871737718582153, "learning_rate": 2.909098218246757e-05, "loss": 0.3039, "step": 9228, "teacher_loss": 0.283365398645401 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.9395196437835693, "learning_rate": 2.909020337679143e-05, "loss": 0.3322, "step": 9229, "teacher_loss": 0.26474103331565857 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4654906690120697, "learning_rate": 2.908942424806917e-05, "loss": 0.2329, "step": 9230, "teacher_loss": 0.20709139108657837 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.3360847234725952, "learning_rate": 2.9088644796318654e-05, "loss": 0.5773, "step": 9231, "teacher_loss": 0.6040756106376648 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.3834913671016693, "learning_rate": 2.908786502155775e-05, "loss": 0.2069, "step": 9232, "teacher_loss": 0.18724675476551056 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5472903251647949, "learning_rate": 2.9087084923804342e-05, "loss": 0.2869, "step": 9233, "teacher_loss": 0.2579546272754669 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5131911039352417, "learning_rate": 2.9086304503076307e-05, "loss": 0.6983, "step": 9234, "teacher_loss": 0.7188769578933716 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 1.0257718563079834, "learning_rate": 2.9085523759391547e-05, "loss": 0.3589, "step": 9235, "teacher_loss": 0.2847823202610016 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.27047398686408997, "learning_rate": 2.908474269276796e-05, "loss": 0.1863, "step": 9236, "teacher_loss": 0.17691370844841003 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.6767044067382812, "learning_rate": 2.9083961303223444e-05, "loss": 0.3408, "step": 9237, "teacher_loss": 0.30342957377433777 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.22198769450187683, "learning_rate": 2.9083179590775928e-05, "loss": 0.2749, "step": 9238, "teacher_loss": 0.2807576060295105 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.2379516363143921, "learning_rate": 2.908239755544333e-05, "loss": 0.2751, "step": 9239, "teacher_loss": 0.2791757583618164 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5076822638511658, "learning_rate": 2.908161519724357e-05, "loss": 0.3569, "step": 9240, "teacher_loss": 0.34017837047576904 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5089656114578247, "learning_rate": 2.9080832516194596e-05, "loss": 0.3203, "step": 9241, "teacher_loss": 0.2993620038032532 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.36689141392707825, "learning_rate": 2.9080049512314343e-05, "loss": 0.2392, "step": 9242, "teacher_loss": 0.22506119310855865 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.7230166792869568, "learning_rate": 2.9079266185620774e-05, "loss": 0.2927, "step": 9243, "teacher_loss": 0.24490296840667725 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.2775919735431671, "learning_rate": 2.907848253613185e-05, "loss": 0.2635, "step": 9244, "teacher_loss": 0.26190197467803955 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5631516575813293, "learning_rate": 2.907769856386552e-05, "loss": 0.3146, "step": 9245, "teacher_loss": 0.2869395911693573 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.36183035373687744, "learning_rate": 2.907691426883977e-05, "loss": 0.251, "step": 9246, "teacher_loss": 0.2386607825756073 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.3816518187522888, "learning_rate": 2.9076129651072587e-05, "loss": 0.207, "step": 9247, "teacher_loss": 0.18758907914161682 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.6186832189559937, "learning_rate": 2.907534471058195e-05, "loss": 0.2848, "step": 9248, "teacher_loss": 0.247691810131073 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.7001530528068542, "learning_rate": 2.907455944738586e-05, "loss": 0.4486, "step": 9249, "teacher_loss": 0.4206329882144928 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.36669573187828064, "learning_rate": 2.9073773861502315e-05, "loss": 0.3121, "step": 9250, "teacher_loss": 0.3059922456741333 }, { "epoch": 1.67, "eval_exact_match": 78.98770104068117, "eval_f1": 86.61258879189994, "step": 9250 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.9323701858520508, "learning_rate": 2.9072987952949337e-05, "loss": 0.5258, "step": 9251, "teacher_loss": 0.4806171953678131 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.1984187662601471, "learning_rate": 2.9072201721744935e-05, "loss": 0.3295, "step": 9252, "teacher_loss": 0.34404256939888 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.38764792680740356, "learning_rate": 2.9071415167907136e-05, "loss": 0.1964, "step": 9253, "teacher_loss": 0.17520058155059814 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4380306303501129, "learning_rate": 2.907062829145398e-05, "loss": 0.3262, "step": 9254, "teacher_loss": 0.3137598931789398 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.8820582628250122, "learning_rate": 2.90698410924035e-05, "loss": 0.2866, "step": 9255, "teacher_loss": 0.22039452195167542 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.7891812324523926, "learning_rate": 2.9069053570773752e-05, "loss": 0.3336, "step": 9256, "teacher_loss": 0.2829534113407135 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4762847423553467, "learning_rate": 2.906826572658278e-05, "loss": 0.1818, "step": 9257, "teacher_loss": 0.14908862113952637 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5055019855499268, "learning_rate": 2.9067477559848655e-05, "loss": 0.2694, "step": 9258, "teacher_loss": 0.24312575161457062 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.6891862750053406, "learning_rate": 2.9066689070589453e-05, "loss": 0.4136, "step": 9259, "teacher_loss": 0.3829249143600464 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.4040907025337219, "learning_rate": 2.906590025882324e-05, "loss": 0.2256, "step": 9260, "teacher_loss": 0.2057361751794815 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.7869569659233093, "learning_rate": 2.9065111124568105e-05, "loss": 0.5716, "step": 9261, "teacher_loss": 0.5477244853973389 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.9112817049026489, "learning_rate": 2.906432166784214e-05, "loss": 0.4773, "step": 9262, "teacher_loss": 0.429031103849411 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5311615467071533, "learning_rate": 2.906353188866345e-05, "loss": 0.2391, "step": 9263, "teacher_loss": 0.20668652653694153 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.5544610023498535, "learning_rate": 2.9062741787050134e-05, "loss": 0.2283, "step": 9264, "teacher_loss": 0.19200721383094788 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.35473915934562683, "learning_rate": 2.9061951363020317e-05, "loss": 0.2168, "step": 9265, "teacher_loss": 0.2015204131603241 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.37907469272613525, "learning_rate": 2.906116061659211e-05, "loss": 0.2686, "step": 9266, "teacher_loss": 0.2563498318195343 }, { "compression_loss": 0.0, "epoch": 1.67, "label_loss": 0.7442792057991028, "learning_rate": 2.9060369547783653e-05, "loss": 0.3597, "step": 9267, "teacher_loss": 0.3169637620449066 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4627285301685333, "learning_rate": 2.9059578156613074e-05, "loss": 0.2441, "step": 9268, "teacher_loss": 0.21980109810829163 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.09949947148561478, "learning_rate": 2.905878644309852e-05, "loss": 0.1786, "step": 9269, "teacher_loss": 0.1874256730079651 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4641808867454529, "learning_rate": 2.9057994407258143e-05, "loss": 0.2933, "step": 9270, "teacher_loss": 0.2743515968322754 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5340152382850647, "learning_rate": 2.9057202049110105e-05, "loss": 0.2742, "step": 9271, "teacher_loss": 0.2453407645225525 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.6270548105239868, "learning_rate": 2.9056409368672564e-05, "loss": 0.3086, "step": 9272, "teacher_loss": 0.2731887102127075 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.20683591067790985, "learning_rate": 2.9055616365963706e-05, "loss": 0.1758, "step": 9273, "teacher_loss": 0.17235158383846283 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5508305430412292, "learning_rate": 2.9054823041001705e-05, "loss": 0.2708, "step": 9274, "teacher_loss": 0.2396995574235916 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.44754481315612793, "learning_rate": 2.905402939380475e-05, "loss": 0.4151, "step": 9275, "teacher_loss": 0.41147178411483765 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.43648675084114075, "learning_rate": 2.9053235424391032e-05, "loss": 0.324, "step": 9276, "teacher_loss": 0.31154996156692505 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3455158770084381, "learning_rate": 2.9052441132778766e-05, "loss": 0.259, "step": 9277, "teacher_loss": 0.24937212467193604 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3073540925979614, "learning_rate": 2.9051646518986154e-05, "loss": 0.3141, "step": 9278, "teacher_loss": 0.314807653427124 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.48548024892807007, "learning_rate": 2.905085158303141e-05, "loss": 0.257, "step": 9279, "teacher_loss": 0.2316540777683258 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.320823073387146, "learning_rate": 2.9050056324932773e-05, "loss": 0.2213, "step": 9280, "teacher_loss": 0.2102929800748825 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5867105722427368, "learning_rate": 2.904926074470847e-05, "loss": 0.2623, "step": 9281, "teacher_loss": 0.22629155218601227 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5472455620765686, "learning_rate": 2.9048464842376736e-05, "loss": 0.3346, "step": 9282, "teacher_loss": 0.3110141158103943 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3994990587234497, "learning_rate": 2.904766861795582e-05, "loss": 0.3554, "step": 9283, "teacher_loss": 0.3504706621170044 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.6690493226051331, "learning_rate": 2.9046872071463988e-05, "loss": 0.3163, "step": 9284, "teacher_loss": 0.27708274126052856 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5111331343650818, "learning_rate": 2.9046075202919487e-05, "loss": 0.2483, "step": 9285, "teacher_loss": 0.219122514128685 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5614480376243591, "learning_rate": 2.9045278012340593e-05, "loss": 0.2893, "step": 9286, "teacher_loss": 0.25906965136528015 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.19893768429756165, "learning_rate": 2.904448049974559e-05, "loss": 0.1982, "step": 9287, "teacher_loss": 0.19816677272319794 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5493618249893188, "learning_rate": 2.9043682665152756e-05, "loss": 0.2332, "step": 9288, "teacher_loss": 0.19810637831687927 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3882234990596771, "learning_rate": 2.9042884508580383e-05, "loss": 0.2459, "step": 9289, "teacher_loss": 0.2301032692193985 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3313745856285095, "learning_rate": 2.904208603004677e-05, "loss": 0.2239, "step": 9290, "teacher_loss": 0.2119961827993393 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.33346739411354065, "learning_rate": 2.9041287229570222e-05, "loss": 0.352, "step": 9291, "teacher_loss": 0.3541010618209839 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5447283387184143, "learning_rate": 2.904048810716906e-05, "loss": 0.2614, "step": 9292, "teacher_loss": 0.22986991703510284 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5627639293670654, "learning_rate": 2.90396886628616e-05, "loss": 0.3035, "step": 9293, "teacher_loss": 0.27467185258865356 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.14594316482543945, "learning_rate": 2.9038888896666172e-05, "loss": 0.1764, "step": 9294, "teacher_loss": 0.17978979647159576 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.6003696918487549, "learning_rate": 2.9038088808601114e-05, "loss": 0.2908, "step": 9295, "teacher_loss": 0.25641506910324097 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4198756217956543, "learning_rate": 2.9037288398684762e-05, "loss": 0.3045, "step": 9296, "teacher_loss": 0.2916848659515381 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.2558099627494812, "learning_rate": 2.903648766693548e-05, "loss": 0.2528, "step": 9297, "teacher_loss": 0.2524198889732361 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.45294368267059326, "learning_rate": 2.9035686613371616e-05, "loss": 0.3183, "step": 9298, "teacher_loss": 0.3033705949783325 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.7846300601959229, "learning_rate": 2.9034885238011543e-05, "loss": 0.4943, "step": 9299, "teacher_loss": 0.46204277873039246 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4580080211162567, "learning_rate": 2.9034083540873624e-05, "loss": 0.2925, "step": 9300, "teacher_loss": 0.2740847170352936 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.23906934261322021, "learning_rate": 2.9033281521976248e-05, "loss": 0.2443, "step": 9301, "teacher_loss": 0.24485427141189575 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.7126936912536621, "learning_rate": 2.9032479181337804e-05, "loss": 0.4218, "step": 9302, "teacher_loss": 0.38953304290771484 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.42940568923950195, "learning_rate": 2.903167651897668e-05, "loss": 0.1962, "step": 9303, "teacher_loss": 0.17027972638607025 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.5111311674118042, "learning_rate": 2.903087353491128e-05, "loss": 0.2643, "step": 9304, "teacher_loss": 0.23687729239463806 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4613502025604248, "learning_rate": 2.903007022916002e-05, "loss": 0.3221, "step": 9305, "teacher_loss": 0.3065764904022217 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.6822640895843506, "learning_rate": 2.9029266601741317e-05, "loss": 0.6515, "step": 9306, "teacher_loss": 0.6480617523193359 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.8899911642074585, "learning_rate": 2.9028462652673588e-05, "loss": 0.4088, "step": 9307, "teacher_loss": 0.35533642768859863 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.4414692521095276, "learning_rate": 2.9027658381975273e-05, "loss": 0.2003, "step": 9308, "teacher_loss": 0.17351919412612915 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.2900228202342987, "learning_rate": 2.9026853789664808e-05, "loss": 0.236, "step": 9309, "teacher_loss": 0.22996850311756134 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.7469189167022705, "learning_rate": 2.902604887576064e-05, "loss": 0.474, "step": 9310, "teacher_loss": 0.4436472952365875 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.40438586473464966, "learning_rate": 2.9025243640281226e-05, "loss": 0.2464, "step": 9311, "teacher_loss": 0.22882801294326782 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.3126750588417053, "learning_rate": 2.9024438083245023e-05, "loss": 0.2262, "step": 9312, "teacher_loss": 0.2165645956993103 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.8984996676445007, "learning_rate": 2.9023632204670497e-05, "loss": 0.3609, "step": 9313, "teacher_loss": 0.3011930584907532 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.9566714763641357, "learning_rate": 2.9022826004576137e-05, "loss": 0.3335, "step": 9314, "teacher_loss": 0.26422858238220215 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.6724084615707397, "learning_rate": 2.9022019482980416e-05, "loss": 0.56, "step": 9315, "teacher_loss": 0.547517716884613 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.2508290708065033, "learning_rate": 2.902121263990183e-05, "loss": 0.3716, "step": 9316, "teacher_loss": 0.3849754333496094 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.42668718099594116, "learning_rate": 2.9020405475358872e-05, "loss": 0.2073, "step": 9317, "teacher_loss": 0.1829012930393219 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.48074281215667725, "learning_rate": 2.9019597989370055e-05, "loss": 0.302, "step": 9318, "teacher_loss": 0.2821105122566223 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.22151502966880798, "learning_rate": 2.9018790181953892e-05, "loss": 0.2293, "step": 9319, "teacher_loss": 0.2301289588212967 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.2705234885215759, "learning_rate": 2.9017982053128895e-05, "loss": 0.2583, "step": 9320, "teacher_loss": 0.2569606900215149 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.31468263268470764, "learning_rate": 2.90171736029136e-05, "loss": 0.2213, "step": 9321, "teacher_loss": 0.21087901294231415 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.40045833587646484, "learning_rate": 2.9016364831326538e-05, "loss": 0.2242, "step": 9322, "teacher_loss": 0.20466187596321106 }, { "compression_loss": 0.0, "epoch": 1.68, "label_loss": 0.28022196888923645, "learning_rate": 2.9015555738386258e-05, "loss": 0.191, "step": 9323, "teacher_loss": 0.18113242089748383 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.1975923627614975, "learning_rate": 2.90147463241113e-05, "loss": 0.214, "step": 9324, "teacher_loss": 0.21580807864665985 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5414063334465027, "learning_rate": 2.9013936588520235e-05, "loss": 0.2507, "step": 9325, "teacher_loss": 0.218429297208786 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6998558640480042, "learning_rate": 2.9013126531631616e-05, "loss": 0.2906, "step": 9326, "teacher_loss": 0.24511653184890747 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.8140338659286499, "learning_rate": 2.9012316153464017e-05, "loss": 0.333, "step": 9327, "teacher_loss": 0.27954739332199097 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 1.1546919345855713, "learning_rate": 2.901150545403602e-05, "loss": 0.4454, "step": 9328, "teacher_loss": 0.36658209562301636 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4790791869163513, "learning_rate": 2.9010694433366213e-05, "loss": 0.3509, "step": 9329, "teacher_loss": 0.3366243839263916 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.3284538984298706, "learning_rate": 2.900988309147319e-05, "loss": 0.1535, "step": 9330, "teacher_loss": 0.1340821385383606 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6214761137962341, "learning_rate": 2.900907142837555e-05, "loss": 0.3882, "step": 9331, "teacher_loss": 0.3623168170452118 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.2939201593399048, "learning_rate": 2.9008259444091906e-05, "loss": 0.3834, "step": 9332, "teacher_loss": 0.39338138699531555 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.09034471213817596, "learning_rate": 2.900744713864087e-05, "loss": 0.196, "step": 9333, "teacher_loss": 0.2077823281288147 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4003854990005493, "learning_rate": 2.9006634512041067e-05, "loss": 0.192, "step": 9334, "teacher_loss": 0.16889601945877075 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.41204673051834106, "learning_rate": 2.9005821564311127e-05, "loss": 0.271, "step": 9335, "teacher_loss": 0.2553806006908417 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.577776312828064, "learning_rate": 2.9005008295469696e-05, "loss": 0.3016, "step": 9336, "teacher_loss": 0.2709653675556183 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5871777534484863, "learning_rate": 2.9004194705535405e-05, "loss": 0.3534, "step": 9337, "teacher_loss": 0.3273926377296448 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.34036558866500854, "learning_rate": 2.9003380794526926e-05, "loss": 0.1979, "step": 9338, "teacher_loss": 0.18208590149879456 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.28699785470962524, "learning_rate": 2.90025665624629e-05, "loss": 0.3359, "step": 9339, "teacher_loss": 0.34132862091064453 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.2708887755870819, "learning_rate": 2.9001752009362014e-05, "loss": 0.1937, "step": 9340, "teacher_loss": 0.1851045787334442 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.263755738735199, "learning_rate": 2.9000937135242932e-05, "loss": 0.2355, "step": 9341, "teacher_loss": 0.23238103091716766 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.22057479619979858, "learning_rate": 2.9000121940124333e-05, "loss": 0.1736, "step": 9342, "teacher_loss": 0.16837728023529053 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6109610199928284, "learning_rate": 2.8999306424024915e-05, "loss": 0.4234, "step": 9343, "teacher_loss": 0.4026148021221161 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5595384836196899, "learning_rate": 2.8998490586963373e-05, "loss": 0.237, "step": 9344, "teacher_loss": 0.20113666355609894 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6873195171356201, "learning_rate": 2.899767442895841e-05, "loss": 0.4059, "step": 9345, "teacher_loss": 0.37465405464172363 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4221692681312561, "learning_rate": 2.8996857950028744e-05, "loss": 0.2917, "step": 9346, "teacher_loss": 0.2772250175476074 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4763318598270416, "learning_rate": 2.8996041150193084e-05, "loss": 0.2319, "step": 9347, "teacher_loss": 0.20479440689086914 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.34323960542678833, "learning_rate": 2.899522402947017e-05, "loss": 0.1863, "step": 9348, "teacher_loss": 0.1688569039106369 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5284562110900879, "learning_rate": 2.8994406587878722e-05, "loss": 0.2413, "step": 9349, "teacher_loss": 0.20940876007080078 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4097611904144287, "learning_rate": 2.8993588825437493e-05, "loss": 0.3131, "step": 9350, "teacher_loss": 0.30235347151756287 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.2623571455478668, "learning_rate": 2.8992770742165226e-05, "loss": 0.2217, "step": 9351, "teacher_loss": 0.21721863746643066 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.3416444659233093, "learning_rate": 2.8991952338080677e-05, "loss": 0.2405, "step": 9352, "teacher_loss": 0.229261577129364 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.7279170751571655, "learning_rate": 2.8991133613202615e-05, "loss": 0.4087, "step": 9353, "teacher_loss": 0.3732330799102783 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5482463836669922, "learning_rate": 2.89903145675498e-05, "loss": 0.4332, "step": 9354, "teacher_loss": 0.4204011559486389 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.37647539377212524, "learning_rate": 2.898949520114102e-05, "loss": 0.3056, "step": 9355, "teacher_loss": 0.29775065183639526 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6451793313026428, "learning_rate": 2.898867551399506e-05, "loss": 0.2722, "step": 9356, "teacher_loss": 0.23076388239860535 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.5896247029304504, "learning_rate": 2.8987855506130708e-05, "loss": 0.3018, "step": 9357, "teacher_loss": 0.2698572278022766 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.22080765664577484, "learning_rate": 2.8987035177566767e-05, "loss": 0.2162, "step": 9358, "teacher_loss": 0.21566995978355408 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.3908550441265106, "learning_rate": 2.8986214528322044e-05, "loss": 0.2698, "step": 9359, "teacher_loss": 0.25636622309684753 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6212261319160461, "learning_rate": 2.8985393558415364e-05, "loss": 0.2096, "step": 9360, "teacher_loss": 0.16388392448425293 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.2948904037475586, "learning_rate": 2.8984572267865527e-05, "loss": 0.1651, "step": 9361, "teacher_loss": 0.15062521398067474 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.7019200325012207, "learning_rate": 2.8983750656691385e-05, "loss": 0.2572, "step": 9362, "teacher_loss": 0.20776838064193726 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4429117441177368, "learning_rate": 2.8982928724911762e-05, "loss": 0.3066, "step": 9363, "teacher_loss": 0.29147735238075256 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4424898624420166, "learning_rate": 2.8982106472545507e-05, "loss": 0.1802, "step": 9364, "teacher_loss": 0.15107297897338867 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6375385522842407, "learning_rate": 2.898128389961147e-05, "loss": 0.4571, "step": 9365, "teacher_loss": 0.4371042251586914 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6873155832290649, "learning_rate": 2.8980461006128512e-05, "loss": 0.3902, "step": 9366, "teacher_loss": 0.3571946620941162 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6286658048629761, "learning_rate": 2.89796377921155e-05, "loss": 0.3648, "step": 9367, "teacher_loss": 0.3354980945587158 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.4112156629562378, "learning_rate": 2.8978814257591304e-05, "loss": 0.2148, "step": 9368, "teacher_loss": 0.19303107261657715 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.8636107444763184, "learning_rate": 2.897799040257481e-05, "loss": 0.5014, "step": 9369, "teacher_loss": 0.46116888523101807 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.46559083461761475, "learning_rate": 2.8977166227084903e-05, "loss": 0.2432, "step": 9370, "teacher_loss": 0.21844364702701569 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.318211168050766, "learning_rate": 2.8976341731140478e-05, "loss": 0.4443, "step": 9371, "teacher_loss": 0.458340048789978 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.22801688313484192, "learning_rate": 2.8975516914760442e-05, "loss": 0.1996, "step": 9372, "teacher_loss": 0.19641321897506714 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.33171647787094116, "learning_rate": 2.8974691777963707e-05, "loss": 0.2859, "step": 9373, "teacher_loss": 0.2807769477367401 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.16290518641471863, "learning_rate": 2.8973866320769186e-05, "loss": 0.2152, "step": 9374, "teacher_loss": 0.22104278206825256 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.9149197340011597, "learning_rate": 2.8973040543195803e-05, "loss": 0.3312, "step": 9375, "teacher_loss": 0.2663334012031555 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.6135046482086182, "learning_rate": 2.8972214445262498e-05, "loss": 0.3627, "step": 9376, "teacher_loss": 0.33479487895965576 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.8560186624526978, "learning_rate": 2.8971388026988203e-05, "loss": 0.3623, "step": 9377, "teacher_loss": 0.30742496252059937 }, { "compression_loss": 0.0, "epoch": 1.69, "label_loss": 0.490791916847229, "learning_rate": 2.8970561288391872e-05, "loss": 0.4867, "step": 9378, "teacher_loss": 0.48624151945114136 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5349369049072266, "learning_rate": 2.8969734229492455e-05, "loss": 0.2826, "step": 9379, "teacher_loss": 0.25461798906326294 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.395302414894104, "learning_rate": 2.8968906850308918e-05, "loss": 0.2534, "step": 9380, "teacher_loss": 0.2376861721277237 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2770238518714905, "learning_rate": 2.8968079150860228e-05, "loss": 0.2242, "step": 9381, "teacher_loss": 0.21831226348876953 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.6028745174407959, "learning_rate": 2.8967251131165358e-05, "loss": 0.3468, "step": 9382, "teacher_loss": 0.31834447383880615 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.6734985113143921, "learning_rate": 2.8966422791243296e-05, "loss": 0.2643, "step": 9383, "teacher_loss": 0.2188381850719452 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4535084664821625, "learning_rate": 2.8965594131113036e-05, "loss": 0.1933, "step": 9384, "teacher_loss": 0.16440461575984955 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.718681812286377, "learning_rate": 2.896476515079357e-05, "loss": 0.3814, "step": 9385, "teacher_loss": 0.34392493963241577 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5419446229934692, "learning_rate": 2.8963935850303905e-05, "loss": 0.3539, "step": 9386, "teacher_loss": 0.333030641078949 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2921307682991028, "learning_rate": 2.8963106229663064e-05, "loss": 0.3177, "step": 9387, "teacher_loss": 0.3205293118953705 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5811346769332886, "learning_rate": 2.896227628889006e-05, "loss": 0.3763, "step": 9388, "teacher_loss": 0.3535630404949188 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.22378309071063995, "learning_rate": 2.8961446028003918e-05, "loss": 0.198, "step": 9389, "teacher_loss": 0.19511735439300537 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.19910335540771484, "learning_rate": 2.896061544702368e-05, "loss": 0.1917, "step": 9390, "teacher_loss": 0.19091831147670746 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.8366166353225708, "learning_rate": 2.8959784545968384e-05, "loss": 0.5431, "step": 9391, "teacher_loss": 0.5104920268058777 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2722296416759491, "learning_rate": 2.895895332485708e-05, "loss": 0.2188, "step": 9392, "teacher_loss": 0.21286682784557343 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.43733450770378113, "learning_rate": 2.8958121783708832e-05, "loss": 0.2274, "step": 9393, "teacher_loss": 0.20403876900672913 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.30129778385162354, "learning_rate": 2.89572899225427e-05, "loss": 0.2402, "step": 9394, "teacher_loss": 0.23338648676872253 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.19690775871276855, "learning_rate": 2.8956457741377753e-05, "loss": 0.226, "step": 9395, "teacher_loss": 0.22919979691505432 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.18240909278392792, "learning_rate": 2.895562524023308e-05, "loss": 0.1905, "step": 9396, "teacher_loss": 0.19138428568840027 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.35369741916656494, "learning_rate": 2.8954792419127753e-05, "loss": 0.3409, "step": 9397, "teacher_loss": 0.3394266366958618 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5802950263023376, "learning_rate": 2.8953959278080875e-05, "loss": 0.309, "step": 9398, "teacher_loss": 0.2788654565811157 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4208073616027832, "learning_rate": 2.8953125817111553e-05, "loss": 0.3893, "step": 9399, "teacher_loss": 0.3858261704444885 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.41641998291015625, "learning_rate": 2.8952292036238884e-05, "loss": 0.2528, "step": 9400, "teacher_loss": 0.23463964462280273 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4171334505081177, "learning_rate": 2.8951457935481992e-05, "loss": 0.3333, "step": 9401, "teacher_loss": 0.32397592067718506 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.8566931486129761, "learning_rate": 2.8950623514859998e-05, "loss": 0.2589, "step": 9402, "teacher_loss": 0.19252698123455048 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.42989400029182434, "learning_rate": 2.894978877439203e-05, "loss": 0.3076, "step": 9403, "teacher_loss": 0.2939668893814087 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.31020134687423706, "learning_rate": 2.8948953714097237e-05, "loss": 0.2101, "step": 9404, "teacher_loss": 0.1990264356136322 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4747995138168335, "learning_rate": 2.8948118333994748e-05, "loss": 0.3029, "step": 9405, "teacher_loss": 0.2838355302810669 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.22408822178840637, "learning_rate": 2.8947282634103732e-05, "loss": 0.1645, "step": 9406, "teacher_loss": 0.15789492428302765 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5716903209686279, "learning_rate": 2.8946446614443335e-05, "loss": 0.3312, "step": 9407, "teacher_loss": 0.30449479818344116 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.14879626035690308, "learning_rate": 2.8945610275032733e-05, "loss": 0.175, "step": 9408, "teacher_loss": 0.17791524529457092 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.7454357147216797, "learning_rate": 2.8944773615891097e-05, "loss": 0.3948, "step": 9409, "teacher_loss": 0.3558773994445801 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 1.1078578233718872, "learning_rate": 2.8943936637037614e-05, "loss": 0.5531, "step": 9410, "teacher_loss": 0.49150386452674866 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.35919398069381714, "learning_rate": 2.894309933849147e-05, "loss": 0.3492, "step": 9411, "teacher_loss": 0.3481142818927765 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2908899188041687, "learning_rate": 2.894226172027186e-05, "loss": 0.1726, "step": 9412, "teacher_loss": 0.15947763621807098 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.6169778108596802, "learning_rate": 2.8941423782397987e-05, "loss": 0.2787, "step": 9413, "teacher_loss": 0.24109011888504028 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4549195170402527, "learning_rate": 2.894058552488907e-05, "loss": 0.4342, "step": 9414, "teacher_loss": 0.43191346526145935 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2947240471839905, "learning_rate": 2.8939746947764317e-05, "loss": 0.2585, "step": 9415, "teacher_loss": 0.25447508692741394 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.371695876121521, "learning_rate": 2.8938908051042965e-05, "loss": 0.2558, "step": 9416, "teacher_loss": 0.24297265708446503 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.547249436378479, "learning_rate": 2.8938068834744243e-05, "loss": 0.3321, "step": 9417, "teacher_loss": 0.3082178831100464 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.522907555103302, "learning_rate": 2.8937229298887387e-05, "loss": 0.2138, "step": 9418, "teacher_loss": 0.17947739362716675 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5087186098098755, "learning_rate": 2.8936389443491655e-05, "loss": 0.2777, "step": 9419, "teacher_loss": 0.2520167827606201 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 1.0342810153961182, "learning_rate": 2.8935549268576294e-05, "loss": 0.4388, "step": 9420, "teacher_loss": 0.37267762422561646 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.35705363750457764, "learning_rate": 2.8934708774160566e-05, "loss": 0.2072, "step": 9421, "teacher_loss": 0.1905396580696106 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4151454269886017, "learning_rate": 2.8933867960263746e-05, "loss": 0.278, "step": 9422, "teacher_loss": 0.26272812485694885 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.32695525884628296, "learning_rate": 2.893302682690511e-05, "loss": 0.205, "step": 9423, "teacher_loss": 0.19142332673072815 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.38215094804763794, "learning_rate": 2.8932185374103946e-05, "loss": 0.2767, "step": 9424, "teacher_loss": 0.2649814784526825 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.13310247659683228, "learning_rate": 2.8931343601879535e-05, "loss": 0.2512, "step": 9425, "teacher_loss": 0.26430368423461914 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.8965725898742676, "learning_rate": 2.8930501510251187e-05, "loss": 0.4264, "step": 9426, "teacher_loss": 0.37413665652275085 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.4012833535671234, "learning_rate": 2.8929659099238207e-05, "loss": 0.2343, "step": 9427, "teacher_loss": 0.21578149497509003 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.2685379981994629, "learning_rate": 2.8928816368859904e-05, "loss": 0.2122, "step": 9428, "teacher_loss": 0.20594459772109985 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.3332647383213043, "learning_rate": 2.8927973319135605e-05, "loss": 0.2259, "step": 9429, "teacher_loss": 0.21392974257469177 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.5013045072555542, "learning_rate": 2.892712995008463e-05, "loss": 0.3194, "step": 9430, "teacher_loss": 0.2992364168167114 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.24649418890476227, "learning_rate": 2.892628626172633e-05, "loss": 0.2402, "step": 9431, "teacher_loss": 0.2394467443227768 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.38722261786460876, "learning_rate": 2.8925442254080036e-05, "loss": 0.3463, "step": 9432, "teacher_loss": 0.34170621633529663 }, { "compression_loss": 0.0, "epoch": 1.7, "label_loss": 0.3369593620300293, "learning_rate": 2.89245979271651e-05, "loss": 0.1657, "step": 9433, "teacher_loss": 0.14661921560764313 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3820837736129761, "learning_rate": 2.8923753281000884e-05, "loss": 0.2047, "step": 9434, "teacher_loss": 0.18502630293369293 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5342718362808228, "learning_rate": 2.892290831560675e-05, "loss": 0.3058, "step": 9435, "teacher_loss": 0.28038448095321655 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.36089733242988586, "learning_rate": 2.8922063031002067e-05, "loss": 0.1716, "step": 9436, "teacher_loss": 0.15051597356796265 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.1848730891942978, "learning_rate": 2.8921217427206227e-05, "loss": 0.3084, "step": 9437, "teacher_loss": 0.3221457004547119 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5581762790679932, "learning_rate": 2.89203715042386e-05, "loss": 0.3999, "step": 9438, "teacher_loss": 0.3822594881057739 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.6545029878616333, "learning_rate": 2.8919525262118596e-05, "loss": 0.2324, "step": 9439, "teacher_loss": 0.1855495274066925 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.6595101952552795, "learning_rate": 2.8918678700865613e-05, "loss": 0.3825, "step": 9440, "teacher_loss": 0.35174164175987244 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3666520118713379, "learning_rate": 2.8917831820499055e-05, "loss": 0.2493, "step": 9441, "teacher_loss": 0.236236110329628 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5610173940658569, "learning_rate": 2.8916984621038337e-05, "loss": 0.2606, "step": 9442, "teacher_loss": 0.22720874845981598 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.6348270177841187, "learning_rate": 2.891613710250289e-05, "loss": 0.3233, "step": 9443, "teacher_loss": 0.28865164518356323 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.2001899778842926, "learning_rate": 2.8915289264912143e-05, "loss": 0.2293, "step": 9444, "teacher_loss": 0.2325005829334259 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.48045435547828674, "learning_rate": 2.8914441108285533e-05, "loss": 0.2276, "step": 9445, "teacher_loss": 0.19946104288101196 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5375640988349915, "learning_rate": 2.8913592632642507e-05, "loss": 0.2781, "step": 9446, "teacher_loss": 0.24926477670669556 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.25137245655059814, "learning_rate": 2.891274383800251e-05, "loss": 0.2296, "step": 9447, "teacher_loss": 0.22719906270503998 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.6816695928573608, "learning_rate": 2.8911894724385023e-05, "loss": 0.3403, "step": 9448, "teacher_loss": 0.30235934257507324 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.723246693611145, "learning_rate": 2.891104529180949e-05, "loss": 0.3455, "step": 9449, "teacher_loss": 0.3035595715045929 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.2292892336845398, "learning_rate": 2.8910195540295398e-05, "loss": 0.2591, "step": 9450, "teacher_loss": 0.2623632550239563 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.42785611748695374, "learning_rate": 2.8909345469862228e-05, "loss": 0.4496, "step": 9451, "teacher_loss": 0.45201438665390015 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.2561454772949219, "learning_rate": 2.8908495080529468e-05, "loss": 0.265, "step": 9452, "teacher_loss": 0.26600703597068787 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.28792378306388855, "learning_rate": 2.8907644372316616e-05, "loss": 0.2421, "step": 9453, "teacher_loss": 0.23696283996105194 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3974774479866028, "learning_rate": 2.8906793345243175e-05, "loss": 0.2509, "step": 9454, "teacher_loss": 0.2346021682024002 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.32158780097961426, "learning_rate": 2.8905941999328657e-05, "loss": 0.1863, "step": 9455, "teacher_loss": 0.17122483253479004 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.38021501898765564, "learning_rate": 2.8905090334592585e-05, "loss": 0.1803, "step": 9456, "teacher_loss": 0.1580427587032318 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.7404710054397583, "learning_rate": 2.890423835105448e-05, "loss": 0.4427, "step": 9457, "teacher_loss": 0.4096405506134033 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 1.1754100322723389, "learning_rate": 2.890338604873387e-05, "loss": 0.903, "step": 9458, "teacher_loss": 0.8727257251739502 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5227789282798767, "learning_rate": 2.890253342765031e-05, "loss": 0.2685, "step": 9459, "teacher_loss": 0.24024814367294312 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.4979461431503296, "learning_rate": 2.8901680487823337e-05, "loss": 0.2761, "step": 9460, "teacher_loss": 0.25143519043922424 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5062993168830872, "learning_rate": 2.890082722927251e-05, "loss": 0.3372, "step": 9461, "teacher_loss": 0.3184327483177185 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.34944581985473633, "learning_rate": 2.889997365201739e-05, "loss": 0.316, "step": 9462, "teacher_loss": 0.3122739791870117 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.29811620712280273, "learning_rate": 2.8899119756077558e-05, "loss": 0.2164, "step": 9463, "teacher_loss": 0.20732466876506805 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3391658067703247, "learning_rate": 2.8898265541472573e-05, "loss": 0.1876, "step": 9464, "teacher_loss": 0.1708112359046936 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.29614150524139404, "learning_rate": 2.8897411008222026e-05, "loss": 0.3214, "step": 9465, "teacher_loss": 0.32418137788772583 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.27397704124450684, "learning_rate": 2.8896556156345518e-05, "loss": 0.2577, "step": 9466, "teacher_loss": 0.2559289336204529 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.7105743288993835, "learning_rate": 2.889570098586264e-05, "loss": 0.3178, "step": 9467, "teacher_loss": 0.27412912249565125 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.37822291254997253, "learning_rate": 2.8894845496792995e-05, "loss": 0.2762, "step": 9468, "teacher_loss": 0.2648215889930725 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5535728931427002, "learning_rate": 2.8893989689156202e-05, "loss": 0.3037, "step": 9469, "teacher_loss": 0.2759411931037903 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.1917286515235901, "learning_rate": 2.8893133562971886e-05, "loss": 0.2832, "step": 9470, "teacher_loss": 0.2933412194252014 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3179375231266022, "learning_rate": 2.889227711825967e-05, "loss": 0.2917, "step": 9471, "teacher_loss": 0.2887392044067383 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.4145300090312958, "learning_rate": 2.8891420355039193e-05, "loss": 0.2825, "step": 9472, "teacher_loss": 0.26777493953704834 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.35148924589157104, "learning_rate": 2.889056327333009e-05, "loss": 0.1884, "step": 9473, "teacher_loss": 0.1702927201986313 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5350561738014221, "learning_rate": 2.8889705873152024e-05, "loss": 0.319, "step": 9474, "teacher_loss": 0.29494708776474 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.3603099584579468, "learning_rate": 2.8888848154524642e-05, "loss": 0.3167, "step": 9475, "teacher_loss": 0.3118588328361511 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5691217184066772, "learning_rate": 2.888799011746761e-05, "loss": 0.2854, "step": 9476, "teacher_loss": 0.25384092330932617 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.4562886953353882, "learning_rate": 2.8887131762000605e-05, "loss": 0.2421, "step": 9477, "teacher_loss": 0.21830147504806519 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.666765034198761, "learning_rate": 2.888627308814331e-05, "loss": 0.3631, "step": 9478, "teacher_loss": 0.329380601644516 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5053094029426575, "learning_rate": 2.88854140959154e-05, "loss": 0.4681, "step": 9479, "teacher_loss": 0.4640064537525177 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.6324402093887329, "learning_rate": 2.8884554785336577e-05, "loss": 0.278, "step": 9480, "teacher_loss": 0.23859205842018127 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.47667816281318665, "learning_rate": 2.8883695156426544e-05, "loss": 0.2634, "step": 9481, "teacher_loss": 0.2396690398454666 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.1979219764471054, "learning_rate": 2.8882835209205e-05, "loss": 0.2505, "step": 9482, "teacher_loss": 0.25639188289642334 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5739365816116333, "learning_rate": 2.8881974943691676e-05, "loss": 0.2762, "step": 9483, "teacher_loss": 0.24312105774879456 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.642431378364563, "learning_rate": 2.8881114359906283e-05, "loss": 0.2915, "step": 9484, "teacher_loss": 0.2524959444999695 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.35984623432159424, "learning_rate": 2.8880253457868552e-05, "loss": 0.2223, "step": 9485, "teacher_loss": 0.20703119039535522 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5318686366081238, "learning_rate": 2.8879392237598225e-05, "loss": 0.2296, "step": 9486, "teacher_loss": 0.1960306167602539 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.40393245220184326, "learning_rate": 2.8878530699115052e-05, "loss": 0.2633, "step": 9487, "teacher_loss": 0.24766096472740173 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.060222674161195755, "learning_rate": 2.8877668842438775e-05, "loss": 0.1607, "step": 9488, "teacher_loss": 0.17185035347938538 }, { "compression_loss": 0.0, "epoch": 1.71, "label_loss": 0.5454633235931396, "learning_rate": 2.887680666758916e-05, "loss": 0.2958, "step": 9489, "teacher_loss": 0.2680458724498749 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.5859564542770386, "learning_rate": 2.8875944174585972e-05, "loss": 0.3076, "step": 9490, "teacher_loss": 0.2766672372817993 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.20699137449264526, "learning_rate": 2.887508136344899e-05, "loss": 0.2317, "step": 9491, "teacher_loss": 0.23446646332740784 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 1.1290473937988281, "learning_rate": 2.887421823419799e-05, "loss": 0.8173, "step": 9492, "teacher_loss": 0.782699465751648 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.15283741056919098, "learning_rate": 2.8873354786852762e-05, "loss": 0.179, "step": 9493, "teacher_loss": 0.18190765380859375 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.35447508096694946, "learning_rate": 2.8872491021433105e-05, "loss": 0.3241, "step": 9494, "teacher_loss": 0.320762574672699 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.590571403503418, "learning_rate": 2.8871626937958818e-05, "loss": 0.2874, "step": 9495, "teacher_loss": 0.2536930441856384 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6005941033363342, "learning_rate": 2.8870762536449714e-05, "loss": 0.307, "step": 9496, "teacher_loss": 0.27433955669403076 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3539612591266632, "learning_rate": 2.8869897816925616e-05, "loss": 0.2314, "step": 9497, "teacher_loss": 0.21774816513061523 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6836268305778503, "learning_rate": 2.8869032779406343e-05, "loss": 0.4109, "step": 9498, "teacher_loss": 0.3806215524673462 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.44198447465896606, "learning_rate": 2.8868167423911732e-05, "loss": 0.2016, "step": 9499, "teacher_loss": 0.1748543083667755 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3481265902519226, "learning_rate": 2.8867301750461616e-05, "loss": 0.1744, "step": 9500, "teacher_loss": 0.1551232486963272 }, { "epoch": 1.72, "eval_exact_match": 79.40397350993378, "eval_f1": 86.6925610959982, "step": 9500 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7204965949058533, "learning_rate": 2.886643575907585e-05, "loss": 0.3576, "step": 9501, "teacher_loss": 0.31725066900253296 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.38294681906700134, "learning_rate": 2.8865569449774287e-05, "loss": 0.2467, "step": 9502, "teacher_loss": 0.23151350021362305 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6223882436752319, "learning_rate": 2.886470282257679e-05, "loss": 0.3173, "step": 9503, "teacher_loss": 0.2833506166934967 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3250850737094879, "learning_rate": 2.8863835877503218e-05, "loss": 0.2244, "step": 9504, "teacher_loss": 0.21319669485092163 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.47057026624679565, "learning_rate": 2.8862968614573463e-05, "loss": 0.282, "step": 9505, "teacher_loss": 0.261068731546402 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.5378087759017944, "learning_rate": 2.8862101033807398e-05, "loss": 0.3104, "step": 9506, "teacher_loss": 0.2851082682609558 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.8102796077728271, "learning_rate": 2.8861233135224915e-05, "loss": 0.2784, "step": 9507, "teacher_loss": 0.21929779648780823 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.17647671699523926, "learning_rate": 2.8860364918845916e-05, "loss": 0.1774, "step": 9508, "teacher_loss": 0.17752380669116974 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.9248626232147217, "learning_rate": 2.8859496384690306e-05, "loss": 0.3532, "step": 9509, "teacher_loss": 0.2897075414657593 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7064296007156372, "learning_rate": 2.8858627532777993e-05, "loss": 0.2722, "step": 9510, "teacher_loss": 0.2239123433828354 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7216962575912476, "learning_rate": 2.8857758363128908e-05, "loss": 0.4304, "step": 9511, "teacher_loss": 0.39804044365882874 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.1719408929347992, "learning_rate": 2.8856888875762965e-05, "loss": 0.2071, "step": 9512, "teacher_loss": 0.21104782819747925 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3332878053188324, "learning_rate": 2.8856019070700107e-05, "loss": 0.2821, "step": 9513, "teacher_loss": 0.2764579653739929 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7348049283027649, "learning_rate": 2.8855148947960273e-05, "loss": 0.3929, "step": 9514, "teacher_loss": 0.354946494102478 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.2960263192653656, "learning_rate": 2.885427850756342e-05, "loss": 0.2396, "step": 9515, "teacher_loss": 0.23337848484516144 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6953973770141602, "learning_rate": 2.8853407749529487e-05, "loss": 0.2476, "step": 9516, "teacher_loss": 0.19779713451862335 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.548407793045044, "learning_rate": 2.8852536673878458e-05, "loss": 0.3938, "step": 9517, "teacher_loss": 0.3766377568244934 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.2611948847770691, "learning_rate": 2.8851665280630294e-05, "loss": 0.2295, "step": 9518, "teacher_loss": 0.22601871192455292 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.25542911887168884, "learning_rate": 2.8850793569804968e-05, "loss": 0.1832, "step": 9519, "teacher_loss": 0.17520998418331146 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.29920563101768494, "learning_rate": 2.8849921541422478e-05, "loss": 0.2003, "step": 9520, "teacher_loss": 0.18930459022521973 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.5232792496681213, "learning_rate": 2.8849049195502812e-05, "loss": 0.3403, "step": 9521, "teacher_loss": 0.32001471519470215 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.2496078610420227, "learning_rate": 2.884817653206597e-05, "loss": 0.1706, "step": 9522, "teacher_loss": 0.161823570728302 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.4752728343009949, "learning_rate": 2.8847303551131957e-05, "loss": 0.3294, "step": 9523, "teacher_loss": 0.3132280707359314 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.18546488881111145, "learning_rate": 2.8846430252720788e-05, "loss": 0.1915, "step": 9524, "teacher_loss": 0.19219109416007996 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.4187684655189514, "learning_rate": 2.8845556636852492e-05, "loss": 0.3218, "step": 9525, "teacher_loss": 0.31099268794059753 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.5659390687942505, "learning_rate": 2.884468270354709e-05, "loss": 0.2791, "step": 9526, "teacher_loss": 0.24724087119102478 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.43867403268814087, "learning_rate": 2.8843808452824622e-05, "loss": 0.3709, "step": 9527, "teacher_loss": 0.363379567861557 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7753872871398926, "learning_rate": 2.8842933884705132e-05, "loss": 0.3681, "step": 9528, "teacher_loss": 0.32280439138412476 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.2733902931213379, "learning_rate": 2.8842058999208672e-05, "loss": 0.1742, "step": 9529, "teacher_loss": 0.16316324472427368 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.1690545380115509, "learning_rate": 2.88411837963553e-05, "loss": 0.1528, "step": 9530, "teacher_loss": 0.15098294615745544 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.41097545623779297, "learning_rate": 2.8840308276165085e-05, "loss": 0.2539, "step": 9531, "teacher_loss": 0.2364916205406189 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.45074227452278137, "learning_rate": 2.883943243865809e-05, "loss": 0.3243, "step": 9532, "teacher_loss": 0.3102171719074249 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6656568646430969, "learning_rate": 2.8838556283854403e-05, "loss": 0.2949, "step": 9533, "teacher_loss": 0.2536671459674835 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3068510890007019, "learning_rate": 2.8837679811774116e-05, "loss": 0.2532, "step": 9534, "teacher_loss": 0.2472032755613327 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.4407150149345398, "learning_rate": 2.8836803022437316e-05, "loss": 0.2285, "step": 9535, "teacher_loss": 0.20488600432872772 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.32777759432792664, "learning_rate": 2.8835925915864103e-05, "loss": 0.2882, "step": 9536, "teacher_loss": 0.28377920389175415 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.4718494415283203, "learning_rate": 2.8835048492074594e-05, "loss": 0.2811, "step": 9537, "teacher_loss": 0.25991135835647583 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.4691272974014282, "learning_rate": 2.8834170751088906e-05, "loss": 0.5632, "step": 9538, "teacher_loss": 0.573616087436676 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.3983680009841919, "learning_rate": 2.8833292692927156e-05, "loss": 0.2079, "step": 9539, "teacher_loss": 0.18676550686359406 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.7350836992263794, "learning_rate": 2.883241431760948e-05, "loss": 0.227, "step": 9540, "teacher_loss": 0.1705557107925415 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.6084218621253967, "learning_rate": 2.8831535625156013e-05, "loss": 0.406, "step": 9541, "teacher_loss": 0.38354188203811646 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.15079958736896515, "learning_rate": 2.8830656615586905e-05, "loss": 0.1956, "step": 9542, "teacher_loss": 0.20052430033683777 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 0.423688679933548, "learning_rate": 2.882977728892231e-05, "loss": 0.2814, "step": 9543, "teacher_loss": 0.26560455560684204 }, { "compression_loss": 0.0, "epoch": 1.72, "label_loss": 1.216416358947754, "learning_rate": 2.8828897645182383e-05, "loss": 0.4313, "step": 9544, "teacher_loss": 0.3441035747528076 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.4851227402687073, "learning_rate": 2.8828017684387296e-05, "loss": 0.2141, "step": 9545, "teacher_loss": 0.18393851816654205 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5699038505554199, "learning_rate": 2.882713740655722e-05, "loss": 0.4448, "step": 9546, "teacher_loss": 0.4308568239212036 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.41542109847068787, "learning_rate": 2.882625681171234e-05, "loss": 0.2142, "step": 9547, "teacher_loss": 0.19188442826271057 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.46402907371520996, "learning_rate": 2.8825375899872843e-05, "loss": 0.3516, "step": 9548, "teacher_loss": 0.33909082412719727 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.2189243584871292, "learning_rate": 2.8824494671058927e-05, "loss": 0.1673, "step": 9549, "teacher_loss": 0.16158095002174377 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.33084186911582947, "learning_rate": 2.88236131252908e-05, "loss": 0.2669, "step": 9550, "teacher_loss": 0.25983017683029175 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.2684520184993744, "learning_rate": 2.8822731262588666e-05, "loss": 0.1999, "step": 9551, "teacher_loss": 0.1922830045223236 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.3193325698375702, "learning_rate": 2.882184908297275e-05, "loss": 0.355, "step": 9552, "teacher_loss": 0.35893672704696655 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.6582856178283691, "learning_rate": 2.8820966586463272e-05, "loss": 0.3184, "step": 9553, "teacher_loss": 0.28059476613998413 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.7786076664924622, "learning_rate": 2.8820083773080468e-05, "loss": 0.5445, "step": 9554, "teacher_loss": 0.5184370875358582 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5447124242782593, "learning_rate": 2.8819200642844576e-05, "loss": 0.3995, "step": 9555, "teacher_loss": 0.38336145877838135 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.7425686120986938, "learning_rate": 2.8818317195775848e-05, "loss": 0.2938, "step": 9556, "teacher_loss": 0.24389465153217316 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.31502705812454224, "learning_rate": 2.8817433431894537e-05, "loss": 0.3006, "step": 9557, "teacher_loss": 0.29899781942367554 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.34349325299263, "learning_rate": 2.8816549351220902e-05, "loss": 0.4821, "step": 9558, "teacher_loss": 0.49752652645111084 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.20276644825935364, "learning_rate": 2.8815664953775213e-05, "loss": 0.2418, "step": 9559, "teacher_loss": 0.24610371887683868 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.38882219791412354, "learning_rate": 2.8814780239577753e-05, "loss": 0.2335, "step": 9560, "teacher_loss": 0.21620629727840424 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.6596754193305969, "learning_rate": 2.8813895208648794e-05, "loss": 0.2348, "step": 9561, "teacher_loss": 0.1875821053981781 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.4240890145301819, "learning_rate": 2.881300986100864e-05, "loss": 0.3894, "step": 9562, "teacher_loss": 0.385598361492157 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.28374576568603516, "learning_rate": 2.8812124196677585e-05, "loss": 0.2269, "step": 9563, "teacher_loss": 0.22059650719165802 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.4922718405723572, "learning_rate": 2.881123821567593e-05, "loss": 0.2571, "step": 9564, "teacher_loss": 0.230948805809021 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.34443995356559753, "learning_rate": 2.8810351918023992e-05, "loss": 0.2683, "step": 9565, "teacher_loss": 0.25980913639068604 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.4274241626262665, "learning_rate": 2.8809465303742088e-05, "loss": 0.1775, "step": 9566, "teacher_loss": 0.1497097611427307 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.592334508895874, "learning_rate": 2.8808578372850544e-05, "loss": 0.2993, "step": 9567, "teacher_loss": 0.2667018473148346 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.25950899720191956, "learning_rate": 2.8807691125369706e-05, "loss": 0.2096, "step": 9568, "teacher_loss": 0.20408445596694946 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5297172665596008, "learning_rate": 2.8806803561319903e-05, "loss": 0.3497, "step": 9569, "teacher_loss": 0.32970625162124634 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.7672590017318726, "learning_rate": 2.8805915680721493e-05, "loss": 0.324, "step": 9570, "teacher_loss": 0.274752676486969 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.17691412568092346, "learning_rate": 2.8805027483594827e-05, "loss": 0.1736, "step": 9571, "teacher_loss": 0.17319121956825256 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.8751059770584106, "learning_rate": 2.880413896996027e-05, "loss": 0.4666, "step": 9572, "teacher_loss": 0.42123743891716003 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5520852208137512, "learning_rate": 2.8803250139838196e-05, "loss": 0.4019, "step": 9573, "teacher_loss": 0.3852146565914154 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.46499693393707275, "learning_rate": 2.8802360993248972e-05, "loss": 0.234, "step": 9574, "teacher_loss": 0.20837755501270294 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.2144802361726761, "learning_rate": 2.8801471530213002e-05, "loss": 0.2457, "step": 9575, "teacher_loss": 0.2491755336523056 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.31127727031707764, "learning_rate": 2.8800581750750666e-05, "loss": 0.239, "step": 9576, "teacher_loss": 0.23097771406173706 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.35348987579345703, "learning_rate": 2.8799691654882365e-05, "loss": 0.1915, "step": 9577, "teacher_loss": 0.1735423505306244 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.27336373925209045, "learning_rate": 2.8798801242628504e-05, "loss": 0.1837, "step": 9578, "teacher_loss": 0.1737854927778244 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.22664445638656616, "learning_rate": 2.8797910514009508e-05, "loss": 0.2718, "step": 9579, "teacher_loss": 0.2767956852912903 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.4108770787715912, "learning_rate": 2.8797019469045793e-05, "loss": 0.2701, "step": 9580, "teacher_loss": 0.25441914796829224 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.7577189207077026, "learning_rate": 2.8796128107757784e-05, "loss": 0.3789, "step": 9581, "teacher_loss": 0.3368479013442993 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.14617453515529633, "learning_rate": 2.879523643016592e-05, "loss": 0.2945, "step": 9582, "teacher_loss": 0.3109666705131531 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.40665876865386963, "learning_rate": 2.8794344436290645e-05, "loss": 0.2607, "step": 9583, "teacher_loss": 0.2445174604654312 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 1.2795318365097046, "learning_rate": 2.879345212615241e-05, "loss": 0.4575, "step": 9584, "teacher_loss": 0.3661792278289795 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5661550760269165, "learning_rate": 2.8792559499771672e-05, "loss": 0.4207, "step": 9585, "teacher_loss": 0.40448668599128723 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5286970138549805, "learning_rate": 2.8791666557168902e-05, "loss": 0.3959, "step": 9586, "teacher_loss": 0.3811742961406708 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.47100502252578735, "learning_rate": 2.8790773298364562e-05, "loss": 0.3745, "step": 9587, "teacher_loss": 0.36382627487182617 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5432640314102173, "learning_rate": 2.8789879723379137e-05, "loss": 0.2478, "step": 9588, "teacher_loss": 0.215023010969162 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.3296983540058136, "learning_rate": 2.8788985832233117e-05, "loss": 0.3359, "step": 9589, "teacher_loss": 0.3365897536277771 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.9270901679992676, "learning_rate": 2.8788091624946995e-05, "loss": 0.3916, "step": 9590, "teacher_loss": 0.33212241530418396 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5110504627227783, "learning_rate": 2.8787197101541266e-05, "loss": 0.2536, "step": 9591, "teacher_loss": 0.22494640946388245 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.38014084100723267, "learning_rate": 2.8786302262036447e-05, "loss": 0.2319, "step": 9592, "teacher_loss": 0.21537455916404724 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.6757920980453491, "learning_rate": 2.878540710645305e-05, "loss": 0.2849, "step": 9593, "teacher_loss": 0.24151358008384705 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.14654678106307983, "learning_rate": 2.87845116348116e-05, "loss": 0.2087, "step": 9594, "teacher_loss": 0.21561157703399658 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5182464122772217, "learning_rate": 2.878361584713262e-05, "loss": 0.3011, "step": 9595, "teacher_loss": 0.27693048119544983 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.6885359883308411, "learning_rate": 2.8782719743436657e-05, "loss": 0.4473, "step": 9596, "teacher_loss": 0.42053189873695374 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.7021652460098267, "learning_rate": 2.8781823323744255e-05, "loss": 0.3936, "step": 9597, "teacher_loss": 0.3593422770500183 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.29757004976272583, "learning_rate": 2.8780926588075966e-05, "loss": 0.3605, "step": 9598, "teacher_loss": 0.3674991726875305 }, { "compression_loss": 0.0, "epoch": 1.73, "label_loss": 0.5416845679283142, "learning_rate": 2.8780029536452347e-05, "loss": 0.2177, "step": 9599, "teacher_loss": 0.18169677257537842 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.33476996421813965, "learning_rate": 2.8779132168893963e-05, "loss": 0.3161, "step": 9600, "teacher_loss": 0.31397363543510437 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.16670675575733185, "learning_rate": 2.877823448542139e-05, "loss": 0.1677, "step": 9601, "teacher_loss": 0.16783848404884338 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5534331798553467, "learning_rate": 2.877733648605521e-05, "loss": 0.2396, "step": 9602, "teacher_loss": 0.20473191142082214 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4948105216026306, "learning_rate": 2.8776438170816013e-05, "loss": 0.2355, "step": 9603, "teacher_loss": 0.20664048194885254 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5813813209533691, "learning_rate": 2.877553953972439e-05, "loss": 0.2857, "step": 9604, "teacher_loss": 0.25284358859062195 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 1.0607283115386963, "learning_rate": 2.8774640592800948e-05, "loss": 0.7659, "step": 9605, "teacher_loss": 0.7331167459487915 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4963480532169342, "learning_rate": 2.8773741330066297e-05, "loss": 0.3471, "step": 9606, "teacher_loss": 0.33046776056289673 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.9289522171020508, "learning_rate": 2.8772841751541054e-05, "loss": 0.3532, "step": 9607, "teacher_loss": 0.2891795039176941 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3674055337905884, "learning_rate": 2.8771941857245837e-05, "loss": 0.2434, "step": 9608, "teacher_loss": 0.22962608933448792 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6558855175971985, "learning_rate": 2.8771041647201292e-05, "loss": 0.3122, "step": 9609, "teacher_loss": 0.2740171253681183 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.29457545280456543, "learning_rate": 2.877014112142804e-05, "loss": 0.2371, "step": 9610, "teacher_loss": 0.23066972196102142 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.8390344381332397, "learning_rate": 2.8769240279946742e-05, "loss": 0.5272, "step": 9611, "teacher_loss": 0.4925941824913025 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.42375820875167847, "learning_rate": 2.876833912277805e-05, "loss": 0.2915, "step": 9612, "teacher_loss": 0.2768496870994568 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3474855422973633, "learning_rate": 2.8767437649942617e-05, "loss": 0.2979, "step": 9613, "teacher_loss": 0.29241716861724854 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5069116950035095, "learning_rate": 2.8766535861461116e-05, "loss": 0.3142, "step": 9614, "teacher_loss": 0.2928082346916199 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4472291171550751, "learning_rate": 2.8765633757354223e-05, "loss": 0.2217, "step": 9615, "teacher_loss": 0.1966659277677536 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.32911890745162964, "learning_rate": 2.876473133764262e-05, "loss": 0.2176, "step": 9616, "teacher_loss": 0.20516744256019592 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.184114009141922, "learning_rate": 2.8763828602346993e-05, "loss": 0.1609, "step": 9617, "teacher_loss": 0.15837319195270538 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.41836196184158325, "learning_rate": 2.8762925551488048e-05, "loss": 0.281, "step": 9618, "teacher_loss": 0.26577508449554443 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.8304084539413452, "learning_rate": 2.8762022185086477e-05, "loss": 0.3073, "step": 9619, "teacher_loss": 0.24922871589660645 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.14868196845054626, "learning_rate": 2.8761118503163003e-05, "loss": 0.1784, "step": 9620, "teacher_loss": 0.1817294806241989 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6242039799690247, "learning_rate": 2.8760214505738336e-05, "loss": 0.2643, "step": 9621, "teacher_loss": 0.22429701685905457 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4024398922920227, "learning_rate": 2.8759310192833212e-05, "loss": 0.3543, "step": 9622, "teacher_loss": 0.3489968180656433 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5162615776062012, "learning_rate": 2.875840556446835e-05, "loss": 0.1828, "step": 9623, "teacher_loss": 0.14573538303375244 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.1622384786605835, "learning_rate": 2.87575006206645e-05, "loss": 0.2592, "step": 9624, "teacher_loss": 0.27002087235450745 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6179137229919434, "learning_rate": 2.875659536144241e-05, "loss": 0.2726, "step": 9625, "teacher_loss": 0.2341836392879486 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.532171368598938, "learning_rate": 2.8755689786822833e-05, "loss": 0.2629, "step": 9626, "teacher_loss": 0.23299822211265564 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.334453284740448, "learning_rate": 2.875478389682653e-05, "loss": 0.2303, "step": 9627, "teacher_loss": 0.21875648200511932 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.1583271324634552, "learning_rate": 2.8753877691474272e-05, "loss": 0.2029, "step": 9628, "teacher_loss": 0.20785056054592133 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5318273305892944, "learning_rate": 2.8752971170786834e-05, "loss": 0.2488, "step": 9629, "teacher_loss": 0.2173895537853241 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4228849411010742, "learning_rate": 2.8752064334785002e-05, "loss": 0.2508, "step": 9630, "teacher_loss": 0.23163577914237976 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.834248423576355, "learning_rate": 2.875115718348956e-05, "loss": 0.3979, "step": 9631, "teacher_loss": 0.3493949770927429 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6110309362411499, "learning_rate": 2.875024971692132e-05, "loss": 0.2373, "step": 9632, "teacher_loss": 0.19582784175872803 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 1.1099125146865845, "learning_rate": 2.8749341935101075e-05, "loss": 0.3327, "step": 9633, "teacher_loss": 0.24632304906845093 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.23753051459789276, "learning_rate": 2.8748433838049642e-05, "loss": 0.1976, "step": 9634, "teacher_loss": 0.19317740201950073 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6427618265151978, "learning_rate": 2.8747525425787844e-05, "loss": 0.3446, "step": 9635, "teacher_loss": 0.31146469712257385 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5272940397262573, "learning_rate": 2.8746616698336504e-05, "loss": 0.392, "step": 9636, "teacher_loss": 0.3769666254520416 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.2316398024559021, "learning_rate": 2.8745707655716455e-05, "loss": 0.1885, "step": 9637, "teacher_loss": 0.1837473213672638 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3742293119430542, "learning_rate": 2.8744798297948546e-05, "loss": 0.2874, "step": 9638, "teacher_loss": 0.27777624130249023 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3067373037338257, "learning_rate": 2.8743888625053613e-05, "loss": 0.2842, "step": 9639, "teacher_loss": 0.2817382514476776 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.43746474385261536, "learning_rate": 2.874297863705253e-05, "loss": 0.284, "step": 9640, "teacher_loss": 0.2668987214565277 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.4802393317222595, "learning_rate": 2.874206833396614e-05, "loss": 0.2012, "step": 9641, "teacher_loss": 0.1702032834291458 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 1.6658446788787842, "learning_rate": 2.8741157715815334e-05, "loss": 0.4463, "step": 9642, "teacher_loss": 0.3108043670654297 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6385030746459961, "learning_rate": 2.8740246782620973e-05, "loss": 0.3514, "step": 9643, "teacher_loss": 0.31951475143432617 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3685276508331299, "learning_rate": 2.8739335534403952e-05, "loss": 0.2493, "step": 9644, "teacher_loss": 0.236043319106102 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.22222258150577545, "learning_rate": 2.873842397118516e-05, "loss": 0.2187, "step": 9645, "teacher_loss": 0.2183527797460556 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.9464833736419678, "learning_rate": 2.873751209298549e-05, "loss": 0.3562, "step": 9646, "teacher_loss": 0.2906343638896942 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.5458304286003113, "learning_rate": 2.873659989982586e-05, "loss": 0.5585, "step": 9647, "teacher_loss": 0.5599138736724854 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.6588068008422852, "learning_rate": 2.873568739172718e-05, "loss": 0.3182, "step": 9648, "teacher_loss": 0.28034892678260803 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.555245041847229, "learning_rate": 2.8734774568710372e-05, "loss": 0.3897, "step": 9649, "teacher_loss": 0.3712575137615204 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.36945241689682007, "learning_rate": 2.8733861430796355e-05, "loss": 0.2855, "step": 9650, "teacher_loss": 0.2761967182159424 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.43980157375335693, "learning_rate": 2.8732947978006073e-05, "loss": 0.3105, "step": 9651, "teacher_loss": 0.2960885763168335 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.8024044036865234, "learning_rate": 2.873203421036047e-05, "loss": 0.3599, "step": 9652, "teacher_loss": 0.31072884798049927 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.20619887113571167, "learning_rate": 2.8731120127880496e-05, "loss": 0.2096, "step": 9653, "teacher_loss": 0.20994935929775238 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.2963258624076843, "learning_rate": 2.87302057305871e-05, "loss": 0.2141, "step": 9654, "teacher_loss": 0.20498046278953552 }, { "compression_loss": 0.0, "epoch": 1.74, "label_loss": 0.3191768527030945, "learning_rate": 2.8729291018501258e-05, "loss": 0.2174, "step": 9655, "teacher_loss": 0.20611363649368286 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.32053327560424805, "learning_rate": 2.872837599164393e-05, "loss": 0.204, "step": 9656, "teacher_loss": 0.19110167026519775 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.2586548626422882, "learning_rate": 2.8727460650036102e-05, "loss": 0.2418, "step": 9657, "teacher_loss": 0.23990559577941895 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.527611255645752, "learning_rate": 2.872654499369876e-05, "loss": 0.3257, "step": 9658, "teacher_loss": 0.3032572567462921 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.42819738388061523, "learning_rate": 2.8725629022652894e-05, "loss": 0.2825, "step": 9659, "teacher_loss": 0.26631683111190796 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.3624469041824341, "learning_rate": 2.8724712736919503e-05, "loss": 0.2347, "step": 9660, "teacher_loss": 0.22052177786827087 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.47687751054763794, "learning_rate": 2.8723796136519604e-05, "loss": 0.3031, "step": 9661, "teacher_loss": 0.2838353216648102 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4230602979660034, "learning_rate": 2.87228792214742e-05, "loss": 0.335, "step": 9662, "teacher_loss": 0.3251716196537018 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5529975891113281, "learning_rate": 2.8721961991804322e-05, "loss": 0.2895, "step": 9663, "teacher_loss": 0.26019471883773804 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.610116720199585, "learning_rate": 2.8721044447530998e-05, "loss": 0.2594, "step": 9664, "teacher_loss": 0.2204609215259552 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.31353241205215454, "learning_rate": 2.8720126588675258e-05, "loss": 0.3551, "step": 9665, "teacher_loss": 0.359742134809494 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.24362307786941528, "learning_rate": 2.8719208415258153e-05, "loss": 0.211, "step": 9666, "teacher_loss": 0.2073325514793396 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.2073344886302948, "learning_rate": 2.871828992730073e-05, "loss": 0.3241, "step": 9667, "teacher_loss": 0.33704957365989685 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.8534784913063049, "learning_rate": 2.871737112482405e-05, "loss": 0.3936, "step": 9668, "teacher_loss": 0.34253209829330444 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.19210059940814972, "learning_rate": 2.8716452007849177e-05, "loss": 0.2519, "step": 9669, "teacher_loss": 0.2585349380970001 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.3710358738899231, "learning_rate": 2.8715532576397187e-05, "loss": 0.4822, "step": 9670, "teacher_loss": 0.4945949912071228 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.6151652336120605, "learning_rate": 2.8714612830489148e-05, "loss": 0.348, "step": 9671, "teacher_loss": 0.3183022737503052 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.34702199697494507, "learning_rate": 2.8713692770146158e-05, "loss": 0.2069, "step": 9672, "teacher_loss": 0.1913062483072281 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5169847011566162, "learning_rate": 2.8712772395389312e-05, "loss": 0.2455, "step": 9673, "teacher_loss": 0.21538686752319336 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.674384593963623, "learning_rate": 2.8711851706239705e-05, "loss": 0.3621, "step": 9674, "teacher_loss": 0.32739055156707764 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.32784146070480347, "learning_rate": 2.871093070271845e-05, "loss": 0.2344, "step": 9675, "teacher_loss": 0.22406092286109924 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4151034951210022, "learning_rate": 2.8710009384846664e-05, "loss": 0.2134, "step": 9676, "teacher_loss": 0.19093704223632812 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.28529709577560425, "learning_rate": 2.8709087752645464e-05, "loss": 0.1956, "step": 9677, "teacher_loss": 0.1856432408094406 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.27403947710990906, "learning_rate": 2.8708165806135986e-05, "loss": 0.1814, "step": 9678, "teacher_loss": 0.1711527705192566 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.2687414884567261, "learning_rate": 2.8707243545339364e-05, "loss": 0.2112, "step": 9679, "teacher_loss": 0.20479148626327515 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5349726676940918, "learning_rate": 2.870632097027674e-05, "loss": 0.2526, "step": 9680, "teacher_loss": 0.22120168805122375 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5195779204368591, "learning_rate": 2.8705398080969274e-05, "loss": 0.267, "step": 9681, "teacher_loss": 0.23892182111740112 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.313081830739975, "learning_rate": 2.870447487743812e-05, "loss": 0.258, "step": 9682, "teacher_loss": 0.2519094944000244 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 1.0189120769500732, "learning_rate": 2.8703551359704447e-05, "loss": 0.4375, "step": 9683, "teacher_loss": 0.3729253113269806 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5007901787757874, "learning_rate": 2.8702627527789427e-05, "loss": 0.2954, "step": 9684, "teacher_loss": 0.27253109216690063 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.2311154305934906, "learning_rate": 2.8701703381714237e-05, "loss": 0.1716, "step": 9685, "teacher_loss": 0.16503441333770752 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.6135351657867432, "learning_rate": 2.870077892150007e-05, "loss": 0.3598, "step": 9686, "teacher_loss": 0.33164626359939575 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.33166125416755676, "learning_rate": 2.869985414716812e-05, "loss": 0.2115, "step": 9687, "teacher_loss": 0.1980951875448227 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.3468107581138611, "learning_rate": 2.8698929058739587e-05, "loss": 0.2286, "step": 9688, "teacher_loss": 0.21543839573860168 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5006410479545593, "learning_rate": 2.8698003656235686e-05, "loss": 0.5066, "step": 9689, "teacher_loss": 0.5072280168533325 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.3160492777824402, "learning_rate": 2.8697077939677627e-05, "loss": 0.1964, "step": 9690, "teacher_loss": 0.18307873606681824 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.41973501443862915, "learning_rate": 2.8696151909086633e-05, "loss": 0.2339, "step": 9691, "teacher_loss": 0.21327200531959534 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4365221858024597, "learning_rate": 2.869522556448395e-05, "loss": 0.2685, "step": 9692, "teacher_loss": 0.2498350739479065 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.6558871269226074, "learning_rate": 2.8694298905890795e-05, "loss": 0.3587, "step": 9693, "teacher_loss": 0.32568246126174927 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5029205679893494, "learning_rate": 2.8693371933328426e-05, "loss": 0.3843, "step": 9694, "teacher_loss": 0.3711155652999878 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4935513734817505, "learning_rate": 2.8692444646818096e-05, "loss": 0.2867, "step": 9695, "teacher_loss": 0.26376235485076904 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4425468444824219, "learning_rate": 2.8691517046381062e-05, "loss": 0.3191, "step": 9696, "teacher_loss": 0.30540698766708374 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5071808099746704, "learning_rate": 2.869058913203859e-05, "loss": 0.2612, "step": 9697, "teacher_loss": 0.23386384546756744 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5825119614601135, "learning_rate": 2.8689660903811956e-05, "loss": 0.3148, "step": 9698, "teacher_loss": 0.2850485146045685 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.39906707406044006, "learning_rate": 2.8688732361722438e-05, "loss": 0.3074, "step": 9699, "teacher_loss": 0.2971632480621338 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 1.064254879951477, "learning_rate": 2.868780350579133e-05, "loss": 0.4063, "step": 9700, "teacher_loss": 0.3331586718559265 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4207301139831543, "learning_rate": 2.868687433603993e-05, "loss": 0.3103, "step": 9701, "teacher_loss": 0.2980774939060211 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5560793876647949, "learning_rate": 2.8685944852489533e-05, "loss": 0.2688, "step": 9702, "teacher_loss": 0.2368604689836502 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5776731371879578, "learning_rate": 2.8685015055161454e-05, "loss": 0.4198, "step": 9703, "teacher_loss": 0.40221595764160156 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4112074375152588, "learning_rate": 2.8684084944077012e-05, "loss": 0.2462, "step": 9704, "teacher_loss": 0.22789642214775085 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.2412782907485962, "learning_rate": 2.8683154519257523e-05, "loss": 0.2392, "step": 9705, "teacher_loss": 0.23902060091495514 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.43352025747299194, "learning_rate": 2.868222378072433e-05, "loss": 0.3681, "step": 9706, "teacher_loss": 0.3607964515686035 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.4817594587802887, "learning_rate": 2.8681292728498768e-05, "loss": 0.3523, "step": 9707, "teacher_loss": 0.3378788232803345 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.5018646121025085, "learning_rate": 2.8680361362602178e-05, "loss": 0.2811, "step": 9708, "teacher_loss": 0.2565270960330963 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.3221518397331238, "learning_rate": 2.8679429683055918e-05, "loss": 0.2118, "step": 9709, "teacher_loss": 0.1995304375886917 }, { "compression_loss": 0.0, "epoch": 1.75, "label_loss": 0.25086843967437744, "learning_rate": 2.8678497689881354e-05, "loss": 0.1874, "step": 9710, "teacher_loss": 0.1803758293390274 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3539390563964844, "learning_rate": 2.8677565383099845e-05, "loss": 0.4457, "step": 9711, "teacher_loss": 0.455923855304718 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.9557262659072876, "learning_rate": 2.8676632762732762e-05, "loss": 0.5112, "step": 9712, "teacher_loss": 0.46177637577056885 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5417178869247437, "learning_rate": 2.8675699828801506e-05, "loss": 0.2265, "step": 9713, "teacher_loss": 0.19149817526340485 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6044718027114868, "learning_rate": 2.867476658132745e-05, "loss": 0.2619, "step": 9714, "teacher_loss": 0.22383756935596466 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.2758791744709015, "learning_rate": 2.8673833020331993e-05, "loss": 0.2405, "step": 9715, "teacher_loss": 0.23660224676132202 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.9286810159683228, "learning_rate": 2.8672899145836548e-05, "loss": 0.4207, "step": 9716, "teacher_loss": 0.3642313480377197 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.669777512550354, "learning_rate": 2.8671964957862517e-05, "loss": 0.5166, "step": 9717, "teacher_loss": 0.49958646297454834 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.2174762636423111, "learning_rate": 2.8671030456431313e-05, "loss": 0.182, "step": 9718, "teacher_loss": 0.17805570363998413 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.39027640223503113, "learning_rate": 2.8670095641564375e-05, "loss": 0.2282, "step": 9719, "teacher_loss": 0.21022240817546844 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5518113970756531, "learning_rate": 2.8669160513283125e-05, "loss": 0.2679, "step": 9720, "teacher_loss": 0.23632583022117615 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6897218227386475, "learning_rate": 2.8668225071609012e-05, "loss": 0.3779, "step": 9721, "teacher_loss": 0.3432038128376007 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.34343016147613525, "learning_rate": 2.8667289316563476e-05, "loss": 0.2399, "step": 9722, "teacher_loss": 0.22843725979328156 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.4929710626602173, "learning_rate": 2.8666353248167967e-05, "loss": 0.2421, "step": 9723, "teacher_loss": 0.21427667140960693 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6443290710449219, "learning_rate": 2.866541686644396e-05, "loss": 0.2804, "step": 9724, "teacher_loss": 0.2399246096611023 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.4794856309890747, "learning_rate": 2.866448017141291e-05, "loss": 0.3024, "step": 9725, "teacher_loss": 0.28271445631980896 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5353676080703735, "learning_rate": 2.8663543163096298e-05, "loss": 0.2489, "step": 9726, "teacher_loss": 0.21712306141853333 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.8236091732978821, "learning_rate": 2.866260584151561e-05, "loss": 0.2857, "step": 9727, "teacher_loss": 0.22595667839050293 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5364168286323547, "learning_rate": 2.866166820669233e-05, "loss": 0.2299, "step": 9728, "teacher_loss": 0.1958591639995575 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.7631489038467407, "learning_rate": 2.8660730258647962e-05, "loss": 0.2712, "step": 9729, "teacher_loss": 0.21655318140983582 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.4687633216381073, "learning_rate": 2.8659791997404e-05, "loss": 0.2241, "step": 9730, "teacher_loss": 0.19693905115127563 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.7576920986175537, "learning_rate": 2.8658853422981964e-05, "loss": 0.4193, "step": 9731, "teacher_loss": 0.381686806678772 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.23076625168323517, "learning_rate": 2.865791453540337e-05, "loss": 0.1948, "step": 9732, "teacher_loss": 0.19076424837112427 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.571945309638977, "learning_rate": 2.8656975334689746e-05, "loss": 0.2368, "step": 9733, "teacher_loss": 0.19953002035617828 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.247168630361557, "learning_rate": 2.865603582086262e-05, "loss": 0.1812, "step": 9734, "teacher_loss": 0.17381900548934937 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.7607147693634033, "learning_rate": 2.865509599394354e-05, "loss": 0.4261, "step": 9735, "teacher_loss": 0.38888630270957947 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3236229717731476, "learning_rate": 2.8654155853954044e-05, "loss": 0.2537, "step": 9736, "teacher_loss": 0.24590489268302917 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.17992353439331055, "learning_rate": 2.8653215400915696e-05, "loss": 0.2032, "step": 9737, "teacher_loss": 0.20584172010421753 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.30910223722457886, "learning_rate": 2.865227463485005e-05, "loss": 0.3685, "step": 9738, "teacher_loss": 0.37504446506500244 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.16428303718566895, "learning_rate": 2.865133355577868e-05, "loss": 0.2474, "step": 9739, "teacher_loss": 0.2566138505935669 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.7258551120758057, "learning_rate": 2.8650392163723165e-05, "loss": 0.3786, "step": 9740, "teacher_loss": 0.340040922164917 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.39395958185195923, "learning_rate": 2.8649450458705076e-05, "loss": 0.1857, "step": 9741, "teacher_loss": 0.1625867486000061 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.2700785994529724, "learning_rate": 2.8648508440746015e-05, "loss": 0.2368, "step": 9742, "teacher_loss": 0.2331409752368927 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.34350600838661194, "learning_rate": 2.8647566109867573e-05, "loss": 0.2873, "step": 9743, "teacher_loss": 0.28108030557632446 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.33573177456855774, "learning_rate": 2.8646623466091362e-05, "loss": 0.2209, "step": 9744, "teacher_loss": 0.20811808109283447 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3943679928779602, "learning_rate": 2.864568050943899e-05, "loss": 0.3507, "step": 9745, "teacher_loss": 0.34584128856658936 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.2950679361820221, "learning_rate": 2.8644737239932072e-05, "loss": 0.2685, "step": 9746, "teacher_loss": 0.2655636966228485 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5501250624656677, "learning_rate": 2.8643793657592236e-05, "loss": 0.2817, "step": 9747, "teacher_loss": 0.2518511414527893 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3824694752693176, "learning_rate": 2.8642849762441122e-05, "loss": 0.2985, "step": 9748, "teacher_loss": 0.2891874313354492 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.7683556079864502, "learning_rate": 2.8641905554500366e-05, "loss": 0.3339, "step": 9749, "teacher_loss": 0.2856735587120056 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3394649624824524, "learning_rate": 2.8640961033791616e-05, "loss": 0.287, "step": 9750, "teacher_loss": 0.28117120265960693 }, { "epoch": 1.76, "eval_exact_match": 79.51750236518448, "eval_f1": 86.93126886079065, "step": 9750 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.4933737814426422, "learning_rate": 2.8640016200336527e-05, "loss": 0.2431, "step": 9751, "teacher_loss": 0.21523982286453247 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.29733914136886597, "learning_rate": 2.8639071054156758e-05, "loss": 0.2626, "step": 9752, "teacher_loss": 0.25875383615493774 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.393619179725647, "learning_rate": 2.8638125595273984e-05, "loss": 0.2779, "step": 9753, "teacher_loss": 0.2650052607059479 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6418403387069702, "learning_rate": 2.8637179823709885e-05, "loss": 0.3048, "step": 9754, "teacher_loss": 0.2673349976539612 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.4987420439720154, "learning_rate": 2.8636233739486132e-05, "loss": 0.3131, "step": 9755, "teacher_loss": 0.29245319962501526 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.3940998911857605, "learning_rate": 2.8635287342624425e-05, "loss": 0.2463, "step": 9756, "teacher_loss": 0.22992157936096191 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6517037153244019, "learning_rate": 2.863434063314646e-05, "loss": 0.2252, "step": 9757, "teacher_loss": 0.17785786092281342 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6963098049163818, "learning_rate": 2.8633393611073943e-05, "loss": 0.3124, "step": 9758, "teacher_loss": 0.2696886956691742 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.17263562977313995, "learning_rate": 2.8632446276428582e-05, "loss": 0.1402, "step": 9759, "teacher_loss": 0.13657408952713013 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.23942387104034424, "learning_rate": 2.8631498629232103e-05, "loss": 0.176, "step": 9760, "teacher_loss": 0.168988436460495 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.43425804376602173, "learning_rate": 2.8630550669506232e-05, "loss": 0.2447, "step": 9761, "teacher_loss": 0.22359436750411987 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.6547526717185974, "learning_rate": 2.8629602397272696e-05, "loss": 0.3065, "step": 9762, "teacher_loss": 0.2677672505378723 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.8350136280059814, "learning_rate": 2.8628653812553242e-05, "loss": 0.352, "step": 9763, "teacher_loss": 0.2982976734638214 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.5028699636459351, "learning_rate": 2.8627704915369622e-05, "loss": 0.3641, "step": 9764, "teacher_loss": 0.34864526987075806 }, { "compression_loss": 0.0, "epoch": 1.76, "label_loss": 0.9776653051376343, "learning_rate": 2.862675570574358e-05, "loss": 0.3766, "step": 9765, "teacher_loss": 0.3098118305206299 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.3517906665802002, "learning_rate": 2.8625806183696885e-05, "loss": 0.2689, "step": 9766, "teacher_loss": 0.2596439719200134 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.35569554567337036, "learning_rate": 2.862485634925131e-05, "loss": 0.2142, "step": 9767, "teacher_loss": 0.1984252631664276 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 1.1156789064407349, "learning_rate": 2.8623906202428628e-05, "loss": 0.4184, "step": 9768, "teacher_loss": 0.3408896327018738 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.31348246335983276, "learning_rate": 2.8622955743250622e-05, "loss": 0.3634, "step": 9769, "teacher_loss": 0.3689323663711548 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.2325993776321411, "learning_rate": 2.8622004971739086e-05, "loss": 0.2801, "step": 9770, "teacher_loss": 0.285374253988266 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6928762793540955, "learning_rate": 2.8621053887915813e-05, "loss": 0.3338, "step": 9771, "teacher_loss": 0.29392164945602417 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.44200950860977173, "learning_rate": 2.862010249180262e-05, "loss": 0.2281, "step": 9772, "teacher_loss": 0.2042994201183319 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.7773900032043457, "learning_rate": 2.8619150783421303e-05, "loss": 0.5007, "step": 9773, "teacher_loss": 0.469971626996994 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.70094233751297, "learning_rate": 2.8618198762793696e-05, "loss": 0.3197, "step": 9774, "teacher_loss": 0.2773780822753906 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.42249706387519836, "learning_rate": 2.861724642994162e-05, "loss": 0.3537, "step": 9775, "teacher_loss": 0.3461114168167114 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4572070837020874, "learning_rate": 2.861629378488691e-05, "loss": 0.2855, "step": 9776, "teacher_loss": 0.26643991470336914 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5427082180976868, "learning_rate": 2.8615340827651407e-05, "loss": 0.36, "step": 9777, "teacher_loss": 0.3397466540336609 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5354291200637817, "learning_rate": 2.861438755825696e-05, "loss": 0.3228, "step": 9778, "teacher_loss": 0.29919272661209106 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.9165619611740112, "learning_rate": 2.8613433976725424e-05, "loss": 0.2909, "step": 9779, "teacher_loss": 0.2213437557220459 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.3289463520050049, "learning_rate": 2.8612480083078658e-05, "loss": 0.2108, "step": 9780, "teacher_loss": 0.19766588509082794 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4514867067337036, "learning_rate": 2.8611525877338543e-05, "loss": 0.248, "step": 9781, "teacher_loss": 0.2253987193107605 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6315058469772339, "learning_rate": 2.8610571359526944e-05, "loss": 0.2889, "step": 9782, "teacher_loss": 0.25080764293670654 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5729396343231201, "learning_rate": 2.8609616529665753e-05, "loss": 0.3594, "step": 9783, "teacher_loss": 0.3356652855873108 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5215010643005371, "learning_rate": 2.860866138777686e-05, "loss": 0.2557, "step": 9784, "teacher_loss": 0.2261490821838379 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.8496921062469482, "learning_rate": 2.8607705933882157e-05, "loss": 0.3558, "step": 9785, "teacher_loss": 0.30092036724090576 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.3914310336112976, "learning_rate": 2.860675016800356e-05, "loss": 0.181, "step": 9786, "teacher_loss": 0.15760132670402527 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.7524313926696777, "learning_rate": 2.8605794090162978e-05, "loss": 0.3927, "step": 9787, "teacher_loss": 0.35273706912994385 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.8007498979568481, "learning_rate": 2.8604837700382324e-05, "loss": 0.271, "step": 9788, "teacher_loss": 0.21209371089935303 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.2934896945953369, "learning_rate": 2.8603880998683535e-05, "loss": 0.2285, "step": 9789, "teacher_loss": 0.22125005722045898 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.3135831356048584, "learning_rate": 2.8602923985088543e-05, "loss": 0.171, "step": 9790, "teacher_loss": 0.15514321625232697 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.7407754063606262, "learning_rate": 2.8601966659619283e-05, "loss": 0.3897, "step": 9791, "teacher_loss": 0.35070645809173584 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4148141145706177, "learning_rate": 2.860100902229771e-05, "loss": 0.2872, "step": 9792, "teacher_loss": 0.2729969024658203 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6527068614959717, "learning_rate": 2.8600051073145778e-05, "loss": 0.3029, "step": 9793, "teacher_loss": 0.2640741467475891 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.403251588344574, "learning_rate": 2.8599092812185453e-05, "loss": 0.2835, "step": 9794, "teacher_loss": 0.27024146914482117 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6129017472267151, "learning_rate": 2.8598134239438697e-05, "loss": 0.3429, "step": 9795, "teacher_loss": 0.31294164061546326 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6439265012741089, "learning_rate": 2.8597175354927492e-05, "loss": 0.4273, "step": 9796, "teacher_loss": 0.4032716155052185 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5898388624191284, "learning_rate": 2.8596216158673828e-05, "loss": 0.3301, "step": 9797, "teacher_loss": 0.3012082874774933 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.281993567943573, "learning_rate": 2.859525665069969e-05, "loss": 0.2386, "step": 9798, "teacher_loss": 0.2337360978126526 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.5858614444732666, "learning_rate": 2.8594296831027075e-05, "loss": 0.3866, "step": 9799, "teacher_loss": 0.36444640159606934 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.7638803720474243, "learning_rate": 2.859333669967799e-05, "loss": 0.2725, "step": 9800, "teacher_loss": 0.21795490384101868 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.128788560628891, "learning_rate": 2.8592376256674455e-05, "loss": 0.1834, "step": 9801, "teacher_loss": 0.18948069214820862 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.12381817400455475, "learning_rate": 2.8591415502038477e-05, "loss": 0.2161, "step": 9802, "teacher_loss": 0.22639340162277222 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.525272786617279, "learning_rate": 2.8590454435792096e-05, "loss": 0.2757, "step": 9803, "teacher_loss": 0.2480016052722931 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.31802237033843994, "learning_rate": 2.8589493057957337e-05, "loss": 0.3262, "step": 9804, "teacher_loss": 0.3271319270133972 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 1.077929973602295, "learning_rate": 2.8588531368556254e-05, "loss": 0.3946, "step": 9805, "teacher_loss": 0.3186517059803009 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.36122673749923706, "learning_rate": 2.8587569367610882e-05, "loss": 0.2614, "step": 9806, "teacher_loss": 0.2502981424331665 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.2923242449760437, "learning_rate": 2.858660705514328e-05, "loss": 0.2469, "step": 9807, "teacher_loss": 0.2418815940618515 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4481073021888733, "learning_rate": 2.858564443117551e-05, "loss": 0.3191, "step": 9808, "teacher_loss": 0.3047882914543152 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4928377866744995, "learning_rate": 2.8584681495729657e-05, "loss": 0.2201, "step": 9809, "teacher_loss": 0.18981999158859253 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.25782811641693115, "learning_rate": 2.8583718248827778e-05, "loss": 0.3193, "step": 9810, "teacher_loss": 0.32612496614456177 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.4052194058895111, "learning_rate": 2.8582754690491966e-05, "loss": 0.2999, "step": 9811, "teacher_loss": 0.28816381096839905 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.269300252199173, "learning_rate": 2.8581790820744315e-05, "loss": 0.2201, "step": 9812, "teacher_loss": 0.21467384696006775 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.2831853926181793, "learning_rate": 2.8580826639606915e-05, "loss": 0.1787, "step": 9813, "teacher_loss": 0.16705730557441711 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.28005528450012207, "learning_rate": 2.8579862147101884e-05, "loss": 0.2516, "step": 9814, "teacher_loss": 0.24841713905334473 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.46240460872650146, "learning_rate": 2.8578897343251327e-05, "loss": 0.3808, "step": 9815, "teacher_loss": 0.37174704670906067 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.9807002544403076, "learning_rate": 2.8577932228077364e-05, "loss": 0.3343, "step": 9816, "teacher_loss": 0.2624368965625763 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.47120124101638794, "learning_rate": 2.8576966801602123e-05, "loss": 0.3385, "step": 9817, "teacher_loss": 0.3237246870994568 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.21141573786735535, "learning_rate": 2.8576001063847743e-05, "loss": 0.2297, "step": 9818, "teacher_loss": 0.23177461326122284 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.206715390086174, "learning_rate": 2.8575035014836354e-05, "loss": 0.2964, "step": 9819, "teacher_loss": 0.30636298656463623 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.3624690771102905, "learning_rate": 2.8574068654590118e-05, "loss": 0.4137, "step": 9820, "teacher_loss": 0.41935306787490845 }, { "compression_loss": 0.0, "epoch": 1.77, "label_loss": 0.6026482582092285, "learning_rate": 2.8573101983131184e-05, "loss": 0.4615, "step": 9821, "teacher_loss": 0.4458516538143158 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.4024810194969177, "learning_rate": 2.8572135000481717e-05, "loss": 0.2758, "step": 9822, "teacher_loss": 0.26166924834251404 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.20206865668296814, "learning_rate": 2.857116770666389e-05, "loss": 0.2337, "step": 9823, "teacher_loss": 0.237208291888237 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.7365269660949707, "learning_rate": 2.8570200101699868e-05, "loss": 0.2601, "step": 9824, "teacher_loss": 0.20721282064914703 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3817863464355469, "learning_rate": 2.8569232185611843e-05, "loss": 0.2581, "step": 9825, "teacher_loss": 0.24433547258377075 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.4644962251186371, "learning_rate": 2.8568263958422013e-05, "loss": 0.2864, "step": 9826, "teacher_loss": 0.26657551527023315 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.36981451511383057, "learning_rate": 2.8567295420152567e-05, "loss": 0.28, "step": 9827, "teacher_loss": 0.27003952860832214 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.607772946357727, "learning_rate": 2.856632657082571e-05, "loss": 0.302, "step": 9828, "teacher_loss": 0.2680475115776062 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.7324094772338867, "learning_rate": 2.8565357410463664e-05, "loss": 0.3301, "step": 9829, "teacher_loss": 0.2854401469230652 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.6441067457199097, "learning_rate": 2.856438793908864e-05, "loss": 0.2728, "step": 9830, "teacher_loss": 0.2315816730260849 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.6084052920341492, "learning_rate": 2.8563418156722875e-05, "loss": 0.3063, "step": 9831, "teacher_loss": 0.2727759778499603 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.718802809715271, "learning_rate": 2.8562448063388592e-05, "loss": 0.3303, "step": 9832, "teacher_loss": 0.287092924118042 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.27423417568206787, "learning_rate": 2.8561477659108034e-05, "loss": 0.2469, "step": 9833, "teacher_loss": 0.24389639496803284 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5913593769073486, "learning_rate": 2.8560506943903455e-05, "loss": 0.2845, "step": 9834, "teacher_loss": 0.25044262409210205 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.8238702416419983, "learning_rate": 2.8559535917797114e-05, "loss": 0.2854, "step": 9835, "teacher_loss": 0.22556337714195251 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.40958574414253235, "learning_rate": 2.855856458081126e-05, "loss": 0.3014, "step": 9836, "teacher_loss": 0.28938421607017517 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.329242467880249, "learning_rate": 2.8557592932968177e-05, "loss": 0.3378, "step": 9837, "teacher_loss": 0.33874160051345825 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.36360520124435425, "learning_rate": 2.8556620974290132e-05, "loss": 0.2673, "step": 9838, "teacher_loss": 0.25660085678100586 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.32166025042533875, "learning_rate": 2.8555648704799418e-05, "loss": 0.2427, "step": 9839, "teacher_loss": 0.23396605253219604 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3262128233909607, "learning_rate": 2.8554676124518313e-05, "loss": 0.1946, "step": 9840, "teacher_loss": 0.1800258457660675 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3956587314605713, "learning_rate": 2.855370323346913e-05, "loss": 0.2516, "step": 9841, "teacher_loss": 0.23559823632240295 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.2078457474708557, "learning_rate": 2.8552730031674164e-05, "loss": 0.2478, "step": 9842, "teacher_loss": 0.2521919012069702 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.29933232069015503, "learning_rate": 2.8551756519155732e-05, "loss": 0.2719, "step": 9843, "teacher_loss": 0.26889461278915405 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.18209177255630493, "learning_rate": 2.8550782695936156e-05, "loss": 0.1938, "step": 9844, "teacher_loss": 0.19512730836868286 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.815851628780365, "learning_rate": 2.854980856203776e-05, "loss": 0.2908, "step": 9845, "teacher_loss": 0.23244981467723846 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5966867804527283, "learning_rate": 2.8548834117482877e-05, "loss": 0.334, "step": 9846, "teacher_loss": 0.30483123660087585 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.4707828462123871, "learning_rate": 2.854785936229385e-05, "loss": 0.2204, "step": 9847, "teacher_loss": 0.19260910153388977 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.32042425870895386, "learning_rate": 2.8546884296493027e-05, "loss": 0.4107, "step": 9848, "teacher_loss": 0.4207236170768738 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.21508243680000305, "learning_rate": 2.854590892010276e-05, "loss": 0.2845, "step": 9849, "teacher_loss": 0.292217880487442 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.567185640335083, "learning_rate": 2.8544933233145418e-05, "loss": 0.3431, "step": 9850, "teacher_loss": 0.31823480129241943 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5142399668693542, "learning_rate": 2.8543957235643362e-05, "loss": 0.4037, "step": 9851, "teacher_loss": 0.3913862705230713 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.19140490889549255, "learning_rate": 2.8542980927618974e-05, "loss": 0.203, "step": 9852, "teacher_loss": 0.20426297187805176 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.29847294092178345, "learning_rate": 2.854200430909464e-05, "loss": 0.2184, "step": 9853, "teacher_loss": 0.20952346920967102 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.7001780867576599, "learning_rate": 2.8541027380092747e-05, "loss": 0.4384, "step": 9854, "teacher_loss": 0.40936318039894104 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.7761991620063782, "learning_rate": 2.8540050140635694e-05, "loss": 0.361, "step": 9855, "teacher_loss": 0.314916729927063 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.7685930728912354, "learning_rate": 2.853907259074589e-05, "loss": 0.373, "step": 9856, "teacher_loss": 0.3290581703186035 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3320949375629425, "learning_rate": 2.853809473044574e-05, "loss": 0.247, "step": 9857, "teacher_loss": 0.2374969869852066 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.4057954251766205, "learning_rate": 2.8537116559757666e-05, "loss": 0.2801, "step": 9858, "teacher_loss": 0.2661496102809906 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3791216015815735, "learning_rate": 2.85361380787041e-05, "loss": 0.213, "step": 9859, "teacher_loss": 0.19449107348918915 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.31069695949554443, "learning_rate": 2.8535159287307466e-05, "loss": 0.2257, "step": 9860, "teacher_loss": 0.21625259518623352 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.8401548862457275, "learning_rate": 2.853418018559022e-05, "loss": 0.4218, "step": 9861, "teacher_loss": 0.37532132863998413 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3823752701282501, "learning_rate": 2.8533200773574787e-05, "loss": 0.2749, "step": 9862, "teacher_loss": 0.2629657983779907 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 1.0753488540649414, "learning_rate": 2.853222105128364e-05, "loss": 0.3257, "step": 9863, "teacher_loss": 0.2424429953098297 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5001740455627441, "learning_rate": 2.853124101873924e-05, "loss": 0.3137, "step": 9864, "teacher_loss": 0.29298165440559387 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 1.1806230545043945, "learning_rate": 2.8530260675964046e-05, "loss": 0.5216, "step": 9865, "teacher_loss": 0.4483719766139984 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.12440374493598938, "learning_rate": 2.8529280022980546e-05, "loss": 0.2351, "step": 9866, "teacher_loss": 0.24739037454128265 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.21661391854286194, "learning_rate": 2.8528299059811215e-05, "loss": 0.2204, "step": 9867, "teacher_loss": 0.22086814045906067 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5308147668838501, "learning_rate": 2.852731778647855e-05, "loss": 0.2288, "step": 9868, "teacher_loss": 0.1952137053012848 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.6771795153617859, "learning_rate": 2.852633620300504e-05, "loss": 0.3439, "step": 9869, "teacher_loss": 0.30682307481765747 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3790631890296936, "learning_rate": 2.8525354309413194e-05, "loss": 0.203, "step": 9870, "teacher_loss": 0.18340197205543518 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.6363522410392761, "learning_rate": 2.852437210572553e-05, "loss": 0.3238, "step": 9871, "teacher_loss": 0.2890468239784241 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.26097410917282104, "learning_rate": 2.852338959196456e-05, "loss": 0.2282, "step": 9872, "teacher_loss": 0.2245054543018341 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.5878296494483948, "learning_rate": 2.852240676815281e-05, "loss": 0.234, "step": 9873, "teacher_loss": 0.19466212391853333 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.27538934350013733, "learning_rate": 2.852142363431282e-05, "loss": 0.2383, "step": 9874, "teacher_loss": 0.2342124879360199 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.45194587111473083, "learning_rate": 2.852044019046712e-05, "loss": 0.2452, "step": 9875, "teacher_loss": 0.22225746512413025 }, { "compression_loss": 0.0, "epoch": 1.78, "label_loss": 0.3667333126068115, "learning_rate": 2.8519456436638264e-05, "loss": 0.2073, "step": 9876, "teacher_loss": 0.1896015703678131 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.6565039157867432, "learning_rate": 2.8518472372848807e-05, "loss": 0.3031, "step": 9877, "teacher_loss": 0.26383769512176514 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5831775665283203, "learning_rate": 2.851748799912131e-05, "loss": 0.3206, "step": 9878, "teacher_loss": 0.29143059253692627 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.6539947986602783, "learning_rate": 2.8516503315478335e-05, "loss": 0.2663, "step": 9879, "teacher_loss": 0.2232193648815155 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5833121538162231, "learning_rate": 2.8515518321942472e-05, "loss": 0.2865, "step": 9880, "teacher_loss": 0.25355756282806396 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.17323970794677734, "learning_rate": 2.8514533018536286e-05, "loss": 0.2257, "step": 9881, "teacher_loss": 0.23154215514659882 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 1.0290762186050415, "learning_rate": 2.851354740528238e-05, "loss": 0.3406, "step": 9882, "teacher_loss": 0.2640581727027893 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.44317227602005005, "learning_rate": 2.8512561482203353e-05, "loss": 0.2494, "step": 9883, "teacher_loss": 0.2278556078672409 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 1.023535132408142, "learning_rate": 2.85115752493218e-05, "loss": 0.4149, "step": 9884, "teacher_loss": 0.3473111391067505 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.44727179408073425, "learning_rate": 2.8510588706660338e-05, "loss": 0.289, "step": 9885, "teacher_loss": 0.27137070894241333 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.9495588541030884, "learning_rate": 2.8509601854241582e-05, "loss": 0.3094, "step": 9886, "teacher_loss": 0.23825867474079132 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2791885733604431, "learning_rate": 2.850861469208816e-05, "loss": 0.3149, "step": 9887, "teacher_loss": 0.3189193904399872 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.4645458459854126, "learning_rate": 2.8507627220222703e-05, "loss": 0.2793, "step": 9888, "teacher_loss": 0.2587423026561737 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.618566632270813, "learning_rate": 2.8506639438667853e-05, "loss": 0.5393, "step": 9889, "teacher_loss": 0.5304646492004395 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.4126269817352295, "learning_rate": 2.850565134744625e-05, "loss": 0.2857, "step": 9890, "teacher_loss": 0.2716103494167328 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.15565399825572968, "learning_rate": 2.8504662946580563e-05, "loss": 0.2011, "step": 9891, "teacher_loss": 0.20614346861839294 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2347583770751953, "learning_rate": 2.8503674236093438e-05, "loss": 0.2252, "step": 9892, "teacher_loss": 0.22415444254875183 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.6579325795173645, "learning_rate": 2.850268521600755e-05, "loss": 0.378, "step": 9893, "teacher_loss": 0.34686344861984253 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.582531750202179, "learning_rate": 2.8501695886345574e-05, "loss": 0.3054, "step": 9894, "teacher_loss": 0.27456730604171753 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.28062915802001953, "learning_rate": 2.850070624713019e-05, "loss": 0.2643, "step": 9895, "teacher_loss": 0.2624356746673584 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.46978843212127686, "learning_rate": 2.8499716298384094e-05, "loss": 0.1822, "step": 9896, "teacher_loss": 0.15021324157714844 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5566151738166809, "learning_rate": 2.849872604012997e-05, "loss": 0.2532, "step": 9897, "teacher_loss": 0.21945026516914368 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.41958922147750854, "learning_rate": 2.8497735472390535e-05, "loss": 0.2956, "step": 9898, "teacher_loss": 0.2818142771720886 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.6815909147262573, "learning_rate": 2.849674459518849e-05, "loss": 0.2954, "step": 9899, "teacher_loss": 0.2524435818195343 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.3325532078742981, "learning_rate": 2.849575340854656e-05, "loss": 0.2232, "step": 9900, "teacher_loss": 0.2110215276479721 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.04777495190501213, "learning_rate": 2.8494761912487466e-05, "loss": 0.1255, "step": 9901, "teacher_loss": 0.13408830761909485 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2978960871696472, "learning_rate": 2.8493770107033935e-05, "loss": 0.195, "step": 9902, "teacher_loss": 0.18360914289951324 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.27499282360076904, "learning_rate": 2.8492777992208722e-05, "loss": 0.2622, "step": 9903, "teacher_loss": 0.26077011227607727 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.47969692945480347, "learning_rate": 2.8491785568034558e-05, "loss": 0.2405, "step": 9904, "teacher_loss": 0.2139766812324524 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 1.199562668800354, "learning_rate": 2.84907928345342e-05, "loss": 0.4675, "step": 9905, "teacher_loss": 0.38611388206481934 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.38313227891921997, "learning_rate": 2.848979979173041e-05, "loss": 0.235, "step": 9906, "teacher_loss": 0.2185504287481308 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2430182695388794, "learning_rate": 2.8488806439645957e-05, "loss": 0.1943, "step": 9907, "teacher_loss": 0.1888800859451294 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2562699317932129, "learning_rate": 2.8487812778303615e-05, "loss": 0.194, "step": 9908, "teacher_loss": 0.18711219727993011 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5980132818222046, "learning_rate": 2.8486818807726162e-05, "loss": 0.2616, "step": 9909, "teacher_loss": 0.22427132725715637 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2748788595199585, "learning_rate": 2.8485824527936388e-05, "loss": 0.2585, "step": 9910, "teacher_loss": 0.256646990776062 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.7118290662765503, "learning_rate": 2.8484829938957094e-05, "loss": 0.2347, "step": 9911, "teacher_loss": 0.18172098696231842 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.44738996028900146, "learning_rate": 2.8483835040811076e-05, "loss": 0.2368, "step": 9912, "teacher_loss": 0.21338830888271332 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.09399784356355667, "learning_rate": 2.8482839833521147e-05, "loss": 0.1697, "step": 9913, "teacher_loss": 0.1780831217765808 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.7724782228469849, "learning_rate": 2.8481844317110127e-05, "loss": 0.3442, "step": 9914, "teacher_loss": 0.2965834140777588 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.9141056537628174, "learning_rate": 2.8480848491600838e-05, "loss": 0.3963, "step": 9915, "teacher_loss": 0.3387743830680847 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5922085046768188, "learning_rate": 2.847985235701611e-05, "loss": 0.2358, "step": 9916, "teacher_loss": 0.19620540738105774 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 1.0426573753356934, "learning_rate": 2.847885591337878e-05, "loss": 0.3734, "step": 9917, "teacher_loss": 0.2990170419216156 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.8257579803466797, "learning_rate": 2.8477859160711696e-05, "loss": 0.3603, "step": 9918, "teacher_loss": 0.3086114227771759 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5918781757354736, "learning_rate": 2.8476862099037712e-05, "loss": 0.3419, "step": 9919, "teacher_loss": 0.3141464293003082 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5281621217727661, "learning_rate": 2.8475864728379682e-05, "loss": 0.303, "step": 9920, "teacher_loss": 0.27793627977371216 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.3837212324142456, "learning_rate": 2.8474867048760482e-05, "loss": 0.3354, "step": 9921, "teacher_loss": 0.33006808161735535 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.5989212989807129, "learning_rate": 2.8473869060202976e-05, "loss": 0.4041, "step": 9922, "teacher_loss": 0.3824467062950134 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.38015252351760864, "learning_rate": 2.847287076273005e-05, "loss": 0.2438, "step": 9923, "teacher_loss": 0.22866132855415344 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.41769686341285706, "learning_rate": 2.847187215636459e-05, "loss": 0.2307, "step": 9924, "teacher_loss": 0.2099752426147461 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.9899097084999084, "learning_rate": 2.8470873241129495e-05, "loss": 0.3067, "step": 9925, "teacher_loss": 0.23078115284442902 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.6791456937789917, "learning_rate": 2.8469874017047665e-05, "loss": 0.2448, "step": 9926, "teacher_loss": 0.1965523064136505 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.36018267273902893, "learning_rate": 2.846887448414201e-05, "loss": 0.244, "step": 9927, "teacher_loss": 0.23109301924705505 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.4715564548969269, "learning_rate": 2.846787464243544e-05, "loss": 0.4214, "step": 9928, "teacher_loss": 0.41578084230422974 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.2985368072986603, "learning_rate": 2.8466874491950887e-05, "loss": 0.3594, "step": 9929, "teacher_loss": 0.3661215007305145 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.3729763925075531, "learning_rate": 2.8465874032711278e-05, "loss": 0.2684, "step": 9930, "teacher_loss": 0.2567366361618042 }, { "compression_loss": 0.0, "epoch": 1.79, "label_loss": 0.573815107345581, "learning_rate": 2.8464873264739543e-05, "loss": 0.2992, "step": 9931, "teacher_loss": 0.26864123344421387 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.764059841632843, "learning_rate": 2.8463872188058638e-05, "loss": 0.33, "step": 9932, "teacher_loss": 0.28173232078552246 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.25906500220298767, "learning_rate": 2.846287080269151e-05, "loss": 0.2029, "step": 9933, "teacher_loss": 0.19660684466362 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.36125412583351135, "learning_rate": 2.846186910866112e-05, "loss": 0.2451, "step": 9934, "teacher_loss": 0.23220570385456085 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.7969882488250732, "learning_rate": 2.8460867105990432e-05, "loss": 0.4164, "step": 9935, "teacher_loss": 0.37408044934272766 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5133858919143677, "learning_rate": 2.845986479470242e-05, "loss": 0.289, "step": 9936, "teacher_loss": 0.2640360891819 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.44553142786026, "learning_rate": 2.8458862174820064e-05, "loss": 0.3113, "step": 9937, "teacher_loss": 0.29635781049728394 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6729985475540161, "learning_rate": 2.8457859246366348e-05, "loss": 0.3297, "step": 9938, "teacher_loss": 0.29154330492019653 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.4567737579345703, "learning_rate": 2.845685600936427e-05, "loss": 0.2038, "step": 9939, "teacher_loss": 0.1756627857685089 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6258205771446228, "learning_rate": 2.8455852463836826e-05, "loss": 0.3584, "step": 9940, "teacher_loss": 0.32867884635925293 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6175175905227661, "learning_rate": 2.845484860980703e-05, "loss": 0.2989, "step": 9941, "teacher_loss": 0.2635374069213867 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6379503607749939, "learning_rate": 2.845384444729789e-05, "loss": 0.2831, "step": 9942, "teacher_loss": 0.24371957778930664 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.46774429082870483, "learning_rate": 2.845283997633244e-05, "loss": 0.3026, "step": 9943, "teacher_loss": 0.2841984033584595 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.296292245388031, "learning_rate": 2.8451835196933703e-05, "loss": 0.2022, "step": 9944, "teacher_loss": 0.19170412421226501 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5297198295593262, "learning_rate": 2.8450830109124712e-05, "loss": 0.2266, "step": 9945, "teacher_loss": 0.192958801984787 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.3401559889316559, "learning_rate": 2.8449824712928518e-05, "loss": 0.2736, "step": 9946, "teacher_loss": 0.26623162627220154 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.15827253460884094, "learning_rate": 2.8448819008368167e-05, "loss": 0.2246, "step": 9947, "teacher_loss": 0.2319531887769699 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.28979456424713135, "learning_rate": 2.8447812995466718e-05, "loss": 0.2944, "step": 9948, "teacher_loss": 0.2948848009109497 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.3130207061767578, "learning_rate": 2.844680667424723e-05, "loss": 0.1846, "step": 9949, "teacher_loss": 0.17032350599765778 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.37660524249076843, "learning_rate": 2.8445800044732787e-05, "loss": 0.1886, "step": 9950, "teacher_loss": 0.1676691472530365 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.2756948471069336, "learning_rate": 2.8444793106946458e-05, "loss": 0.2418, "step": 9951, "teacher_loss": 0.23802588880062103 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.8937015533447266, "learning_rate": 2.8443785860911337e-05, "loss": 0.373, "step": 9952, "teacher_loss": 0.315145343542099 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.7640095353126526, "learning_rate": 2.844277830665051e-05, "loss": 0.2439, "step": 9953, "teacher_loss": 0.18606916069984436 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.1451510190963745, "learning_rate": 2.844177044418708e-05, "loss": 0.1885, "step": 9954, "teacher_loss": 0.19331926107406616 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.2929535508155823, "learning_rate": 2.844076227354415e-05, "loss": 0.2178, "step": 9955, "teacher_loss": 0.20941904187202454 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6764476299285889, "learning_rate": 2.8439753794744848e-05, "loss": 0.3585, "step": 9956, "teacher_loss": 0.32313090562820435 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6294410228729248, "learning_rate": 2.843874500781228e-05, "loss": 0.2763, "step": 9957, "teacher_loss": 0.23709014058113098 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6707977652549744, "learning_rate": 2.8437735912769578e-05, "loss": 0.4574, "step": 9958, "teacher_loss": 0.43368589878082275 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.4132692813873291, "learning_rate": 2.8436726509639883e-05, "loss": 0.1976, "step": 9959, "teacher_loss": 0.1736908257007599 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.213484525680542, "learning_rate": 2.8435716798446338e-05, "loss": 0.2447, "step": 9960, "teacher_loss": 0.24814267456531525 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 1.1961371898651123, "learning_rate": 2.8434706779212083e-05, "loss": 0.4944, "step": 9961, "teacher_loss": 0.4164496064186096 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.40899306535720825, "learning_rate": 2.843369645196028e-05, "loss": 0.3176, "step": 9962, "teacher_loss": 0.3074972331523895 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.49438756704330444, "learning_rate": 2.8432685816714103e-05, "loss": 0.3754, "step": 9963, "teacher_loss": 0.3621580898761749 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.4733750820159912, "learning_rate": 2.8431674873496706e-05, "loss": 0.4121, "step": 9964, "teacher_loss": 0.4052943289279938 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.46345195174217224, "learning_rate": 2.843066362233128e-05, "loss": 0.2429, "step": 9965, "teacher_loss": 0.21841484308242798 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.626489520072937, "learning_rate": 2.8429652063240996e-05, "loss": 0.3139, "step": 9966, "teacher_loss": 0.2791442275047302 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.3096197843551636, "learning_rate": 2.842864019624906e-05, "loss": 0.2373, "step": 9967, "teacher_loss": 0.22926105558872223 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6614104509353638, "learning_rate": 2.8427628021378666e-05, "loss": 0.249, "step": 9968, "teacher_loss": 0.20317423343658447 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.46194642782211304, "learning_rate": 2.8426615538653016e-05, "loss": 0.2883, "step": 9969, "teacher_loss": 0.2689683735370636 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.2057897299528122, "learning_rate": 2.8425602748095328e-05, "loss": 0.204, "step": 9970, "teacher_loss": 0.20376136898994446 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.41850346326828003, "learning_rate": 2.8424589649728825e-05, "loss": 0.2543, "step": 9971, "teacher_loss": 0.2361023724079132 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.339938759803772, "learning_rate": 2.8423576243576726e-05, "loss": 0.3325, "step": 9972, "teacher_loss": 0.3316519260406494 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.4035336971282959, "learning_rate": 2.8422562529662273e-05, "loss": 0.225, "step": 9973, "teacher_loss": 0.20515784621238708 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.44656214118003845, "learning_rate": 2.84215485080087e-05, "loss": 0.2922, "step": 9974, "teacher_loss": 0.27501797676086426 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.16691406071186066, "learning_rate": 2.8420534178639265e-05, "loss": 0.2176, "step": 9975, "teacher_loss": 0.22323036193847656 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.9612618684768677, "learning_rate": 2.841951954157721e-05, "loss": 0.4305, "step": 9976, "teacher_loss": 0.3715289235115051 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6616100072860718, "learning_rate": 2.841850459684581e-05, "loss": 0.3198, "step": 9977, "teacher_loss": 0.2817884087562561 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5083587169647217, "learning_rate": 2.8417489344468334e-05, "loss": 0.3753, "step": 9978, "teacher_loss": 0.3604700565338135 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.3334214687347412, "learning_rate": 2.841647378446805e-05, "loss": 0.2187, "step": 9979, "teacher_loss": 0.20590610802173615 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.41912841796875, "learning_rate": 2.841545791686825e-05, "loss": 0.2928, "step": 9980, "teacher_loss": 0.27873697876930237 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.40075385570526123, "learning_rate": 2.8414441741692222e-05, "loss": 0.2616, "step": 9981, "teacher_loss": 0.24614089727401733 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5900130271911621, "learning_rate": 2.841342525896326e-05, "loss": 0.3473, "step": 9982, "teacher_loss": 0.3203285336494446 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.6284139752388, "learning_rate": 2.8412408468704673e-05, "loss": 0.3818, "step": 9983, "teacher_loss": 0.35445356369018555 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.47278058528900146, "learning_rate": 2.8411391370939772e-05, "loss": 0.3755, "step": 9984, "teacher_loss": 0.36466896533966064 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.2656656503677368, "learning_rate": 2.841037396569188e-05, "loss": 0.2093, "step": 9985, "teacher_loss": 0.20304420590400696 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5919592976570129, "learning_rate": 2.8409356252984315e-05, "loss": 0.3223, "step": 9986, "teacher_loss": 0.2923741936683655 }, { "compression_loss": 0.0, "epoch": 1.8, "label_loss": 0.5456598997116089, "learning_rate": 2.8408338232840417e-05, "loss": 0.2549, "step": 9987, "teacher_loss": 0.22263062000274658 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.3786388337612152, "learning_rate": 2.8407319905283522e-05, "loss": 0.2728, "step": 9988, "teacher_loss": 0.2610505223274231 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.759294867515564, "learning_rate": 2.840630127033698e-05, "loss": 0.2799, "step": 9989, "teacher_loss": 0.22659653425216675 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.22815167903900146, "learning_rate": 2.8405282328024146e-05, "loss": 0.1731, "step": 9990, "teacher_loss": 0.167032390832901 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.3255714774131775, "learning_rate": 2.8404263078368377e-05, "loss": 0.2168, "step": 9991, "teacher_loss": 0.20470184087753296 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.831087589263916, "learning_rate": 2.8403243521393045e-05, "loss": 0.4199, "step": 9992, "teacher_loss": 0.3742283284664154 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4013445973396301, "learning_rate": 2.840222365712152e-05, "loss": 0.2635, "step": 9993, "teacher_loss": 0.24819760024547577 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.540683388710022, "learning_rate": 2.8401203485577192e-05, "loss": 0.282, "step": 9994, "teacher_loss": 0.2532673478126526 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.25122275948524475, "learning_rate": 2.8400183006783446e-05, "loss": 0.223, "step": 9995, "teacher_loss": 0.21985140442848206 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4782525599002838, "learning_rate": 2.8399162220763678e-05, "loss": 0.3334, "step": 9996, "teacher_loss": 0.31726616621017456 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.8623383045196533, "learning_rate": 2.83981411275413e-05, "loss": 0.64, "step": 9997, "teacher_loss": 0.6152902841567993 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.44521552324295044, "learning_rate": 2.8397119727139708e-05, "loss": 0.2733, "step": 9998, "teacher_loss": 0.25417762994766235 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.423713743686676, "learning_rate": 2.8396098019582333e-05, "loss": 0.3331, "step": 9999, "teacher_loss": 0.32298362255096436 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4213792085647583, "learning_rate": 2.839507600489259e-05, "loss": 0.2469, "step": 10000, "teacher_loss": 0.22746586799621582 }, { "epoch": 1.81, "eval_exact_match": 79.48912015137181, "eval_f1": 87.01614426531715, "step": 10000 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.39092931151390076, "learning_rate": 2.8394053683093916e-05, "loss": 0.2119, "step": 10001, "teacher_loss": 0.19196656346321106 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.5520212650299072, "learning_rate": 2.839303105420975e-05, "loss": 0.399, "step": 10002, "teacher_loss": 0.38202935457229614 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.28513625264167786, "learning_rate": 2.8392008118263533e-05, "loss": 0.2307, "step": 10003, "teacher_loss": 0.22462332248687744 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.3025251626968384, "learning_rate": 2.8390984875278724e-05, "loss": 0.2268, "step": 10004, "teacher_loss": 0.21842685341835022 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.6585941910743713, "learning_rate": 2.8389961325278776e-05, "loss": 0.3615, "step": 10005, "teacher_loss": 0.3284519910812378 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.26177719235420227, "learning_rate": 2.8388937468287166e-05, "loss": 0.1783, "step": 10006, "teacher_loss": 0.16898679733276367 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 1.025555968284607, "learning_rate": 2.8387913304327356e-05, "loss": 0.454, "step": 10007, "teacher_loss": 0.3905356526374817 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.45299583673477173, "learning_rate": 2.8386888833422833e-05, "loss": 0.2697, "step": 10008, "teacher_loss": 0.24938833713531494 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4554581642150879, "learning_rate": 2.838586405559709e-05, "loss": 0.2899, "step": 10009, "teacher_loss": 0.271531879901886 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.2757774591445923, "learning_rate": 2.8384838970873613e-05, "loss": 0.176, "step": 10010, "teacher_loss": 0.1649159938097 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.22103755176067352, "learning_rate": 2.8383813579275912e-05, "loss": 0.2237, "step": 10011, "teacher_loss": 0.22398334741592407 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.6249642372131348, "learning_rate": 2.8382787880827488e-05, "loss": 0.3466, "step": 10012, "teacher_loss": 0.3157137632369995 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.5051115155220032, "learning_rate": 2.8381761875551865e-05, "loss": 0.2602, "step": 10013, "teacher_loss": 0.2329789251089096 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.46477973461151123, "learning_rate": 2.8380735563472564e-05, "loss": 0.4664, "step": 10014, "teacher_loss": 0.466605007648468 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4993470311164856, "learning_rate": 2.8379708944613112e-05, "loss": 0.2636, "step": 10015, "teacher_loss": 0.23744003474712372 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.7189319133758545, "learning_rate": 2.8378682018997046e-05, "loss": 0.2464, "step": 10016, "teacher_loss": 0.19386765360832214 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.5674004554748535, "learning_rate": 2.8377654786647916e-05, "loss": 0.3371, "step": 10017, "teacher_loss": 0.3115500211715698 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.2641545534133911, "learning_rate": 2.8376627247589268e-05, "loss": 0.2601, "step": 10018, "teacher_loss": 0.2596549391746521 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.3129035532474518, "learning_rate": 2.8375599401844665e-05, "loss": 0.172, "step": 10019, "teacher_loss": 0.15633898973464966 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.18933723866939545, "learning_rate": 2.8374571249437666e-05, "loss": 0.2484, "step": 10020, "teacher_loss": 0.2549378275871277 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.18536841869354248, "learning_rate": 2.837354279039185e-05, "loss": 0.2221, "step": 10021, "teacher_loss": 0.22623273730278015 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.2101885974407196, "learning_rate": 2.8372514024730792e-05, "loss": 0.2298, "step": 10022, "teacher_loss": 0.23194262385368347 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.48914965987205505, "learning_rate": 2.837148495247808e-05, "loss": 0.3205, "step": 10023, "teacher_loss": 0.3017808794975281 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.26564300060272217, "learning_rate": 2.837045557365731e-05, "loss": 0.3501, "step": 10024, "teacher_loss": 0.35946038365364075 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.5714926719665527, "learning_rate": 2.836942588829208e-05, "loss": 0.2595, "step": 10025, "teacher_loss": 0.22483046352863312 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.7653529644012451, "learning_rate": 2.8368395896405997e-05, "loss": 0.2967, "step": 10026, "teacher_loss": 0.24461224675178528 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.6550235152244568, "learning_rate": 2.836736559802268e-05, "loss": 0.3184, "step": 10027, "teacher_loss": 0.2809876799583435 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.5254228115081787, "learning_rate": 2.8366334993165744e-05, "loss": 0.3109, "step": 10028, "teacher_loss": 0.2870434522628784 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.4441121816635132, "learning_rate": 2.836530408185882e-05, "loss": 0.2124, "step": 10029, "teacher_loss": 0.18661056458950043 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.29186293482780457, "learning_rate": 2.836427286412555e-05, "loss": 0.3306, "step": 10030, "teacher_loss": 0.33490413427352905 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.22067967057228088, "learning_rate": 2.8363241339989567e-05, "loss": 0.2584, "step": 10031, "teacher_loss": 0.26260995864868164 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.44406452775001526, "learning_rate": 2.8362209509474525e-05, "loss": 0.3025, "step": 10032, "teacher_loss": 0.28674912452697754 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.721721351146698, "learning_rate": 2.836117737260408e-05, "loss": 0.2223, "step": 10033, "teacher_loss": 0.16681170463562012 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.2818625271320343, "learning_rate": 2.83601449294019e-05, "loss": 0.2368, "step": 10034, "teacher_loss": 0.23182731866836548 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.7916396856307983, "learning_rate": 2.835911217989165e-05, "loss": 0.2997, "step": 10035, "teacher_loss": 0.2450808584690094 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.3018155097961426, "learning_rate": 2.8358079124097013e-05, "loss": 0.2592, "step": 10036, "teacher_loss": 0.25441431999206543 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.35869908332824707, "learning_rate": 2.835704576204167e-05, "loss": 0.3396, "step": 10037, "teacher_loss": 0.3374679684638977 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.20626285672187805, "learning_rate": 2.8356012093749312e-05, "loss": 0.2218, "step": 10038, "teacher_loss": 0.2235313355922699 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.7450047731399536, "learning_rate": 2.8354978119243643e-05, "loss": 0.2842, "step": 10039, "teacher_loss": 0.2330470085144043 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.2529345452785492, "learning_rate": 2.8353943838548365e-05, "loss": 0.3474, "step": 10040, "teacher_loss": 0.35788941383361816 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.475966215133667, "learning_rate": 2.8352909251687193e-05, "loss": 0.4137, "step": 10041, "teacher_loss": 0.40676021575927734 }, { "compression_loss": 0.0, "epoch": 1.81, "label_loss": 0.7100175619125366, "learning_rate": 2.8351874358683844e-05, "loss": 0.4265, "step": 10042, "teacher_loss": 0.39495301246643066 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6449384689331055, "learning_rate": 2.8350839159562047e-05, "loss": 0.3245, "step": 10043, "teacher_loss": 0.28892967104911804 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.22249314188957214, "learning_rate": 2.8349803654345538e-05, "loss": 0.1712, "step": 10044, "teacher_loss": 0.1654951572418213 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5362383127212524, "learning_rate": 2.8348767843058054e-05, "loss": 0.2871, "step": 10045, "teacher_loss": 0.25945594906806946 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.21562406420707703, "learning_rate": 2.8347731725723346e-05, "loss": 0.1791, "step": 10046, "teacher_loss": 0.1750330626964569 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.642021119594574, "learning_rate": 2.8346695302365165e-05, "loss": 0.2823, "step": 10047, "teacher_loss": 0.2423771172761917 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.35786062479019165, "learning_rate": 2.8345658573007286e-05, "loss": 0.1922, "step": 10048, "teacher_loss": 0.1737847924232483 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4084697365760803, "learning_rate": 2.834462153767346e-05, "loss": 0.2245, "step": 10049, "teacher_loss": 0.20402434468269348 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.18152669072151184, "learning_rate": 2.834358419638747e-05, "loss": 0.2266, "step": 10050, "teacher_loss": 0.23164907097816467 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.8116253018379211, "learning_rate": 2.8342546549173104e-05, "loss": 0.2668, "step": 10051, "teacher_loss": 0.20628829300403595 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.3889818489551544, "learning_rate": 2.834150859605415e-05, "loss": 0.2248, "step": 10052, "teacher_loss": 0.20655973255634308 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.19807803630828857, "learning_rate": 2.8340470337054402e-05, "loss": 0.2144, "step": 10053, "teacher_loss": 0.2161865532398224 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4396580457687378, "learning_rate": 2.8339431772197668e-05, "loss": 0.1799, "step": 10054, "teacher_loss": 0.15105697512626648 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.17564627528190613, "learning_rate": 2.833839290150775e-05, "loss": 0.1805, "step": 10055, "teacher_loss": 0.18101385235786438 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.26237696409225464, "learning_rate": 2.8337353725008482e-05, "loss": 0.2522, "step": 10056, "teacher_loss": 0.2510750889778137 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.48744410276412964, "learning_rate": 2.8336314242723674e-05, "loss": 0.3148, "step": 10057, "teacher_loss": 0.2956140339374542 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.25666099786758423, "learning_rate": 2.8335274454677168e-05, "loss": 0.2218, "step": 10058, "teacher_loss": 0.21788930892944336 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.7666101455688477, "learning_rate": 2.8334234360892797e-05, "loss": 0.3207, "step": 10059, "teacher_loss": 0.2711900472640991 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.7123481035232544, "learning_rate": 2.8333193961394415e-05, "loss": 0.2505, "step": 10060, "teacher_loss": 0.19918644428253174 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4849434494972229, "learning_rate": 2.833215325620587e-05, "loss": 0.2957, "step": 10061, "teacher_loss": 0.2747054398059845 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.36107003688812256, "learning_rate": 2.8331112245351018e-05, "loss": 0.3197, "step": 10062, "teacher_loss": 0.3150624632835388 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.34174734354019165, "learning_rate": 2.8330070928853734e-05, "loss": 0.2148, "step": 10063, "teacher_loss": 0.20071497559547424 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.36111021041870117, "learning_rate": 2.832902930673789e-05, "loss": 0.2706, "step": 10064, "teacher_loss": 0.2605125308036804 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.30980220437049866, "learning_rate": 2.8327987379027364e-05, "loss": 0.2287, "step": 10065, "teacher_loss": 0.2196970283985138 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.29366597533226013, "learning_rate": 2.8326945145746053e-05, "loss": 0.2184, "step": 10066, "teacher_loss": 0.21008357405662537 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.7954883575439453, "learning_rate": 2.832590260691784e-05, "loss": 0.2903, "step": 10067, "teacher_loss": 0.23414069414138794 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6837584972381592, "learning_rate": 2.8324859762566634e-05, "loss": 0.3752, "step": 10068, "teacher_loss": 0.3409465551376343 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.2502906620502472, "learning_rate": 2.8323816612716345e-05, "loss": 0.2311, "step": 10069, "teacher_loss": 0.22891896963119507 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.9472180604934692, "learning_rate": 2.8322773157390887e-05, "loss": 0.4705, "step": 10070, "teacher_loss": 0.41748660802841187 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4407423138618469, "learning_rate": 2.8321729396614185e-05, "loss": 0.3442, "step": 10071, "teacher_loss": 0.333477258682251 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.46047964692115784, "learning_rate": 2.832068533041017e-05, "loss": 0.2319, "step": 10072, "teacher_loss": 0.20654237270355225 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.38682061433792114, "learning_rate": 2.831964095880277e-05, "loss": 0.3607, "step": 10073, "teacher_loss": 0.35777562856674194 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5228363275527954, "learning_rate": 2.8318596281815948e-05, "loss": 0.2657, "step": 10074, "teacher_loss": 0.23716843128204346 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.2322489619255066, "learning_rate": 2.831755129947364e-05, "loss": 0.1683, "step": 10075, "teacher_loss": 0.16121214628219604 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.38180452585220337, "learning_rate": 2.8316506011799808e-05, "loss": 0.2207, "step": 10076, "teacher_loss": 0.2028535008430481 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6159590482711792, "learning_rate": 2.8315460418818416e-05, "loss": 0.3025, "step": 10077, "teacher_loss": 0.26765990257263184 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5178865194320679, "learning_rate": 2.831441452055344e-05, "loss": 0.3505, "step": 10078, "teacher_loss": 0.33192354440689087 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.7684535384178162, "learning_rate": 2.8313368317028862e-05, "loss": 0.2792, "step": 10079, "teacher_loss": 0.22478681802749634 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5380355715751648, "learning_rate": 2.831232180826866e-05, "loss": 0.3462, "step": 10080, "teacher_loss": 0.3248831629753113 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5595866441726685, "learning_rate": 2.8311274994296835e-05, "loss": 0.3168, "step": 10081, "teacher_loss": 0.28977569937705994 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6758938431739807, "learning_rate": 2.831022787513738e-05, "loss": 0.3626, "step": 10082, "teacher_loss": 0.3278322219848633 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6684085130691528, "learning_rate": 2.8309180450814304e-05, "loss": 0.3742, "step": 10083, "teacher_loss": 0.3415384292602539 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.7728433609008789, "learning_rate": 2.830813272135163e-05, "loss": 0.3425, "step": 10084, "teacher_loss": 0.29471856355667114 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4471004605293274, "learning_rate": 2.8307084686773367e-05, "loss": 0.265, "step": 10085, "teacher_loss": 0.24478890001773834 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.3530995547771454, "learning_rate": 2.830603634710355e-05, "loss": 0.2421, "step": 10086, "teacher_loss": 0.2297591269016266 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.28494173288345337, "learning_rate": 2.8304987702366214e-05, "loss": 0.217, "step": 10087, "teacher_loss": 0.20944523811340332 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6312648057937622, "learning_rate": 2.83039387525854e-05, "loss": 0.5607, "step": 10088, "teacher_loss": 0.5528750419616699 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.28036999702453613, "learning_rate": 2.8302889497785156e-05, "loss": 0.2622, "step": 10089, "teacher_loss": 0.26023340225219727 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.3508296012878418, "learning_rate": 2.830183993798954e-05, "loss": 0.2376, "step": 10090, "teacher_loss": 0.225011944770813 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.6957626342773438, "learning_rate": 2.830079007322262e-05, "loss": 0.3616, "step": 10091, "teacher_loss": 0.32449856400489807 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.3001045882701874, "learning_rate": 2.829973990350846e-05, "loss": 0.2415, "step": 10092, "teacher_loss": 0.23493772745132446 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.5480846166610718, "learning_rate": 2.8298689428871135e-05, "loss": 0.3804, "step": 10093, "teacher_loss": 0.3617645502090454 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.3611062169075012, "learning_rate": 2.829763864933473e-05, "loss": 0.276, "step": 10094, "teacher_loss": 0.26650309562683105 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.4307994246482849, "learning_rate": 2.8296587564923346e-05, "loss": 0.1753, "step": 10095, "teacher_loss": 0.14691662788391113 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.25874757766723633, "learning_rate": 2.8295536175661073e-05, "loss": 0.3003, "step": 10096, "teacher_loss": 0.30494174361228943 }, { "compression_loss": 0.0, "epoch": 1.82, "label_loss": 0.15567296743392944, "learning_rate": 2.8294484481572018e-05, "loss": 0.1981, "step": 10097, "teacher_loss": 0.20278194546699524 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.40125876665115356, "learning_rate": 2.8293432482680292e-05, "loss": 0.2449, "step": 10098, "teacher_loss": 0.2275681346654892 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.25205883383750916, "learning_rate": 2.8292380179010014e-05, "loss": 0.292, "step": 10099, "teacher_loss": 0.29645413160324097 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4218711256980896, "learning_rate": 2.8291327570585312e-05, "loss": 0.2594, "step": 10100, "teacher_loss": 0.2413722574710846 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.41986334323883057, "learning_rate": 2.829027465743032e-05, "loss": 0.4004, "step": 10101, "teacher_loss": 0.3982834219932556 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.23255914449691772, "learning_rate": 2.8289221439569172e-05, "loss": 0.2781, "step": 10102, "teacher_loss": 0.28320854902267456 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.7649527788162231, "learning_rate": 2.8288167917026022e-05, "loss": 0.3654, "step": 10103, "teacher_loss": 0.32100915908813477 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.559751033782959, "learning_rate": 2.8287114089825022e-05, "loss": 0.3137, "step": 10104, "teacher_loss": 0.2863689363002777 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.7335920333862305, "learning_rate": 2.8286059957990334e-05, "loss": 0.5446, "step": 10105, "teacher_loss": 0.5235691070556641 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4732711911201477, "learning_rate": 2.8285005521546122e-05, "loss": 0.2097, "step": 10106, "teacher_loss": 0.18043681979179382 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.8340111970901489, "learning_rate": 2.8283950780516567e-05, "loss": 0.3805, "step": 10107, "teacher_loss": 0.33015868067741394 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.16369491815567017, "learning_rate": 2.8282895734925846e-05, "loss": 0.3181, "step": 10108, "teacher_loss": 0.3352566063404083 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.2734563946723938, "learning_rate": 2.8281840384798147e-05, "loss": 0.24, "step": 10109, "teacher_loss": 0.23631922900676727 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.583782434463501, "learning_rate": 2.8280784730157676e-05, "loss": 0.2547, "step": 10110, "teacher_loss": 0.21817341446876526 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.30771467089653015, "learning_rate": 2.8279728771028623e-05, "loss": 0.2511, "step": 10111, "teacher_loss": 0.24480858445167542 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.3733453154563904, "learning_rate": 2.8278672507435207e-05, "loss": 0.2883, "step": 10112, "teacher_loss": 0.2788010835647583 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.503891110420227, "learning_rate": 2.827761593940164e-05, "loss": 0.2786, "step": 10113, "teacher_loss": 0.2535475194454193 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.15241241455078125, "learning_rate": 2.827655906695215e-05, "loss": 0.1786, "step": 10114, "teacher_loss": 0.18150311708450317 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.2417047917842865, "learning_rate": 2.8275501890110966e-05, "loss": 0.2329, "step": 10115, "teacher_loss": 0.23192723095417023 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.8406541347503662, "learning_rate": 2.827444440890232e-05, "loss": 0.352, "step": 10116, "teacher_loss": 0.2977423667907715 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.5134698748588562, "learning_rate": 2.827338662335047e-05, "loss": 0.1835, "step": 10117, "teacher_loss": 0.14684215188026428 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.49457627534866333, "learning_rate": 2.827232853347966e-05, "loss": 0.2637, "step": 10118, "teacher_loss": 0.23808389902114868 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.2242611050605774, "learning_rate": 2.8271270139314144e-05, "loss": 0.1725, "step": 10119, "teacher_loss": 0.1667870730161667 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.5023941397666931, "learning_rate": 2.82702114408782e-05, "loss": 0.2196, "step": 10120, "teacher_loss": 0.18816867470741272 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.6886653900146484, "learning_rate": 2.8269152438196086e-05, "loss": 0.2976, "step": 10121, "teacher_loss": 0.25413933396339417 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4681745767593384, "learning_rate": 2.826809313129209e-05, "loss": 0.2122, "step": 10122, "teacher_loss": 0.18379315733909607 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.31001555919647217, "learning_rate": 2.82670335201905e-05, "loss": 0.2282, "step": 10123, "teacher_loss": 0.2190977931022644 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.6931607127189636, "learning_rate": 2.8265973604915613e-05, "loss": 0.3191, "step": 10124, "teacher_loss": 0.27755096554756165 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.7336114645004272, "learning_rate": 2.826491338549172e-05, "loss": 0.2732, "step": 10125, "teacher_loss": 0.22206394374370575 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.487165629863739, "learning_rate": 2.8263852861943134e-05, "loss": 0.26, "step": 10126, "teacher_loss": 0.2347734570503235 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4697193503379822, "learning_rate": 2.826279203429417e-05, "loss": 0.2655, "step": 10127, "teacher_loss": 0.24282614886760712 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4233858585357666, "learning_rate": 2.8261730902569146e-05, "loss": 0.4074, "step": 10128, "teacher_loss": 0.4056728482246399 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.19034206867218018, "learning_rate": 2.8260669466792394e-05, "loss": 0.2043, "step": 10129, "teacher_loss": 0.20580735802650452 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.39717885851860046, "learning_rate": 2.8259607726988252e-05, "loss": 0.2395, "step": 10130, "teacher_loss": 0.22195212543010712 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.5770330429077148, "learning_rate": 2.8258545683181058e-05, "loss": 0.3568, "step": 10131, "teacher_loss": 0.3323458433151245 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.47333037853240967, "learning_rate": 2.825748333539516e-05, "loss": 0.2968, "step": 10132, "teacher_loss": 0.27721142768859863 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.26156559586524963, "learning_rate": 2.825642068365492e-05, "loss": 0.2922, "step": 10133, "teacher_loss": 0.29555946588516235 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.7056933641433716, "learning_rate": 2.82553577279847e-05, "loss": 0.3848, "step": 10134, "teacher_loss": 0.34914886951446533 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.3210175335407257, "learning_rate": 2.8254294468408862e-05, "loss": 0.2311, "step": 10135, "teacher_loss": 0.22111928462982178 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 1.003559947013855, "learning_rate": 2.8253230904951794e-05, "loss": 0.3414, "step": 10136, "teacher_loss": 0.2678506374359131 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.3333669900894165, "learning_rate": 2.825216703763788e-05, "loss": 0.2808, "step": 10137, "teacher_loss": 0.274944931268692 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.32442617416381836, "learning_rate": 2.8251102866491507e-05, "loss": 0.2413, "step": 10138, "teacher_loss": 0.23203572630882263 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.22870692610740662, "learning_rate": 2.825003839153707e-05, "loss": 0.1696, "step": 10139, "teacher_loss": 0.16301561892032623 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4195851683616638, "learning_rate": 2.8248973612798975e-05, "loss": 0.256, "step": 10140, "teacher_loss": 0.23787939548492432 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.20134034752845764, "learning_rate": 2.8247908530301646e-05, "loss": 0.2581, "step": 10141, "teacher_loss": 0.26441875100135803 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 1.0713342428207397, "learning_rate": 2.824684314406949e-05, "loss": 0.3513, "step": 10142, "teacher_loss": 0.2712668776512146 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.32342541217803955, "learning_rate": 2.8245777454126937e-05, "loss": 0.2652, "step": 10143, "teacher_loss": 0.25872209668159485 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.3959038555622101, "learning_rate": 2.824471146049842e-05, "loss": 0.3081, "step": 10144, "teacher_loss": 0.29834097623825073 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.26506179571151733, "learning_rate": 2.824364516320838e-05, "loss": 0.2216, "step": 10145, "teacher_loss": 0.2167806178331375 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.36636945605278015, "learning_rate": 2.8242578562281266e-05, "loss": 0.2682, "step": 10146, "teacher_loss": 0.2573147416114807 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.1470733880996704, "learning_rate": 2.8241511657741525e-05, "loss": 0.2031, "step": 10147, "teacher_loss": 0.2093411535024643 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.6461858749389648, "learning_rate": 2.824044444961362e-05, "loss": 0.2538, "step": 10148, "teacher_loss": 0.2102227807044983 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.4144626557826996, "learning_rate": 2.8239376937922022e-05, "loss": 0.2305, "step": 10149, "teacher_loss": 0.21000996232032776 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.26975417137145996, "learning_rate": 2.8238309122691206e-05, "loss": 0.1744, "step": 10150, "teacher_loss": 0.16384296119213104 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.44550907611846924, "learning_rate": 2.823724100394565e-05, "loss": 0.3536, "step": 10151, "teacher_loss": 0.343362957239151 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.6039840579032898, "learning_rate": 2.8236172581709844e-05, "loss": 0.2742, "step": 10152, "teacher_loss": 0.23760539293289185 }, { "compression_loss": 0.0, "epoch": 1.83, "label_loss": 0.5013165473937988, "learning_rate": 2.823510385600829e-05, "loss": 0.3639, "step": 10153, "teacher_loss": 0.3486413061618805 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.33236414194107056, "learning_rate": 2.823403482686548e-05, "loss": 0.2587, "step": 10154, "teacher_loss": 0.25050631165504456 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.20856590569019318, "learning_rate": 2.8232965494305934e-05, "loss": 0.2025, "step": 10155, "teacher_loss": 0.2018308937549591 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.27331018447875977, "learning_rate": 2.8231895858354162e-05, "loss": 0.2814, "step": 10156, "teacher_loss": 0.282331258058548 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3737063407897949, "learning_rate": 2.8230825919034687e-05, "loss": 0.2258, "step": 10157, "teacher_loss": 0.2093178927898407 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.971958339214325, "learning_rate": 2.8229755676372043e-05, "loss": 0.3436, "step": 10158, "teacher_loss": 0.27379894256591797 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3210369348526001, "learning_rate": 2.822868513039077e-05, "loss": 0.1905, "step": 10159, "teacher_loss": 0.17594194412231445 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5199267864227295, "learning_rate": 2.8227614281115404e-05, "loss": 0.3311, "step": 10160, "teacher_loss": 0.31009846925735474 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6118385791778564, "learning_rate": 2.82265431285705e-05, "loss": 0.3156, "step": 10161, "teacher_loss": 0.2827160954475403 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.43781721591949463, "learning_rate": 2.822547167278062e-05, "loss": 0.2694, "step": 10162, "teacher_loss": 0.25066712498664856 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.7488399744033813, "learning_rate": 2.822439991377033e-05, "loss": 0.3808, "step": 10163, "teacher_loss": 0.3399509787559509 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.2312871217727661, "learning_rate": 2.8223327851564193e-05, "loss": 0.2556, "step": 10164, "teacher_loss": 0.2583398222923279 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.2981010377407074, "learning_rate": 2.8222255486186798e-05, "loss": 0.2966, "step": 10165, "teacher_loss": 0.29647552967071533 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.510927677154541, "learning_rate": 2.822118281766272e-05, "loss": 0.3766, "step": 10166, "teacher_loss": 0.36167585849761963 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.4946654736995697, "learning_rate": 2.8220109846016568e-05, "loss": 0.2047, "step": 10167, "teacher_loss": 0.17251738905906677 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.21726711094379425, "learning_rate": 2.821903657127293e-05, "loss": 0.1878, "step": 10168, "teacher_loss": 0.1845286786556244 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.446840763092041, "learning_rate": 2.8217962993456415e-05, "loss": 0.2738, "step": 10169, "teacher_loss": 0.2545472979545593 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3201420307159424, "learning_rate": 2.8216889112591635e-05, "loss": 0.2497, "step": 10170, "teacher_loss": 0.24187727272510529 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6139057874679565, "learning_rate": 2.821581492870322e-05, "loss": 0.3174, "step": 10171, "teacher_loss": 0.28450679779052734 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6112133264541626, "learning_rate": 2.821474044181579e-05, "loss": 0.2902, "step": 10172, "teacher_loss": 0.25452956557273865 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5521390438079834, "learning_rate": 2.8213665651953977e-05, "loss": 0.4799, "step": 10173, "teacher_loss": 0.4718799591064453 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.31955623626708984, "learning_rate": 2.821259055914243e-05, "loss": 0.2485, "step": 10174, "teacher_loss": 0.24061307311058044 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.4162052869796753, "learning_rate": 2.8211515163405798e-05, "loss": 0.2387, "step": 10175, "teacher_loss": 0.21893858909606934 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6645916700363159, "learning_rate": 2.8210439464768733e-05, "loss": 0.3088, "step": 10176, "teacher_loss": 0.2692273259162903 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6467615365982056, "learning_rate": 2.82093634632559e-05, "loss": 0.3555, "step": 10177, "teacher_loss": 0.323160320520401 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.39171579480171204, "learning_rate": 2.8208287158891956e-05, "loss": 0.3172, "step": 10178, "teacher_loss": 0.308951199054718 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.37491148710250854, "learning_rate": 2.82072105517016e-05, "loss": 0.2971, "step": 10179, "teacher_loss": 0.28850528597831726 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.45946401357650757, "learning_rate": 2.82061336417095e-05, "loss": 0.3302, "step": 10180, "teacher_loss": 0.3158247768878937 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5471202731132507, "learning_rate": 2.8205056428940342e-05, "loss": 0.337, "step": 10181, "teacher_loss": 0.3136206269264221 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5714937448501587, "learning_rate": 2.820397891341884e-05, "loss": 0.4733, "step": 10182, "teacher_loss": 0.46238064765930176 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.4085143804550171, "learning_rate": 2.8202901095169684e-05, "loss": 0.235, "step": 10183, "teacher_loss": 0.21576423943042755 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 1.5349762439727783, "learning_rate": 2.8201822974217592e-05, "loss": 0.4565, "step": 10184, "teacher_loss": 0.33662736415863037 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.32591256499290466, "learning_rate": 2.820074455058728e-05, "loss": 0.2639, "step": 10185, "teacher_loss": 0.2570186257362366 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.67108154296875, "learning_rate": 2.8199665824303473e-05, "loss": 0.4043, "step": 10186, "teacher_loss": 0.37470734119415283 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3161235749721527, "learning_rate": 2.8198586795390903e-05, "loss": 0.3299, "step": 10187, "teacher_loss": 0.33146941661834717 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3146645426750183, "learning_rate": 2.8197507463874312e-05, "loss": 0.1895, "step": 10188, "teacher_loss": 0.1756211519241333 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3265650272369385, "learning_rate": 2.819642782977844e-05, "loss": 0.2113, "step": 10189, "teacher_loss": 0.19849266111850739 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.29414069652557373, "learning_rate": 2.8195347893128046e-05, "loss": 0.1848, "step": 10190, "teacher_loss": 0.17267151176929474 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.7510318756103516, "learning_rate": 2.8194267653947886e-05, "loss": 0.273, "step": 10191, "teacher_loss": 0.21990731358528137 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5448933839797974, "learning_rate": 2.8193187112262725e-05, "loss": 0.3282, "step": 10192, "teacher_loss": 0.30417752265930176 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.36576300859451294, "learning_rate": 2.8192106268097336e-05, "loss": 0.2133, "step": 10193, "teacher_loss": 0.19635042548179626 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.08481509983539581, "learning_rate": 2.8191025121476505e-05, "loss": 0.1847, "step": 10194, "teacher_loss": 0.1957492232322693 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.22363883256912231, "learning_rate": 2.818994367242502e-05, "loss": 0.1963, "step": 10195, "teacher_loss": 0.19323772192001343 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.7947820425033569, "learning_rate": 2.818886192096767e-05, "loss": 0.2681, "step": 10196, "teacher_loss": 0.20955385267734528 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3694137930870056, "learning_rate": 2.8187779867129255e-05, "loss": 0.1636, "step": 10197, "teacher_loss": 0.14070913195610046 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.25417378544807434, "learning_rate": 2.818669751093459e-05, "loss": 0.2793, "step": 10198, "teacher_loss": 0.28213492035865784 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.5340924263000488, "learning_rate": 2.8185614852408488e-05, "loss": 0.2909, "step": 10199, "teacher_loss": 0.26389771699905396 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6272094249725342, "learning_rate": 2.8184531891575766e-05, "loss": 0.3619, "step": 10200, "teacher_loss": 0.3324025273323059 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.22327786684036255, "learning_rate": 2.8183448628461262e-05, "loss": 0.2749, "step": 10201, "teacher_loss": 0.28066080808639526 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.7067087888717651, "learning_rate": 2.8182365063089803e-05, "loss": 0.409, "step": 10202, "teacher_loss": 0.37595629692077637 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.21253371238708496, "learning_rate": 2.8181281195486238e-05, "loss": 0.2261, "step": 10203, "teacher_loss": 0.22765298187732697 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6708394289016724, "learning_rate": 2.8180197025675412e-05, "loss": 0.5265, "step": 10204, "teacher_loss": 0.510500431060791 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.44892236590385437, "learning_rate": 2.817911255368219e-05, "loss": 0.2643, "step": 10205, "teacher_loss": 0.24373331665992737 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.7344803214073181, "learning_rate": 2.8178027779531422e-05, "loss": 0.3394, "step": 10206, "teacher_loss": 0.29550835490226746 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.3964819014072418, "learning_rate": 2.8176942703247993e-05, "loss": 0.2015, "step": 10207, "teacher_loss": 0.1797998994588852 }, { "compression_loss": 0.0, "epoch": 1.84, "label_loss": 0.6019722819328308, "learning_rate": 2.8175857324856774e-05, "loss": 0.309, "step": 10208, "teacher_loss": 0.2764304280281067 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.45681822299957275, "learning_rate": 2.8174771644382646e-05, "loss": 0.3935, "step": 10209, "teacher_loss": 0.3864993453025818 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.25299179553985596, "learning_rate": 2.8173685661850508e-05, "loss": 0.2075, "step": 10210, "teacher_loss": 0.2024236023426056 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.22379331290721893, "learning_rate": 2.8172599377285252e-05, "loss": 0.2195, "step": 10211, "teacher_loss": 0.21905580163002014 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3946741819381714, "learning_rate": 2.8171512790711788e-05, "loss": 0.2778, "step": 10212, "teacher_loss": 0.26478272676467896 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5840538740158081, "learning_rate": 2.8170425902155025e-05, "loss": 0.6142, "step": 10213, "teacher_loss": 0.6175504922866821 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.37158724665641785, "learning_rate": 2.8169338711639886e-05, "loss": 0.3092, "step": 10214, "teacher_loss": 0.3022826910018921 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5920960307121277, "learning_rate": 2.816825121919129e-05, "loss": 0.2899, "step": 10215, "teacher_loss": 0.256344735622406 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.23958027362823486, "learning_rate": 2.8167163424834175e-05, "loss": 0.2363, "step": 10216, "teacher_loss": 0.23591157793998718 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3541507124900818, "learning_rate": 2.816607532859348e-05, "loss": 0.2601, "step": 10217, "teacher_loss": 0.24967412650585175 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.2839756906032562, "learning_rate": 2.8164986930494153e-05, "loss": 0.3072, "step": 10218, "teacher_loss": 0.3097820281982422 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.40960606932640076, "learning_rate": 2.816389823056114e-05, "loss": 0.3108, "step": 10219, "teacher_loss": 0.29984527826309204 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.37976568937301636, "learning_rate": 2.8162809228819417e-05, "loss": 0.2385, "step": 10220, "teacher_loss": 0.22282083332538605 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5235974788665771, "learning_rate": 2.8161719925293937e-05, "loss": 0.2447, "step": 10221, "teacher_loss": 0.21365785598754883 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.48798346519470215, "learning_rate": 2.8160630320009683e-05, "loss": 0.3752, "step": 10222, "teacher_loss": 0.3626946210861206 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3554057776927948, "learning_rate": 2.815954041299163e-05, "loss": 0.2251, "step": 10223, "teacher_loss": 0.2106718122959137 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.6373621225357056, "learning_rate": 2.815845020426477e-05, "loss": 0.3036, "step": 10224, "teacher_loss": 0.26646918058395386 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5369683504104614, "learning_rate": 2.81573596938541e-05, "loss": 0.4047, "step": 10225, "teacher_loss": 0.3900377154350281 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.6793599128723145, "learning_rate": 2.815626888178462e-05, "loss": 0.3922, "step": 10226, "teacher_loss": 0.3602656424045563 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.8496882915496826, "learning_rate": 2.815517776808134e-05, "loss": 0.476, "step": 10227, "teacher_loss": 0.43449491262435913 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.48280957341194153, "learning_rate": 2.8154086352769274e-05, "loss": 0.3022, "step": 10228, "teacher_loss": 0.28212571144104004 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 1.0122978687286377, "learning_rate": 2.8152994635873444e-05, "loss": 0.3084, "step": 10229, "teacher_loss": 0.2302239090204239 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.9756971001625061, "learning_rate": 2.8151902617418886e-05, "loss": 0.4192, "step": 10230, "teacher_loss": 0.35741370916366577 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5650604963302612, "learning_rate": 2.8150810297430624e-05, "loss": 0.2741, "step": 10231, "teacher_loss": 0.24173066020011902 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.6005864143371582, "learning_rate": 2.814971767593372e-05, "loss": 0.8256, "step": 10232, "teacher_loss": 0.8506519794464111 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.4202248454093933, "learning_rate": 2.814862475295321e-05, "loss": 0.2318, "step": 10233, "teacher_loss": 0.2108844518661499 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 1.152825951576233, "learning_rate": 2.8147531528514155e-05, "loss": 0.3656, "step": 10234, "teacher_loss": 0.27807706594467163 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.41623347997665405, "learning_rate": 2.814643800264162e-05, "loss": 0.2444, "step": 10235, "teacher_loss": 0.22528806328773499 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5408117175102234, "learning_rate": 2.8145344175360682e-05, "loss": 0.2905, "step": 10236, "teacher_loss": 0.26263225078582764 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.16792520880699158, "learning_rate": 2.814425004669641e-05, "loss": 0.2365, "step": 10237, "teacher_loss": 0.24406728148460388 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.8898427486419678, "learning_rate": 2.8143155616673893e-05, "loss": 0.2851, "step": 10238, "teacher_loss": 0.2179526388645172 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 1.5235463380813599, "learning_rate": 2.8142060885318223e-05, "loss": 0.4215, "step": 10239, "teacher_loss": 0.2990465760231018 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.23764052987098694, "learning_rate": 2.81409658526545e-05, "loss": 0.1375, "step": 10240, "teacher_loss": 0.12635567784309387 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.25337323546409607, "learning_rate": 2.8139870518707824e-05, "loss": 0.2281, "step": 10241, "teacher_loss": 0.22531333565711975 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.2043193280696869, "learning_rate": 2.8138774883503317e-05, "loss": 0.266, "step": 10242, "teacher_loss": 0.2728240489959717 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.14969919621944427, "learning_rate": 2.8137678947066095e-05, "loss": 0.1999, "step": 10243, "teacher_loss": 0.20552149415016174 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.49693363904953003, "learning_rate": 2.8136582709421283e-05, "loss": 0.2759, "step": 10244, "teacher_loss": 0.25134241580963135 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3156214654445648, "learning_rate": 2.813548617059401e-05, "loss": 0.2117, "step": 10245, "teacher_loss": 0.20010985434055328 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.6361654996871948, "learning_rate": 2.8134389330609424e-05, "loss": 0.2892, "step": 10246, "teacher_loss": 0.25063055753707886 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.35382696986198425, "learning_rate": 2.8133292189492673e-05, "loss": 0.3031, "step": 10247, "teacher_loss": 0.297427237033844 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.30127787590026855, "learning_rate": 2.8132194747268904e-05, "loss": 0.2269, "step": 10248, "teacher_loss": 0.21867021918296814 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5748451948165894, "learning_rate": 2.8131097003963285e-05, "loss": 0.3392, "step": 10249, "teacher_loss": 0.31306034326553345 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.2904551327228546, "learning_rate": 2.812999895960098e-05, "loss": 0.2205, "step": 10250, "teacher_loss": 0.2127016931772232 }, { "epoch": 1.85, "eval_exact_match": 79.68779564806054, "eval_f1": 87.05700786746246, "step": 10250 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3238072991371155, "learning_rate": 2.8128900614207162e-05, "loss": 0.3607, "step": 10251, "teacher_loss": 0.36484265327453613 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.7196825742721558, "learning_rate": 2.8127801967807016e-05, "loss": 0.2651, "step": 10252, "teacher_loss": 0.21462179720401764 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.6263768672943115, "learning_rate": 2.8126703020425733e-05, "loss": 0.2882, "step": 10253, "teacher_loss": 0.25067460536956787 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3905763030052185, "learning_rate": 2.8125603772088504e-05, "loss": 0.2566, "step": 10254, "teacher_loss": 0.2417493760585785 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.3641684651374817, "learning_rate": 2.812450422282053e-05, "loss": 0.3182, "step": 10255, "teacher_loss": 0.31309932470321655 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.7814779281616211, "learning_rate": 2.812340437264703e-05, "loss": 0.4126, "step": 10256, "teacher_loss": 0.37165772914886475 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.45672136545181274, "learning_rate": 2.8122304221593205e-05, "loss": 0.3321, "step": 10257, "teacher_loss": 0.318206787109375 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.4643021523952484, "learning_rate": 2.8121203769684293e-05, "loss": 0.2557, "step": 10258, "teacher_loss": 0.23249977827072144 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 1.0059096813201904, "learning_rate": 2.8120103016945518e-05, "loss": 0.3634, "step": 10259, "teacher_loss": 0.29198941588401794 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.46291807293891907, "learning_rate": 2.8119001963402117e-05, "loss": 0.2846, "step": 10260, "teacher_loss": 0.2648296654224396 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.5385724306106567, "learning_rate": 2.811790060907933e-05, "loss": 0.2761, "step": 10261, "teacher_loss": 0.2469625622034073 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.42845237255096436, "learning_rate": 2.8116798954002417e-05, "loss": 0.2733, "step": 10262, "teacher_loss": 0.25606852769851685 }, { "compression_loss": 0.0, "epoch": 1.85, "label_loss": 0.8055862188339233, "learning_rate": 2.8115696998196627e-05, "loss": 0.3378, "step": 10263, "teacher_loss": 0.2858313322067261 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.28058311343193054, "learning_rate": 2.8114594741687226e-05, "loss": 0.2118, "step": 10264, "teacher_loss": 0.20418499410152435 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5593300461769104, "learning_rate": 2.811349218449949e-05, "loss": 0.287, "step": 10265, "teacher_loss": 0.2567441165447235 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.7448901534080505, "learning_rate": 2.8112389326658695e-05, "loss": 0.3594, "step": 10266, "teacher_loss": 0.3165847361087799 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.6551687717437744, "learning_rate": 2.811128616819012e-05, "loss": 0.294, "step": 10267, "teacher_loss": 0.25382906198501587 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.45019686222076416, "learning_rate": 2.811018270911907e-05, "loss": 0.3458, "step": 10268, "teacher_loss": 0.3341796100139618 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.44331735372543335, "learning_rate": 2.8109078949470833e-05, "loss": 0.279, "step": 10269, "teacher_loss": 0.2607024610042572 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.412151575088501, "learning_rate": 2.810797488927072e-05, "loss": 0.289, "step": 10270, "teacher_loss": 0.27534863352775574 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.2876285910606384, "learning_rate": 2.8106870528544044e-05, "loss": 0.3257, "step": 10271, "teacher_loss": 0.3298966884613037 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5396019220352173, "learning_rate": 2.8105765867316122e-05, "loss": 0.37, "step": 10272, "teacher_loss": 0.35120469331741333 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.33282193541526794, "learning_rate": 2.810466090561228e-05, "loss": 0.2422, "step": 10273, "teacher_loss": 0.2321617752313614 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5569236278533936, "learning_rate": 2.8103555643457855e-05, "loss": 0.2606, "step": 10274, "teacher_loss": 0.22768956422805786 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5272619724273682, "learning_rate": 2.8102450080878183e-05, "loss": 0.3529, "step": 10275, "teacher_loss": 0.33354583382606506 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.32882049679756165, "learning_rate": 2.8101344217898614e-05, "loss": 0.2434, "step": 10276, "teacher_loss": 0.23395654559135437 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.839247465133667, "learning_rate": 2.8100238054544507e-05, "loss": 0.3454, "step": 10277, "teacher_loss": 0.29049360752105713 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.24745948612689972, "learning_rate": 2.8099131590841213e-05, "loss": 0.2056, "step": 10278, "teacher_loss": 0.20092225074768066 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.618982195854187, "learning_rate": 2.8098024826814108e-05, "loss": 0.2356, "step": 10279, "teacher_loss": 0.19299820065498352 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.2928692102432251, "learning_rate": 2.8096917762488565e-05, "loss": 0.1815, "step": 10280, "teacher_loss": 0.16912904381752014 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5782037377357483, "learning_rate": 2.809581039788996e-05, "loss": 0.3167, "step": 10281, "teacher_loss": 0.2876846492290497 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.4565497636795044, "learning_rate": 2.8094702733043688e-05, "loss": 0.299, "step": 10282, "teacher_loss": 0.281515508890152 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.34388020634651184, "learning_rate": 2.8093594767975142e-05, "loss": 0.2511, "step": 10283, "teacher_loss": 0.24075892567634583 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.605820894241333, "learning_rate": 2.809248650270972e-05, "loss": 0.2424, "step": 10284, "teacher_loss": 0.20201963186264038 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.2903047800064087, "learning_rate": 2.8091377937272843e-05, "loss": 0.2534, "step": 10285, "teacher_loss": 0.24929940700531006 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5776563286781311, "learning_rate": 2.809026907168992e-05, "loss": 0.2914, "step": 10286, "teacher_loss": 0.2596386671066284 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.3714195787906647, "learning_rate": 2.808915990598637e-05, "loss": 0.3024, "step": 10287, "teacher_loss": 0.29476410150527954 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.8987783193588257, "learning_rate": 2.8088050440187623e-05, "loss": 0.4274, "step": 10288, "teacher_loss": 0.3749772310256958 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.18082700669765472, "learning_rate": 2.8086940674319128e-05, "loss": 0.2016, "step": 10289, "teacher_loss": 0.20385286211967468 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.3965546488761902, "learning_rate": 2.8085830608406314e-05, "loss": 0.2735, "step": 10290, "teacher_loss": 0.2597719430923462 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.2651168704032898, "learning_rate": 2.808472024247464e-05, "loss": 0.2225, "step": 10291, "teacher_loss": 0.21779808402061462 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.3597174286842346, "learning_rate": 2.808360957654956e-05, "loss": 0.2854, "step": 10292, "teacher_loss": 0.27714797854423523 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.421950101852417, "learning_rate": 2.808249861065654e-05, "loss": 0.2614, "step": 10293, "teacher_loss": 0.2435457557439804 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5364779233932495, "learning_rate": 2.808138734482105e-05, "loss": 0.3611, "step": 10294, "teacher_loss": 0.34162285923957825 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.7266249656677246, "learning_rate": 2.8080275779068566e-05, "loss": 0.2856, "step": 10295, "teacher_loss": 0.23655295372009277 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 1.0063552856445312, "learning_rate": 2.8079163913424578e-05, "loss": 0.5515, "step": 10296, "teacher_loss": 0.5010131597518921 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.6146499514579773, "learning_rate": 2.8078051747914575e-05, "loss": 0.3196, "step": 10297, "teacher_loss": 0.28685081005096436 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.627341091632843, "learning_rate": 2.8076939282564054e-05, "loss": 0.5015, "step": 10298, "teacher_loss": 0.4875109791755676 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.17753227055072784, "learning_rate": 2.8075826517398523e-05, "loss": 0.2078, "step": 10299, "teacher_loss": 0.21119153499603271 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.12642362713813782, "learning_rate": 2.8074713452443492e-05, "loss": 0.1963, "step": 10300, "teacher_loss": 0.20410653948783875 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.9918332695960999, "learning_rate": 2.8073600087724483e-05, "loss": 0.421, "step": 10301, "teacher_loss": 0.3575218617916107 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.23232416808605194, "learning_rate": 2.807248642326702e-05, "loss": 0.2254, "step": 10302, "teacher_loss": 0.22466540336608887 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.6067183017730713, "learning_rate": 2.807137245909664e-05, "loss": 0.2871, "step": 10303, "teacher_loss": 0.25162273645401 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.8547205924987793, "learning_rate": 2.807025819523887e-05, "loss": 0.4478, "step": 10304, "teacher_loss": 0.40259578824043274 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5844119787216187, "learning_rate": 2.8069143631719276e-05, "loss": 0.3317, "step": 10305, "teacher_loss": 0.3035754859447479 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.6955540776252747, "learning_rate": 2.8068028768563398e-05, "loss": 0.2897, "step": 10306, "teacher_loss": 0.2446369230747223 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5403918027877808, "learning_rate": 2.80669136057968e-05, "loss": 0.2281, "step": 10307, "teacher_loss": 0.19342437386512756 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.40683913230895996, "learning_rate": 2.806579814344505e-05, "loss": 0.2769, "step": 10308, "teacher_loss": 0.2624482810497284 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.22389420866966248, "learning_rate": 2.806468238153372e-05, "loss": 0.1865, "step": 10309, "teacher_loss": 0.18238306045532227 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.4098944365978241, "learning_rate": 2.8063566320088398e-05, "loss": 0.2401, "step": 10310, "teacher_loss": 0.22123856842517853 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.718734085559845, "learning_rate": 2.8062449959134663e-05, "loss": 0.2996, "step": 10311, "teacher_loss": 0.2530561685562134 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.9787742495536804, "learning_rate": 2.8061333298698114e-05, "loss": 0.3759, "step": 10312, "teacher_loss": 0.30889958143234253 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.4509371519088745, "learning_rate": 2.8060216338804353e-05, "loss": 0.4063, "step": 10313, "teacher_loss": 0.4012956917285919 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.3239341378211975, "learning_rate": 2.805909907947899e-05, "loss": 0.2405, "step": 10314, "teacher_loss": 0.23124219477176666 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.5805948972702026, "learning_rate": 2.8057981520747632e-05, "loss": 0.269, "step": 10315, "teacher_loss": 0.23441605269908905 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.40246596932411194, "learning_rate": 2.8056863662635912e-05, "loss": 0.3997, "step": 10316, "teacher_loss": 0.39942896366119385 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.20643341541290283, "learning_rate": 2.8055745505169457e-05, "loss": 0.1857, "step": 10317, "teacher_loss": 0.18341004848480225 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.7771695256233215, "learning_rate": 2.80546270483739e-05, "loss": 0.2919, "step": 10318, "teacher_loss": 0.23798736929893494 }, { "compression_loss": 0.0, "epoch": 1.86, "label_loss": 0.44528406858444214, "learning_rate": 2.8053508292274878e-05, "loss": 0.3219, "step": 10319, "teacher_loss": 0.3081396222114563 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4121386706829071, "learning_rate": 2.8052389236898055e-05, "loss": 0.2661, "step": 10320, "teacher_loss": 0.24985355138778687 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.38992244005203247, "learning_rate": 2.8051269882269076e-05, "loss": 0.2591, "step": 10321, "teacher_loss": 0.24451500177383423 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.6654698848724365, "learning_rate": 2.8050150228413613e-05, "loss": 0.3569, "step": 10322, "teacher_loss": 0.32262516021728516 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.557917594909668, "learning_rate": 2.8049030275357324e-05, "loss": 0.2809, "step": 10323, "teacher_loss": 0.25014495849609375 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.7684891223907471, "learning_rate": 2.8047910023125897e-05, "loss": 0.2665, "step": 10324, "teacher_loss": 0.2106896936893463 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2600611448287964, "learning_rate": 2.8046789471745012e-05, "loss": 0.2548, "step": 10325, "teacher_loss": 0.25418734550476074 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4234950542449951, "learning_rate": 2.8045668621240364e-05, "loss": 0.3084, "step": 10326, "teacher_loss": 0.29563868045806885 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.41755688190460205, "learning_rate": 2.8044547471637646e-05, "loss": 0.2866, "step": 10327, "teacher_loss": 0.27202218770980835 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.10755197703838348, "learning_rate": 2.8043426022962563e-05, "loss": 0.1555, "step": 10328, "teacher_loss": 0.16083385050296783 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.11279372125864029, "learning_rate": 2.8042304275240827e-05, "loss": 0.2035, "step": 10329, "teacher_loss": 0.21358585357666016 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.7404637336730957, "learning_rate": 2.8041182228498162e-05, "loss": 0.3086, "step": 10330, "teacher_loss": 0.2605902850627899 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.5024974346160889, "learning_rate": 2.804005988276028e-05, "loss": 0.2821, "step": 10331, "teacher_loss": 0.2576325237751007 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2890140414237976, "learning_rate": 2.8038937238052926e-05, "loss": 0.2613, "step": 10332, "teacher_loss": 0.2582029700279236 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.35045111179351807, "learning_rate": 2.8037814294401835e-05, "loss": 0.2869, "step": 10333, "teacher_loss": 0.27984681725502014 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4111517071723938, "learning_rate": 2.803669105183275e-05, "loss": 0.2395, "step": 10334, "teacher_loss": 0.22047272324562073 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3578011989593506, "learning_rate": 2.8035567510371425e-05, "loss": 0.2691, "step": 10335, "teacher_loss": 0.25926080346107483 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3235445022583008, "learning_rate": 2.803444367004362e-05, "loss": 0.3337, "step": 10336, "teacher_loss": 0.334883451461792 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.7203150987625122, "learning_rate": 2.80333195308751e-05, "loss": 0.27, "step": 10337, "teacher_loss": 0.2199324667453766 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.8291847705841064, "learning_rate": 2.803219509289164e-05, "loss": 0.4049, "step": 10338, "teacher_loss": 0.35776910185813904 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3767836093902588, "learning_rate": 2.8031070356119015e-05, "loss": 0.2553, "step": 10339, "teacher_loss": 0.2417478859424591 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.32927972078323364, "learning_rate": 2.8029945320583025e-05, "loss": 0.2349, "step": 10340, "teacher_loss": 0.22436952590942383 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.6173535585403442, "learning_rate": 2.8028819986309443e-05, "loss": 0.3463, "step": 10341, "teacher_loss": 0.3162160813808441 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2319202423095703, "learning_rate": 2.802769435332409e-05, "loss": 0.1572, "step": 10342, "teacher_loss": 0.14884592592716217 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.37412744760513306, "learning_rate": 2.8026568421652763e-05, "loss": 0.2343, "step": 10343, "teacher_loss": 0.2187473475933075 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.29869046807289124, "learning_rate": 2.8025442191321276e-05, "loss": 0.2118, "step": 10344, "teacher_loss": 0.20211371779441833 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.34756967425346375, "learning_rate": 2.8024315662355455e-05, "loss": 0.2514, "step": 10345, "teacher_loss": 0.24073219299316406 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.38378316164016724, "learning_rate": 2.8023188834781117e-05, "loss": 0.2688, "step": 10346, "teacher_loss": 0.25606077909469604 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3386090099811554, "learning_rate": 2.802206170862411e-05, "loss": 0.2331, "step": 10347, "teacher_loss": 0.22138527035713196 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.23073986172676086, "learning_rate": 2.802093428391027e-05, "loss": 0.22, "step": 10348, "teacher_loss": 0.21878477931022644 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.20492029190063477, "learning_rate": 2.801980656066545e-05, "loss": 0.2015, "step": 10349, "teacher_loss": 0.20114190876483917 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.8918582797050476, "learning_rate": 2.8018678538915493e-05, "loss": 0.315, "step": 10350, "teacher_loss": 0.25091320276260376 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.8688955903053284, "learning_rate": 2.8017550218686273e-05, "loss": 0.3777, "step": 10351, "teacher_loss": 0.3231067359447479 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4511251449584961, "learning_rate": 2.8016421600003654e-05, "loss": 0.2717, "step": 10352, "teacher_loss": 0.25173014402389526 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4114728569984436, "learning_rate": 2.8015292682893514e-05, "loss": 0.2326, "step": 10353, "teacher_loss": 0.21269632875919342 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3526153564453125, "learning_rate": 2.8014163467381734e-05, "loss": 0.26, "step": 10354, "teacher_loss": 0.24965913593769073 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4737495183944702, "learning_rate": 2.8013033953494206e-05, "loss": 0.2281, "step": 10355, "teacher_loss": 0.20080244541168213 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.30782222747802734, "learning_rate": 2.8011904141256825e-05, "loss": 0.295, "step": 10356, "teacher_loss": 0.2935648262500763 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2902483344078064, "learning_rate": 2.8010774030695493e-05, "loss": 0.303, "step": 10357, "teacher_loss": 0.30442625284194946 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.383614182472229, "learning_rate": 2.8009643621836114e-05, "loss": 0.2197, "step": 10358, "teacher_loss": 0.2015208899974823 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2517194449901581, "learning_rate": 2.800851291470462e-05, "loss": 0.2289, "step": 10359, "teacher_loss": 0.22635729610919952 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4676363468170166, "learning_rate": 2.8007381909326925e-05, "loss": 0.2179, "step": 10360, "teacher_loss": 0.19012172520160675 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.45767152309417725, "learning_rate": 2.8006250605728954e-05, "loss": 0.2658, "step": 10361, "teacher_loss": 0.2444770336151123 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.6390374302864075, "learning_rate": 2.800511900393666e-05, "loss": 0.3325, "step": 10362, "teacher_loss": 0.29848289489746094 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.2626778185367584, "learning_rate": 2.800398710397598e-05, "loss": 0.1772, "step": 10363, "teacher_loss": 0.16768473386764526 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.8169225454330444, "learning_rate": 2.8002854905872853e-05, "loss": 0.4098, "step": 10364, "teacher_loss": 0.3645349442958832 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.750160276889801, "learning_rate": 2.8001722409653258e-05, "loss": 0.6115, "step": 10365, "teacher_loss": 0.596047043800354 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.4848783612251282, "learning_rate": 2.8000589615343145e-05, "loss": 0.2226, "step": 10366, "teacher_loss": 0.19344952702522278 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.15360905230045319, "learning_rate": 2.799945652296849e-05, "loss": 0.2319, "step": 10367, "teacher_loss": 0.24055379629135132 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3780812621116638, "learning_rate": 2.799832313255527e-05, "loss": 0.2775, "step": 10368, "teacher_loss": 0.26631176471710205 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.6879922151565552, "learning_rate": 2.799718944412947e-05, "loss": 0.272, "step": 10369, "teacher_loss": 0.22575025260448456 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.6711910367012024, "learning_rate": 2.7996055457717094e-05, "loss": 0.2644, "step": 10370, "teacher_loss": 0.21917986869812012 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.31732892990112305, "learning_rate": 2.799492117334412e-05, "loss": 0.2368, "step": 10371, "teacher_loss": 0.2278173267841339 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.31505274772644043, "learning_rate": 2.7993786591036566e-05, "loss": 0.3732, "step": 10372, "teacher_loss": 0.3796786665916443 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.3936498165130615, "learning_rate": 2.7992651710820444e-05, "loss": 0.2294, "step": 10373, "teacher_loss": 0.21115407347679138 }, { "compression_loss": 0.0, "epoch": 1.87, "label_loss": 0.7516721487045288, "learning_rate": 2.7991516532721777e-05, "loss": 0.4612, "step": 10374, "teacher_loss": 0.42895299196243286 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.3688117265701294, "learning_rate": 2.7990381056766583e-05, "loss": 0.2301, "step": 10375, "teacher_loss": 0.2147057056427002 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4552571773529053, "learning_rate": 2.7989245282980897e-05, "loss": 0.3089, "step": 10376, "teacher_loss": 0.2926892936229706 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.3523431420326233, "learning_rate": 2.798810921139076e-05, "loss": 0.2262, "step": 10377, "teacher_loss": 0.2121475636959076 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5056685209274292, "learning_rate": 2.798697284202222e-05, "loss": 0.2151, "step": 10378, "teacher_loss": 0.1828230321407318 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5542752742767334, "learning_rate": 2.798583617490133e-05, "loss": 0.3108, "step": 10379, "teacher_loss": 0.28379887342453003 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6121727228164673, "learning_rate": 2.7984699210054153e-05, "loss": 0.3471, "step": 10380, "teacher_loss": 0.31767114996910095 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.3316594660282135, "learning_rate": 2.7983561947506746e-05, "loss": 0.3235, "step": 10381, "teacher_loss": 0.3225858211517334 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6516517996788025, "learning_rate": 2.7982424387285196e-05, "loss": 0.4255, "step": 10382, "teacher_loss": 0.40033668279647827 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.9195019602775574, "learning_rate": 2.7981286529415576e-05, "loss": 0.3413, "step": 10383, "teacher_loss": 0.2770756483078003 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5371028184890747, "learning_rate": 2.7980148373923977e-05, "loss": 0.3291, "step": 10384, "teacher_loss": 0.30602073669433594 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4159952402114868, "learning_rate": 2.7979009920836492e-05, "loss": 0.1905, "step": 10385, "teacher_loss": 0.16549193859100342 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.24010604619979858, "learning_rate": 2.7977871170179225e-05, "loss": 0.2262, "step": 10386, "teacher_loss": 0.2246396243572235 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.3545704185962677, "learning_rate": 2.7976732121978277e-05, "loss": 0.2317, "step": 10387, "teacher_loss": 0.21804791688919067 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5020953416824341, "learning_rate": 2.797559277625977e-05, "loss": 0.4966, "step": 10388, "teacher_loss": 0.49594593048095703 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.8366658687591553, "learning_rate": 2.7974453133049824e-05, "loss": 0.5642, "step": 10389, "teacher_loss": 0.533894956111908 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.70830237865448, "learning_rate": 2.7973313192374566e-05, "loss": 0.3222, "step": 10390, "teacher_loss": 0.2792561948299408 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5858169794082642, "learning_rate": 2.7972172954260132e-05, "loss": 0.4356, "step": 10391, "teacher_loss": 0.4188770055770874 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.312599241733551, "learning_rate": 2.797103241873267e-05, "loss": 0.1699, "step": 10392, "teacher_loss": 0.15403355658054352 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.41025346517562866, "learning_rate": 2.7969891585818317e-05, "loss": 0.3853, "step": 10393, "teacher_loss": 0.3824838697910309 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.21359658241271973, "learning_rate": 2.796875045554324e-05, "loss": 0.2845, "step": 10394, "teacher_loss": 0.29233911633491516 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 1.2180202007293701, "learning_rate": 2.7967609027933592e-05, "loss": 0.3831, "step": 10395, "teacher_loss": 0.29035669565200806 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6336901783943176, "learning_rate": 2.7966467303015554e-05, "loss": 0.2795, "step": 10396, "teacher_loss": 0.24013368785381317 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4247021973133087, "learning_rate": 2.796532528081529e-05, "loss": 0.2291, "step": 10397, "teacher_loss": 0.20732617378234863 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4351074695587158, "learning_rate": 2.7964182961358996e-05, "loss": 0.3186, "step": 10398, "teacher_loss": 0.3056785762310028 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4462539851665497, "learning_rate": 2.796304034467285e-05, "loss": 0.2869, "step": 10399, "teacher_loss": 0.2691980302333832 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5241062045097351, "learning_rate": 2.796189743078306e-05, "loss": 0.263, "step": 10400, "teacher_loss": 0.23397159576416016 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4067451059818268, "learning_rate": 2.796075421971582e-05, "loss": 0.3073, "step": 10401, "teacher_loss": 0.29624900221824646 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.28739285469055176, "learning_rate": 2.7959610711497345e-05, "loss": 0.311, "step": 10402, "teacher_loss": 0.31365156173706055 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.31262698769569397, "learning_rate": 2.795846690615385e-05, "loss": 0.2206, "step": 10403, "teacher_loss": 0.2103470265865326 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5453461408615112, "learning_rate": 2.795732280371156e-05, "loss": 0.3287, "step": 10404, "teacher_loss": 0.30459287762641907 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.42466726899147034, "learning_rate": 2.7956178404196707e-05, "loss": 0.305, "step": 10405, "teacher_loss": 0.2916773557662964 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6043449640274048, "learning_rate": 2.795503370763553e-05, "loss": 0.3252, "step": 10406, "teacher_loss": 0.2942245304584503 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.31097254157066345, "learning_rate": 2.7953888714054267e-05, "loss": 0.2655, "step": 10407, "teacher_loss": 0.26046860218048096 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.1787864714860916, "learning_rate": 2.7952743423479176e-05, "loss": 0.1822, "step": 10408, "teacher_loss": 0.1826338768005371 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.32338032126426697, "learning_rate": 2.7951597835936514e-05, "loss": 0.2732, "step": 10409, "teacher_loss": 0.26763537526130676 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.35308346152305603, "learning_rate": 2.7950451951452542e-05, "loss": 0.2312, "step": 10410, "teacher_loss": 0.21765124797821045 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.31419676542282104, "learning_rate": 2.7949305770053536e-05, "loss": 0.2341, "step": 10411, "teacher_loss": 0.22517862915992737 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.8434735536575317, "learning_rate": 2.7948159291765773e-05, "loss": 0.3444, "step": 10412, "teacher_loss": 0.2889789342880249 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 1.0540509223937988, "learning_rate": 2.7947012516615533e-05, "loss": 0.4657, "step": 10413, "teacher_loss": 0.40034109354019165 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.7415398955345154, "learning_rate": 2.794586544462912e-05, "loss": 0.2701, "step": 10414, "teacher_loss": 0.21771132946014404 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.416032075881958, "learning_rate": 2.7944718075832823e-05, "loss": 0.2403, "step": 10415, "teacher_loss": 0.22082647681236267 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.13640457391738892, "learning_rate": 2.7943570410252953e-05, "loss": 0.205, "step": 10416, "teacher_loss": 0.21265248954296112 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.3997036814689636, "learning_rate": 2.7942422447915816e-05, "loss": 0.2162, "step": 10417, "teacher_loss": 0.1958284080028534 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.2261110246181488, "learning_rate": 2.794127418884774e-05, "loss": 0.2245, "step": 10418, "teacher_loss": 0.22432559728622437 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.7721251249313354, "learning_rate": 2.7940125633075046e-05, "loss": 0.2641, "step": 10419, "teacher_loss": 0.2077070027589798 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6084580421447754, "learning_rate": 2.7938976780624066e-05, "loss": 0.285, "step": 10420, "teacher_loss": 0.24908414483070374 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5722681283950806, "learning_rate": 2.7937827631521137e-05, "loss": 0.2132, "step": 10421, "teacher_loss": 0.17327998578548431 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5443324446678162, "learning_rate": 2.7936678185792618e-05, "loss": 0.4083, "step": 10422, "teacher_loss": 0.3931610882282257 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.33623749017715454, "learning_rate": 2.7935528443464852e-05, "loss": 0.2541, "step": 10423, "teacher_loss": 0.24501970410346985 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6410212516784668, "learning_rate": 2.7934378404564197e-05, "loss": 0.5322, "step": 10424, "teacher_loss": 0.5200929045677185 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.5043405294418335, "learning_rate": 2.7933228069117033e-05, "loss": 0.2761, "step": 10425, "teacher_loss": 0.25071409344673157 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.4749714732170105, "learning_rate": 2.793207743714972e-05, "loss": 0.288, "step": 10426, "teacher_loss": 0.2672499418258667 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.45844417810440063, "learning_rate": 2.7930926508688647e-05, "loss": 0.2645, "step": 10427, "teacher_loss": 0.2429664134979248 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6301224231719971, "learning_rate": 2.7929775283760194e-05, "loss": 0.3092, "step": 10428, "teacher_loss": 0.27352964878082275 }, { "compression_loss": 0.0, "epoch": 1.88, "label_loss": 0.6505568623542786, "learning_rate": 2.792862376239076e-05, "loss": 0.3993, "step": 10429, "teacher_loss": 0.37138664722442627 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.42053818702697754, "learning_rate": 2.7927471944606746e-05, "loss": 0.2485, "step": 10430, "teacher_loss": 0.2294258177280426 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6608172655105591, "learning_rate": 2.792631983043456e-05, "loss": 0.3771, "step": 10431, "teacher_loss": 0.34556469321250916 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6221284866333008, "learning_rate": 2.7925167419900613e-05, "loss": 0.3048, "step": 10432, "teacher_loss": 0.2695158123970032 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.8512316942214966, "learning_rate": 2.792401471303133e-05, "loss": 0.4372, "step": 10433, "teacher_loss": 0.3912391662597656 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6226814985275269, "learning_rate": 2.7922861709853136e-05, "loss": 0.5006, "step": 10434, "teacher_loss": 0.4870661497116089 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.3661653995513916, "learning_rate": 2.792170841039247e-05, "loss": 0.3152, "step": 10435, "teacher_loss": 0.3094923198223114 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 1.2706000804901123, "learning_rate": 2.7920554814675773e-05, "loss": 0.4361, "step": 10436, "teacher_loss": 0.34343206882476807 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.23582251369953156, "learning_rate": 2.7919400922729486e-05, "loss": 0.1961, "step": 10437, "teacher_loss": 0.19164887070655823 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.8399091958999634, "learning_rate": 2.7918246734580076e-05, "loss": 0.3299, "step": 10438, "teacher_loss": 0.27321189641952515 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.15048521757125854, "learning_rate": 2.7917092250253996e-05, "loss": 0.1921, "step": 10439, "teacher_loss": 0.19667935371398926 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.7713289856910706, "learning_rate": 2.791593746977772e-05, "loss": 0.3283, "step": 10440, "teacher_loss": 0.2790627181529999 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.429765522480011, "learning_rate": 2.7914782393177722e-05, "loss": 0.2605, "step": 10441, "teacher_loss": 0.2417023479938507 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.28593653440475464, "learning_rate": 2.791362702048048e-05, "loss": 0.1619, "step": 10442, "teacher_loss": 0.14812614023685455 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.62589031457901, "learning_rate": 2.7912471351712493e-05, "loss": 0.3203, "step": 10443, "teacher_loss": 0.2863495647907257 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6412720084190369, "learning_rate": 2.791131538690025e-05, "loss": 0.4152, "step": 10444, "teacher_loss": 0.39011329412460327 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.310546875, "learning_rate": 2.7910159126070257e-05, "loss": 0.2509, "step": 10445, "teacher_loss": 0.2442292422056198 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 1.2569811344146729, "learning_rate": 2.790900256924902e-05, "loss": 0.3173, "step": 10446, "teacher_loss": 0.21285909414291382 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.32656437158584595, "learning_rate": 2.7907845716463056e-05, "loss": 0.2384, "step": 10447, "teacher_loss": 0.22856619954109192 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.4360158443450928, "learning_rate": 2.7906688567738892e-05, "loss": 0.2192, "step": 10448, "teacher_loss": 0.19515354931354523 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.1979067325592041, "learning_rate": 2.790553112310305e-05, "loss": 0.2692, "step": 10449, "teacher_loss": 0.27710720896720886 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.44713491201400757, "learning_rate": 2.7904373382582078e-05, "loss": 0.3155, "step": 10450, "teacher_loss": 0.30083832144737244 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6276953220367432, "learning_rate": 2.7903215346202513e-05, "loss": 0.2888, "step": 10451, "teacher_loss": 0.2511064112186432 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.5988336205482483, "learning_rate": 2.79020570139909e-05, "loss": 0.2958, "step": 10452, "teacher_loss": 0.26213592290878296 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.3847898542881012, "learning_rate": 2.790089838597381e-05, "loss": 0.2245, "step": 10453, "teacher_loss": 0.20670177042484283 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.17812632024288177, "learning_rate": 2.7899739462177795e-05, "loss": 0.1741, "step": 10454, "teacher_loss": 0.1737053096294403 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.3007696866989136, "learning_rate": 2.789858024262943e-05, "loss": 0.2046, "step": 10455, "teacher_loss": 0.1939181089401245 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.41024863719940186, "learning_rate": 2.7897420727355292e-05, "loss": 0.2368, "step": 10456, "teacher_loss": 0.21757131814956665 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.46183979511260986, "learning_rate": 2.7896260916381967e-05, "loss": 0.387, "step": 10457, "teacher_loss": 0.3786503076553345 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.5082702040672302, "learning_rate": 2.7895100809736037e-05, "loss": 0.2701, "step": 10458, "teacher_loss": 0.24366092681884766 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.5223250389099121, "learning_rate": 2.7893940407444115e-05, "loss": 0.2245, "step": 10459, "teacher_loss": 0.19139716029167175 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.4336528182029724, "learning_rate": 2.78927797095328e-05, "loss": 0.2128, "step": 10460, "teacher_loss": 0.18821988999843597 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.12058216333389282, "learning_rate": 2.7891618716028692e-05, "loss": 0.2647, "step": 10461, "teacher_loss": 0.2807001769542694 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.503341555595398, "learning_rate": 2.789045742695842e-05, "loss": 0.3089, "step": 10462, "teacher_loss": 0.28729701042175293 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.2901851534843445, "learning_rate": 2.788929584234861e-05, "loss": 0.2896, "step": 10463, "teacher_loss": 0.28953447937965393 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.13408219814300537, "learning_rate": 2.788813396222589e-05, "loss": 0.1846, "step": 10464, "teacher_loss": 0.1901823729276657 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6426858305931091, "learning_rate": 2.7886971786616896e-05, "loss": 0.2987, "step": 10465, "teacher_loss": 0.26052331924438477 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.573448657989502, "learning_rate": 2.788580931554828e-05, "loss": 0.3034, "step": 10466, "teacher_loss": 0.27336758375167847 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.22638225555419922, "learning_rate": 2.788464654904669e-05, "loss": 0.2141, "step": 10467, "teacher_loss": 0.21275544166564941 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.4824391305446625, "learning_rate": 2.788348348713878e-05, "loss": 0.2617, "step": 10468, "teacher_loss": 0.23719602823257446 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6467758417129517, "learning_rate": 2.7882320129851222e-05, "loss": 0.4914, "step": 10469, "teacher_loss": 0.4741586446762085 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.4561729431152344, "learning_rate": 2.788115647721069e-05, "loss": 0.3009, "step": 10470, "teacher_loss": 0.28369176387786865 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.32698681950569153, "learning_rate": 2.7879992529243853e-05, "loss": 0.2066, "step": 10471, "teacher_loss": 0.1932152360677719 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.30247852206230164, "learning_rate": 2.787882828597741e-05, "loss": 0.2212, "step": 10472, "teacher_loss": 0.2121918797492981 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6278198957443237, "learning_rate": 2.7877663747438045e-05, "loss": 0.2772, "step": 10473, "teacher_loss": 0.23820774257183075 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.2564704120159149, "learning_rate": 2.787649891365246e-05, "loss": 0.338, "step": 10474, "teacher_loss": 0.34708666801452637 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.2450713813304901, "learning_rate": 2.787533378464736e-05, "loss": 0.3123, "step": 10475, "teacher_loss": 0.31975603103637695 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.3978217840194702, "learning_rate": 2.7874168360449457e-05, "loss": 0.304, "step": 10476, "teacher_loss": 0.29355770349502563 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.5270134210586548, "learning_rate": 2.7873002641085476e-05, "loss": 0.2369, "step": 10477, "teacher_loss": 0.20466457307338715 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.5559606552124023, "learning_rate": 2.7871836626582138e-05, "loss": 0.3547, "step": 10478, "teacher_loss": 0.3322896361351013 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.9896951913833618, "learning_rate": 2.7870670316966175e-05, "loss": 0.3305, "step": 10479, "teacher_loss": 0.25729668140411377 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.3898169994354248, "learning_rate": 2.7869503712264333e-05, "loss": 0.2052, "step": 10480, "teacher_loss": 0.1846565455198288 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.30877548456192017, "learning_rate": 2.7868336812503355e-05, "loss": 0.1699, "step": 10481, "teacher_loss": 0.1544307917356491 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.6854501366615295, "learning_rate": 2.7867169617709997e-05, "loss": 0.3411, "step": 10482, "teacher_loss": 0.30285215377807617 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.4211253523826599, "learning_rate": 2.7866002127911017e-05, "loss": 0.2612, "step": 10483, "teacher_loss": 0.24343039095401764 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.7201869487762451, "learning_rate": 2.786483434313318e-05, "loss": 0.3114, "step": 10484, "teacher_loss": 0.26598456501960754 }, { "compression_loss": 0.0, "epoch": 1.89, "label_loss": 0.2890753746032715, "learning_rate": 2.7863666263403265e-05, "loss": 0.2693, "step": 10485, "teacher_loss": 0.2670706808567047 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.47620460391044617, "learning_rate": 2.7862497888748047e-05, "loss": 0.2746, "step": 10486, "teacher_loss": 0.25219297409057617 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5559215545654297, "learning_rate": 2.7861329219194315e-05, "loss": 0.4046, "step": 10487, "teacher_loss": 0.38774412870407104 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.39769506454467773, "learning_rate": 2.786016025476887e-05, "loss": 0.1936, "step": 10488, "teacher_loss": 0.1709231436252594 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.33549511432647705, "learning_rate": 2.7858990995498505e-05, "loss": 0.2337, "step": 10489, "teacher_loss": 0.222381591796875 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3062931299209595, "learning_rate": 2.7857821441410028e-05, "loss": 0.3508, "step": 10490, "teacher_loss": 0.35575586557388306 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.22177183628082275, "learning_rate": 2.7856651592530256e-05, "loss": 0.1711, "step": 10491, "teacher_loss": 0.16542232036590576 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.32008400559425354, "learning_rate": 2.7855481448886007e-05, "loss": 0.243, "step": 10492, "teacher_loss": 0.23448795080184937 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5326224565505981, "learning_rate": 2.7854311010504115e-05, "loss": 0.3242, "step": 10493, "teacher_loss": 0.301089882850647 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.40941372513771057, "learning_rate": 2.785314027741141e-05, "loss": 0.2558, "step": 10494, "teacher_loss": 0.2386990785598755 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3746057152748108, "learning_rate": 2.785196924963473e-05, "loss": 0.328, "step": 10495, "teacher_loss": 0.322842538356781 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.7561874389648438, "learning_rate": 2.785079792720093e-05, "loss": 0.3341, "step": 10496, "teacher_loss": 0.2872387766838074 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.31002944707870483, "learning_rate": 2.7849626310136865e-05, "loss": 0.2158, "step": 10497, "teacher_loss": 0.20538225769996643 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.21120533347129822, "learning_rate": 2.784845439846939e-05, "loss": 0.2138, "step": 10498, "teacher_loss": 0.21411120891571045 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5520175695419312, "learning_rate": 2.7847282192225377e-05, "loss": 0.2445, "step": 10499, "teacher_loss": 0.21038463711738586 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.437430739402771, "learning_rate": 2.7846109691431706e-05, "loss": 0.2297, "step": 10500, "teacher_loss": 0.2066144049167633 }, { "epoch": 1.9, "eval_exact_match": 79.29990539262063, "eval_f1": 86.71744553450758, "step": 10500 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.8003648519515991, "learning_rate": 2.784493689611525e-05, "loss": 0.3126, "step": 10501, "teacher_loss": 0.2583557963371277 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.8878642916679382, "learning_rate": 2.7843763806302905e-05, "loss": 0.3222, "step": 10502, "teacher_loss": 0.2593066096305847 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.13957646489143372, "learning_rate": 2.784259042202156e-05, "loss": 0.2431, "step": 10503, "teacher_loss": 0.25465136766433716 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.7951029539108276, "learning_rate": 2.7841416743298124e-05, "loss": 0.3199, "step": 10504, "teacher_loss": 0.2671493887901306 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.45762503147125244, "learning_rate": 2.78402427701595e-05, "loss": 0.195, "step": 10505, "teacher_loss": 0.16583159565925598 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.45093607902526855, "learning_rate": 2.7839068502632612e-05, "loss": 0.2746, "step": 10506, "teacher_loss": 0.2549722492694855 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3662753403186798, "learning_rate": 2.783789394074437e-05, "loss": 0.2446, "step": 10507, "teacher_loss": 0.23113563656806946 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.6336041688919067, "learning_rate": 2.7836719084521714e-05, "loss": 0.3977, "step": 10508, "teacher_loss": 0.37149134278297424 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.1949794888496399, "learning_rate": 2.7835543933991575e-05, "loss": 0.2921, "step": 10509, "teacher_loss": 0.30288100242614746 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.32303255796432495, "learning_rate": 2.7834368489180895e-05, "loss": 0.3329, "step": 10510, "teacher_loss": 0.3340139389038086 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3003106713294983, "learning_rate": 2.7833192750116628e-05, "loss": 0.2947, "step": 10511, "teacher_loss": 0.29403382539749146 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.16023391485214233, "learning_rate": 2.7832016716825722e-05, "loss": 0.2101, "step": 10512, "teacher_loss": 0.21561364829540253 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.2220589518547058, "learning_rate": 2.7830840389335148e-05, "loss": 0.2487, "step": 10513, "teacher_loss": 0.2516571283340454 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5362939834594727, "learning_rate": 2.7829663767671873e-05, "loss": 0.2833, "step": 10514, "teacher_loss": 0.25518855452537537 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5730119943618774, "learning_rate": 2.7828486851862873e-05, "loss": 0.2946, "step": 10515, "teacher_loss": 0.2636498808860779 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5284982919692993, "learning_rate": 2.7827309641935132e-05, "loss": 0.2796, "step": 10516, "teacher_loss": 0.25189077854156494 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.6837352514266968, "learning_rate": 2.782613213791564e-05, "loss": 0.3407, "step": 10517, "teacher_loss": 0.3025929629802704 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.8064262866973877, "learning_rate": 2.782495433983139e-05, "loss": 0.3103, "step": 10518, "teacher_loss": 0.25512173771858215 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.6300204396247864, "learning_rate": 2.7823776247709392e-05, "loss": 0.2524, "step": 10519, "teacher_loss": 0.21046525239944458 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.2278192937374115, "learning_rate": 2.7822597861576647e-05, "loss": 0.1827, "step": 10520, "teacher_loss": 0.17764177918434143 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3098452091217041, "learning_rate": 2.782141918146018e-05, "loss": 0.1461, "step": 10521, "teacher_loss": 0.1278894990682602 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.26042449474334717, "learning_rate": 2.7820240207387016e-05, "loss": 0.2112, "step": 10522, "teacher_loss": 0.20569732785224915 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.27227064967155457, "learning_rate": 2.7819060939384174e-05, "loss": 0.1871, "step": 10523, "teacher_loss": 0.17766734957695007 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.4729669392108917, "learning_rate": 2.7817881377478703e-05, "loss": 0.3574, "step": 10524, "teacher_loss": 0.34455347061157227 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.428869366645813, "learning_rate": 2.781670152169764e-05, "loss": 0.3052, "step": 10525, "teacher_loss": 0.2914574146270752 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.9285493493080139, "learning_rate": 2.7815521372068037e-05, "loss": 0.4236, "step": 10526, "teacher_loss": 0.36745762825012207 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.41628456115722656, "learning_rate": 2.7814340928616953e-05, "loss": 0.3712, "step": 10527, "teacher_loss": 0.3662249445915222 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5386245250701904, "learning_rate": 2.781316019137145e-05, "loss": 0.2364, "step": 10528, "teacher_loss": 0.20283028483390808 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.2850113809108734, "learning_rate": 2.78119791603586e-05, "loss": 0.249, "step": 10529, "teacher_loss": 0.24501727521419525 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 1.0109288692474365, "learning_rate": 2.781079783560548e-05, "loss": 0.4834, "step": 10530, "teacher_loss": 0.42484021186828613 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5316810607910156, "learning_rate": 2.7809616217139176e-05, "loss": 0.2477, "step": 10531, "teacher_loss": 0.21614117920398712 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.9535702466964722, "learning_rate": 2.7808434304986775e-05, "loss": 0.3876, "step": 10532, "teacher_loss": 0.3247010409832001 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.33875399827957153, "learning_rate": 2.7807252099175377e-05, "loss": 0.2288, "step": 10533, "teacher_loss": 0.21658799052238464 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.35775652527809143, "learning_rate": 2.7806069599732086e-05, "loss": 0.2503, "step": 10534, "teacher_loss": 0.2383839190006256 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5329631567001343, "learning_rate": 2.7804886806684015e-05, "loss": 0.2882, "step": 10535, "teacher_loss": 0.26096785068511963 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.9603796005249023, "learning_rate": 2.780370372005828e-05, "loss": 0.4191, "step": 10536, "teacher_loss": 0.3589169383049011 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.48883798718452454, "learning_rate": 2.780252033988201e-05, "loss": 0.3499, "step": 10537, "teacher_loss": 0.33448800444602966 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.5582314729690552, "learning_rate": 2.780133666618233e-05, "loss": 0.2743, "step": 10538, "teacher_loss": 0.24276627600193024 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.6345632672309875, "learning_rate": 2.7800152698986378e-05, "loss": 0.2624, "step": 10539, "teacher_loss": 0.22107091546058655 }, { "compression_loss": 0.0, "epoch": 1.9, "label_loss": 0.3248680830001831, "learning_rate": 2.7798968438321307e-05, "loss": 0.2283, "step": 10540, "teacher_loss": 0.21760046482086182 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.40187469124794006, "learning_rate": 2.7797783884214258e-05, "loss": 0.2637, "step": 10541, "teacher_loss": 0.24838611483573914 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.2949375510215759, "learning_rate": 2.7796599036692398e-05, "loss": 0.3876, "step": 10542, "teacher_loss": 0.3979080319404602 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4472096264362335, "learning_rate": 2.7795413895782885e-05, "loss": 0.2818, "step": 10543, "teacher_loss": 0.26337242126464844 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5137739777565002, "learning_rate": 2.7794228461512897e-05, "loss": 0.4909, "step": 10544, "teacher_loss": 0.48834359645843506 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.23226553201675415, "learning_rate": 2.7793042733909608e-05, "loss": 0.2673, "step": 10545, "teacher_loss": 0.2712154984474182 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.9990419149398804, "learning_rate": 2.7791856713000202e-05, "loss": 0.4343, "step": 10546, "teacher_loss": 0.3715938925743103 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.45345908403396606, "learning_rate": 2.7790670398811876e-05, "loss": 0.3069, "step": 10547, "teacher_loss": 0.290637731552124 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.2902598977088928, "learning_rate": 2.778948379137183e-05, "loss": 0.2535, "step": 10548, "teacher_loss": 0.24943237006664276 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.7131874561309814, "learning_rate": 2.7788296890707255e-05, "loss": 0.4209, "step": 10549, "teacher_loss": 0.38841935992240906 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.28807175159454346, "learning_rate": 2.7787109696845385e-05, "loss": 0.2219, "step": 10550, "teacher_loss": 0.21449819207191467 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6555964946746826, "learning_rate": 2.778592220981342e-05, "loss": 0.3064, "step": 10551, "teacher_loss": 0.26759546995162964 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.35463422536849976, "learning_rate": 2.7784734429638595e-05, "loss": 0.2605, "step": 10552, "teacher_loss": 0.25004664063453674 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4276130795478821, "learning_rate": 2.778354635634814e-05, "loss": 0.2247, "step": 10553, "teacher_loss": 0.20213119685649872 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5391087532043457, "learning_rate": 2.7782357989969296e-05, "loss": 0.2347, "step": 10554, "teacher_loss": 0.20087826251983643 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.30259203910827637, "learning_rate": 2.7781169330529308e-05, "loss": 0.2255, "step": 10555, "teacher_loss": 0.2168789505958557 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5337007641792297, "learning_rate": 2.7779980378055423e-05, "loss": 0.298, "step": 10556, "teacher_loss": 0.27177825570106506 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4964728057384491, "learning_rate": 2.7778791132574908e-05, "loss": 0.2218, "step": 10557, "teacher_loss": 0.19133460521697998 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6402260065078735, "learning_rate": 2.7777601594115024e-05, "loss": 0.282, "step": 10558, "teacher_loss": 0.2422443926334381 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 1.1095409393310547, "learning_rate": 2.777641176270304e-05, "loss": 0.3286, "step": 10559, "teacher_loss": 0.24179603159427643 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.8538591861724854, "learning_rate": 2.7775221638366247e-05, "loss": 0.4287, "step": 10560, "teacher_loss": 0.3814762234687805 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4244820475578308, "learning_rate": 2.777403122113192e-05, "loss": 0.2989, "step": 10561, "teacher_loss": 0.28497397899627686 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5642846822738647, "learning_rate": 2.7772840511027356e-05, "loss": 0.2683, "step": 10562, "teacher_loss": 0.23545873165130615 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.42701101303100586, "learning_rate": 2.7771649508079853e-05, "loss": 0.2681, "step": 10563, "teacher_loss": 0.2504361867904663 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5108298063278198, "learning_rate": 2.7770458212316723e-05, "loss": 0.4661, "step": 10564, "teacher_loss": 0.46109727025032043 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.2883630096912384, "learning_rate": 2.776926662376527e-05, "loss": 0.3397, "step": 10565, "teacher_loss": 0.34536051750183105 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.32484641671180725, "learning_rate": 2.7768074742452816e-05, "loss": 0.2358, "step": 10566, "teacher_loss": 0.22593890130519867 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4530596435070038, "learning_rate": 2.776688256840669e-05, "loss": 0.3108, "step": 10567, "teacher_loss": 0.29496753215789795 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.47237902879714966, "learning_rate": 2.776569010165423e-05, "loss": 0.5336, "step": 10568, "teacher_loss": 0.540449857711792 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.3347229063510895, "learning_rate": 2.7764497342222758e-05, "loss": 0.5179, "step": 10569, "teacher_loss": 0.5383025407791138 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.34465712308883667, "learning_rate": 2.776330429013964e-05, "loss": 0.2613, "step": 10570, "teacher_loss": 0.25205036997795105 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5259889364242554, "learning_rate": 2.7762110945432223e-05, "loss": 0.267, "step": 10571, "teacher_loss": 0.23825687170028687 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6760830879211426, "learning_rate": 2.776091730812786e-05, "loss": 0.33, "step": 10572, "teacher_loss": 0.29150718450546265 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5438052415847778, "learning_rate": 2.775972337825392e-05, "loss": 0.2747, "step": 10573, "teacher_loss": 0.24476662278175354 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.22737932205200195, "learning_rate": 2.775852915583778e-05, "loss": 0.1768, "step": 10574, "teacher_loss": 0.17114558815956116 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.4441859722137451, "learning_rate": 2.7757334640906825e-05, "loss": 0.4927, "step": 10575, "teacher_loss": 0.4981132447719574 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.386456161737442, "learning_rate": 2.775613983348843e-05, "loss": 0.2322, "step": 10576, "teacher_loss": 0.21507391333580017 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.3415631651878357, "learning_rate": 2.7754944733609995e-05, "loss": 0.2262, "step": 10577, "teacher_loss": 0.21341730654239655 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.3886233866214752, "learning_rate": 2.7753749341298915e-05, "loss": 0.2537, "step": 10578, "teacher_loss": 0.23874206840991974 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6054654121398926, "learning_rate": 2.7752553656582604e-05, "loss": 0.2863, "step": 10579, "teacher_loss": 0.25084584951400757 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.45179420709609985, "learning_rate": 2.775135767948847e-05, "loss": 0.4904, "step": 10580, "teacher_loss": 0.49467790126800537 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5903551578521729, "learning_rate": 2.775016141004394e-05, "loss": 0.3082, "step": 10581, "teacher_loss": 0.27683669328689575 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.2951371669769287, "learning_rate": 2.774896484827643e-05, "loss": 0.2127, "step": 10582, "teacher_loss": 0.20355471968650818 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 1.4384177923202515, "learning_rate": 2.774776799421338e-05, "loss": 0.6478, "step": 10583, "teacher_loss": 0.5599298477172852 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.43202030658721924, "learning_rate": 2.7746570847882234e-05, "loss": 0.3099, "step": 10584, "teacher_loss": 0.29634588956832886 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6725959777832031, "learning_rate": 2.774537340931043e-05, "loss": 0.2896, "step": 10585, "teacher_loss": 0.24701957404613495 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.9345403909683228, "learning_rate": 2.7744175678525425e-05, "loss": 0.3582, "step": 10586, "teacher_loss": 0.2941203713417053 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.5127602815628052, "learning_rate": 2.7742977655554684e-05, "loss": 0.2226, "step": 10587, "teacher_loss": 0.19034329056739807 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.2002761960029602, "learning_rate": 2.774177934042567e-05, "loss": 0.2174, "step": 10588, "teacher_loss": 0.21926885843276978 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 1.3526737689971924, "learning_rate": 2.774058073316586e-05, "loss": 0.4596, "step": 10589, "teacher_loss": 0.3604092001914978 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.1483670473098755, "learning_rate": 2.7739381833802725e-05, "loss": 0.2329, "step": 10590, "teacher_loss": 0.24230796098709106 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.795592188835144, "learning_rate": 2.7738182642363765e-05, "loss": 0.4822, "step": 10591, "teacher_loss": 0.44743263721466064 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.323878675699234, "learning_rate": 2.7736983158876468e-05, "loss": 0.2421, "step": 10592, "teacher_loss": 0.23298680782318115 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.38348716497421265, "learning_rate": 2.7735783383368335e-05, "loss": 0.3056, "step": 10593, "teacher_loss": 0.29693886637687683 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.6583057641983032, "learning_rate": 2.7734583315866874e-05, "loss": 0.3294, "step": 10594, "teacher_loss": 0.29281747341156006 }, { "compression_loss": 0.0, "epoch": 1.91, "label_loss": 0.26391372084617615, "learning_rate": 2.7733382956399594e-05, "loss": 0.2102, "step": 10595, "teacher_loss": 0.20423074066638947 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.657230019569397, "learning_rate": 2.773218230499402e-05, "loss": 0.3208, "step": 10596, "teacher_loss": 0.2834717333316803 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4997221827507019, "learning_rate": 2.773098136167768e-05, "loss": 0.2627, "step": 10597, "teacher_loss": 0.23637765645980835 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.20968911051750183, "learning_rate": 2.7729780126478108e-05, "loss": 0.2646, "step": 10598, "teacher_loss": 0.270747572183609 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5338220000267029, "learning_rate": 2.772857859942284e-05, "loss": 0.2717, "step": 10599, "teacher_loss": 0.24255669116973877 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.9986376762390137, "learning_rate": 2.7727376780539427e-05, "loss": 0.3297, "step": 10600, "teacher_loss": 0.25542736053466797 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.38344448804855347, "learning_rate": 2.7726174669855428e-05, "loss": 0.3788, "step": 10601, "teacher_loss": 0.3783177137374878 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.2888924777507782, "learning_rate": 2.7724972267398398e-05, "loss": 0.2882, "step": 10602, "teacher_loss": 0.2880859375 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.250144898891449, "learning_rate": 2.7723769573195902e-05, "loss": 0.2802, "step": 10603, "teacher_loss": 0.28351420164108276 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4528656303882599, "learning_rate": 2.7722566587275516e-05, "loss": 0.6465, "step": 10604, "teacher_loss": 0.6680537462234497 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.7550205588340759, "learning_rate": 2.7721363309664824e-05, "loss": 0.5203, "step": 10605, "teacher_loss": 0.49422335624694824 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.2732846736907959, "learning_rate": 2.772015974039141e-05, "loss": 0.1851, "step": 10606, "teacher_loss": 0.17526564002037048 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.41088151931762695, "learning_rate": 2.7718955879482878e-05, "loss": 0.2192, "step": 10607, "teacher_loss": 0.19792217016220093 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4332578778266907, "learning_rate": 2.7717751726966817e-05, "loss": 0.2174, "step": 10608, "teacher_loss": 0.19336655735969543 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.7957656383514404, "learning_rate": 2.7716547282870837e-05, "loss": 0.2302, "step": 10609, "teacher_loss": 0.16738399863243103 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.43560680747032166, "learning_rate": 2.7715342547222556e-05, "loss": 0.3217, "step": 10610, "teacher_loss": 0.3090146780014038 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.2964370846748352, "learning_rate": 2.7714137520049594e-05, "loss": 0.2103, "step": 10611, "teacher_loss": 0.20075857639312744 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.6856546401977539, "learning_rate": 2.7712932201379574e-05, "loss": 0.3053, "step": 10612, "teacher_loss": 0.2630848288536072 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4563559293746948, "learning_rate": 2.7711726591240133e-05, "loss": 0.3349, "step": 10613, "teacher_loss": 0.32137441635131836 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4271870255470276, "learning_rate": 2.7710520689658918e-05, "loss": 0.2514, "step": 10614, "teacher_loss": 0.23182927072048187 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.2995910346508026, "learning_rate": 2.770931449666357e-05, "loss": 0.2308, "step": 10615, "teacher_loss": 0.22319768369197845 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.8412692546844482, "learning_rate": 2.7708108012281746e-05, "loss": 0.251, "step": 10616, "teacher_loss": 0.18543805181980133 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.38991835713386536, "learning_rate": 2.7706901236541103e-05, "loss": 0.2356, "step": 10617, "teacher_loss": 0.21850663423538208 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.21451202034950256, "learning_rate": 2.7705694169469312e-05, "loss": 0.1783, "step": 10618, "teacher_loss": 0.1742343008518219 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.6568318605422974, "learning_rate": 2.770448681109405e-05, "loss": 0.3313, "step": 10619, "teacher_loss": 0.2951800227165222 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.6399763822555542, "learning_rate": 2.7703279161442994e-05, "loss": 0.2687, "step": 10620, "teacher_loss": 0.227428138256073 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.6078928709030151, "learning_rate": 2.7702071220543833e-05, "loss": 0.368, "step": 10621, "teacher_loss": 0.3413432240486145 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.15240268409252167, "learning_rate": 2.770086298842426e-05, "loss": 0.202, "step": 10622, "teacher_loss": 0.20754998922348022 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 1.0056992769241333, "learning_rate": 2.7699654465111984e-05, "loss": 0.4125, "step": 10623, "teacher_loss": 0.34654057025909424 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4882792830467224, "learning_rate": 2.7698445650634703e-05, "loss": 0.3063, "step": 10624, "teacher_loss": 0.2860836982727051 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.364740252494812, "learning_rate": 2.7697236545020133e-05, "loss": 0.2484, "step": 10625, "teacher_loss": 0.23552259802818298 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.2086329311132431, "learning_rate": 2.7696027148296e-05, "loss": 0.2303, "step": 10626, "teacher_loss": 0.23269705474376678 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5273315906524658, "learning_rate": 2.769481746049003e-05, "loss": 0.3878, "step": 10627, "teacher_loss": 0.3722783029079437 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5774461627006531, "learning_rate": 2.7693607481629955e-05, "loss": 0.4907, "step": 10628, "teacher_loss": 0.4810274839401245 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5860852599143982, "learning_rate": 2.7692397211743517e-05, "loss": 0.4198, "step": 10629, "teacher_loss": 0.40136969089508057 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5135939717292786, "learning_rate": 2.7691186650858465e-05, "loss": 0.3206, "step": 10630, "teacher_loss": 0.29913073778152466 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.6275243759155273, "learning_rate": 2.7689975799002556e-05, "loss": 0.4623, "step": 10631, "teacher_loss": 0.44389382004737854 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.13481444120407104, "learning_rate": 2.7688764656203546e-05, "loss": 0.2235, "step": 10632, "teacher_loss": 0.23339098691940308 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.26117998361587524, "learning_rate": 2.7687553222489206e-05, "loss": 0.1834, "step": 10633, "teacher_loss": 0.17475026845932007 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5900856256484985, "learning_rate": 2.7686341497887306e-05, "loss": 0.288, "step": 10634, "teacher_loss": 0.2544292211532593 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.18976476788520813, "learning_rate": 2.7685129482425636e-05, "loss": 0.2665, "step": 10635, "teacher_loss": 0.2750202417373657 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.29199349880218506, "learning_rate": 2.7683917176131978e-05, "loss": 0.1813, "step": 10636, "teacher_loss": 0.16899548470973969 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.1816972941160202, "learning_rate": 2.7682704579034128e-05, "loss": 0.1943, "step": 10637, "teacher_loss": 0.19566355645656586 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.27330583333969116, "learning_rate": 2.7681491691159882e-05, "loss": 0.2263, "step": 10638, "teacher_loss": 0.2211175262928009 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.23129123449325562, "learning_rate": 2.7680278512537058e-05, "loss": 0.2263, "step": 10639, "teacher_loss": 0.22579103708267212 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.8670210242271423, "learning_rate": 2.7679065043193464e-05, "loss": 0.4133, "step": 10640, "teacher_loss": 0.36293596029281616 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.4552653729915619, "learning_rate": 2.767785128315692e-05, "loss": 0.2333, "step": 10641, "teacher_loss": 0.20867277681827545 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.49822381138801575, "learning_rate": 2.767663723245526e-05, "loss": 0.2376, "step": 10642, "teacher_loss": 0.20864242315292358 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.9551030993461609, "learning_rate": 2.7675422891116316e-05, "loss": 0.518, "step": 10643, "teacher_loss": 0.46938616037368774 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.5787302255630493, "learning_rate": 2.767420825916792e-05, "loss": 0.2861, "step": 10644, "teacher_loss": 0.25359150767326355 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.13817007839679718, "learning_rate": 2.7672993336637936e-05, "loss": 0.1637, "step": 10645, "teacher_loss": 0.16652154922485352 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.7598130702972412, "learning_rate": 2.7671778123554207e-05, "loss": 0.2883, "step": 10646, "teacher_loss": 0.23592320084571838 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.9243156313896179, "learning_rate": 2.7670562619944598e-05, "loss": 0.3948, "step": 10647, "teacher_loss": 0.33591240644454956 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.16597387194633484, "learning_rate": 2.7669346825836973e-05, "loss": 0.1757, "step": 10648, "teacher_loss": 0.17673049867153168 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 1.1268455982208252, "learning_rate": 2.7668130741259216e-05, "loss": 0.5314, "step": 10649, "teacher_loss": 0.46523338556289673 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.38264861702919006, "learning_rate": 2.76669143662392e-05, "loss": 0.1919, "step": 10650, "teacher_loss": 0.17071633040905 }, { "compression_loss": 0.0, "epoch": 1.92, "label_loss": 0.7397134304046631, "learning_rate": 2.766569770080481e-05, "loss": 0.2938, "step": 10651, "teacher_loss": 0.24421042203903198 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.32062312960624695, "learning_rate": 2.7664480744983954e-05, "loss": 0.2378, "step": 10652, "teacher_loss": 0.2285531461238861 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.1934809535741806, "learning_rate": 2.766326349880452e-05, "loss": 0.3521, "step": 10653, "teacher_loss": 0.36968737840652466 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.7182405591011047, "learning_rate": 2.766204596229442e-05, "loss": 0.3669, "step": 10654, "teacher_loss": 0.3278920650482178 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5720716118812561, "learning_rate": 2.766082813548157e-05, "loss": 0.2459, "step": 10655, "teacher_loss": 0.20968562364578247 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.4730564057826996, "learning_rate": 2.765961001839389e-05, "loss": 0.2883, "step": 10656, "teacher_loss": 0.2678139805793762 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.7037424445152283, "learning_rate": 2.7658391611059307e-05, "loss": 0.2993, "step": 10657, "teacher_loss": 0.2543885409832001 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6151723861694336, "learning_rate": 2.7657172913505755e-05, "loss": 0.2249, "step": 10658, "teacher_loss": 0.1815710812807083 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.27467262744903564, "learning_rate": 2.765595392576118e-05, "loss": 0.1838, "step": 10659, "teacher_loss": 0.1737385243177414 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6059149503707886, "learning_rate": 2.7654734647853523e-05, "loss": 0.3491, "step": 10660, "teacher_loss": 0.3205581307411194 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.33769217133522034, "learning_rate": 2.7653515079810744e-05, "loss": 0.2041, "step": 10661, "teacher_loss": 0.18930456042289734 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.42496931552886963, "learning_rate": 2.7652295221660797e-05, "loss": 0.2674, "step": 10662, "teacher_loss": 0.249863862991333 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.7890260815620422, "learning_rate": 2.7651075073431656e-05, "loss": 0.4072, "step": 10663, "teacher_loss": 0.3647833466529846 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.4786689281463623, "learning_rate": 2.7649854635151296e-05, "loss": 0.2214, "step": 10664, "teacher_loss": 0.1928357630968094 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.39120781421661377, "learning_rate": 2.7648633906847692e-05, "loss": 0.2, "step": 10665, "teacher_loss": 0.1787540316581726 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.49346041679382324, "learning_rate": 2.764741288854884e-05, "loss": 0.2902, "step": 10666, "teacher_loss": 0.2676636576652527 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3133053481578827, "learning_rate": 2.7646191580282724e-05, "loss": 0.4404, "step": 10667, "teacher_loss": 0.454497367143631 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.2956196069717407, "learning_rate": 2.7644969982077354e-05, "loss": 0.1966, "step": 10668, "teacher_loss": 0.18561431765556335 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.2704859972000122, "learning_rate": 2.764374809396073e-05, "loss": 0.1823, "step": 10669, "teacher_loss": 0.17248108983039856 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.27046912908554077, "learning_rate": 2.764252591596087e-05, "loss": 0.2743, "step": 10670, "teacher_loss": 0.27475133538246155 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.38920360803604126, "learning_rate": 2.76413034481058e-05, "loss": 0.2646, "step": 10671, "teacher_loss": 0.25073009729385376 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6357411742210388, "learning_rate": 2.764008069042354e-05, "loss": 0.3259, "step": 10672, "teacher_loss": 0.2915067672729492 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 1.2737022638320923, "learning_rate": 2.7638857642942127e-05, "loss": 0.3027, "step": 10673, "teacher_loss": 0.19482949376106262 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5013376474380493, "learning_rate": 2.76376343056896e-05, "loss": 0.2423, "step": 10674, "teacher_loss": 0.21347010135650635 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6647286415100098, "learning_rate": 2.7636410678694008e-05, "loss": 0.4326, "step": 10675, "teacher_loss": 0.40679818391799927 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.21355105936527252, "learning_rate": 2.7635186761983407e-05, "loss": 0.2713, "step": 10676, "teacher_loss": 0.2776651978492737 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5686591863632202, "learning_rate": 2.7633962555585857e-05, "loss": 0.4565, "step": 10677, "teacher_loss": 0.4439891278743744 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.1340579390525818, "learning_rate": 2.7632738059529423e-05, "loss": 0.1565, "step": 10678, "teacher_loss": 0.1589725911617279 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 1.0708719491958618, "learning_rate": 2.7631513273842178e-05, "loss": 0.3542, "step": 10679, "teacher_loss": 0.2745916247367859 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.8099985122680664, "learning_rate": 2.7630288198552206e-05, "loss": 0.3889, "step": 10680, "teacher_loss": 0.34211480617523193 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3791612386703491, "learning_rate": 2.7629062833687593e-05, "loss": 0.3181, "step": 10681, "teacher_loss": 0.31130141019821167 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6782591342926025, "learning_rate": 2.7627837179276432e-05, "loss": 0.4896, "step": 10682, "teacher_loss": 0.46863383054733276 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.22329875826835632, "learning_rate": 2.7626611235346825e-05, "loss": 0.2801, "step": 10683, "teacher_loss": 0.2864404320716858 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.21794909238815308, "learning_rate": 2.7625385001926882e-05, "loss": 0.1836, "step": 10684, "teacher_loss": 0.1798313856124878 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.11426588892936707, "learning_rate": 2.762415847904471e-05, "loss": 0.2043, "step": 10685, "teacher_loss": 0.21433210372924805 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.18531127274036407, "learning_rate": 2.762293166672844e-05, "loss": 0.1852, "step": 10686, "teacher_loss": 0.18519604206085205 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.28036683797836304, "learning_rate": 2.7621704565006186e-05, "loss": 0.1862, "step": 10687, "teacher_loss": 0.17576324939727783 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.403202086687088, "learning_rate": 2.7620477173906087e-05, "loss": 0.3646, "step": 10688, "teacher_loss": 0.3603616952896118 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6357541680335999, "learning_rate": 2.7619249493456288e-05, "loss": 0.281, "step": 10689, "teacher_loss": 0.24153506755828857 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3377685546875, "learning_rate": 2.761802152368493e-05, "loss": 0.5692, "step": 10690, "teacher_loss": 0.594910740852356 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3280877470970154, "learning_rate": 2.7616793264620174e-05, "loss": 0.4354, "step": 10691, "teacher_loss": 0.447316437959671 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.6290035843849182, "learning_rate": 2.7615564716290175e-05, "loss": 0.3872, "step": 10692, "teacher_loss": 0.3603074848651886 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3782510757446289, "learning_rate": 2.7614335878723096e-05, "loss": 0.2437, "step": 10693, "teacher_loss": 0.22869972884655 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.3793245553970337, "learning_rate": 2.761310675194712e-05, "loss": 0.2582, "step": 10694, "teacher_loss": 0.24470940232276917 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5836122632026672, "learning_rate": 2.7611877335990414e-05, "loss": 0.2663, "step": 10695, "teacher_loss": 0.23105937242507935 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.7853186130523682, "learning_rate": 2.761064763088118e-05, "loss": 0.426, "step": 10696, "teacher_loss": 0.386123925447464 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.69758540391922, "learning_rate": 2.7609417636647602e-05, "loss": 0.2794, "step": 10697, "teacher_loss": 0.23296789824962616 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5496902465820312, "learning_rate": 2.7608187353317885e-05, "loss": 0.2693, "step": 10698, "teacher_loss": 0.23816293478012085 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.40676063299179077, "learning_rate": 2.7606956780920228e-05, "loss": 0.1883, "step": 10699, "teacher_loss": 0.1640104055404663 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.5043829679489136, "learning_rate": 2.7605725919482857e-05, "loss": 0.4777, "step": 10700, "teacher_loss": 0.47476130723953247 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.7403748035430908, "learning_rate": 2.7604494769033983e-05, "loss": 0.3141, "step": 10701, "teacher_loss": 0.26676928997039795 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.27056220173835754, "learning_rate": 2.7603263329601834e-05, "loss": 0.2425, "step": 10702, "teacher_loss": 0.23932921886444092 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.4659106135368347, "learning_rate": 2.7602031601214637e-05, "loss": 0.2675, "step": 10703, "teacher_loss": 0.245440274477005 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.44203001260757446, "learning_rate": 2.7600799583900643e-05, "loss": 0.3115, "step": 10704, "teacher_loss": 0.2970461845397949 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.9715584516525269, "learning_rate": 2.7599567277688095e-05, "loss": 0.5871, "step": 10705, "teacher_loss": 0.5443358421325684 }, { "compression_loss": 0.0, "epoch": 1.93, "label_loss": 0.47124379873275757, "learning_rate": 2.759833468260524e-05, "loss": 0.2301, "step": 10706, "teacher_loss": 0.20329716801643372 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.4204053282737732, "learning_rate": 2.7597101798680353e-05, "loss": 0.2685, "step": 10707, "teacher_loss": 0.25164565443992615 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3771471083164215, "learning_rate": 2.759586862594168e-05, "loss": 0.2421, "step": 10708, "teacher_loss": 0.2271193265914917 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.46310678124427795, "learning_rate": 2.759463516441751e-05, "loss": 0.2345, "step": 10709, "teacher_loss": 0.20906318724155426 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.2895175814628601, "learning_rate": 2.759340141413611e-05, "loss": 0.1482, "step": 10710, "teacher_loss": 0.1325358748435974 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 1.021508812904358, "learning_rate": 2.7592167375125772e-05, "loss": 0.3724, "step": 10711, "teacher_loss": 0.300296813249588 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3177911639213562, "learning_rate": 2.7590933047414798e-05, "loss": 0.2528, "step": 10712, "teacher_loss": 0.2455936074256897 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.46502798795700073, "learning_rate": 2.758969843103147e-05, "loss": 0.3541, "step": 10713, "teacher_loss": 0.3418126702308655 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5373362302780151, "learning_rate": 2.7588463526004107e-05, "loss": 0.2617, "step": 10714, "teacher_loss": 0.2310972809791565 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.47000786662101746, "learning_rate": 2.758722833236102e-05, "loss": 0.29, "step": 10715, "teacher_loss": 0.2700064778327942 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.7327920198440552, "learning_rate": 2.758599285013052e-05, "loss": 0.3533, "step": 10716, "teacher_loss": 0.3111203610897064 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3775298297405243, "learning_rate": 2.758475707934094e-05, "loss": 0.3367, "step": 10717, "teacher_loss": 0.3322066068649292 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.4450759291648865, "learning_rate": 2.7583521020020615e-05, "loss": 0.3348, "step": 10718, "teacher_loss": 0.32251012325286865 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.23235630989074707, "learning_rate": 2.7582284672197874e-05, "loss": 0.1771, "step": 10719, "teacher_loss": 0.17092543840408325 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3645136058330536, "learning_rate": 2.758104803590108e-05, "loss": 0.23, "step": 10720, "teacher_loss": 0.21503706276416779 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5048770904541016, "learning_rate": 2.7579811111158563e-05, "loss": 0.3341, "step": 10721, "teacher_loss": 0.3151768445968628 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3180050849914551, "learning_rate": 2.75785738979987e-05, "loss": 0.2164, "step": 10722, "teacher_loss": 0.20505811274051666 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.29039496183395386, "learning_rate": 2.7577336396449844e-05, "loss": 0.2062, "step": 10723, "teacher_loss": 0.19682344794273376 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3539835214614868, "learning_rate": 2.757609860654038e-05, "loss": 0.2739, "step": 10724, "teacher_loss": 0.265034556388855 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.8956050872802734, "learning_rate": 2.7574860528298677e-05, "loss": 0.4742, "step": 10725, "teacher_loss": 0.42738524079322815 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.1452442854642868, "learning_rate": 2.7573622161753125e-05, "loss": 0.2387, "step": 10726, "teacher_loss": 0.24911123514175415 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.28939560055732727, "learning_rate": 2.7572383506932113e-05, "loss": 0.1977, "step": 10727, "teacher_loss": 0.18749216198921204 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.6320517659187317, "learning_rate": 2.757114456386404e-05, "loss": 0.5318, "step": 10728, "teacher_loss": 0.5206484198570251 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.45532482862472534, "learning_rate": 2.7569905332577314e-05, "loss": 0.2399, "step": 10729, "teacher_loss": 0.2160104513168335 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.18439623713493347, "learning_rate": 2.7568665813100347e-05, "loss": 0.2493, "step": 10730, "teacher_loss": 0.25647836923599243 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.2852588891983032, "learning_rate": 2.756742600546155e-05, "loss": 0.3357, "step": 10731, "teacher_loss": 0.3413543701171875 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.38660377264022827, "learning_rate": 2.756618590968936e-05, "loss": 0.2268, "step": 10732, "teacher_loss": 0.20905235409736633 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.4000321924686432, "learning_rate": 2.7564945525812203e-05, "loss": 0.2147, "step": 10733, "teacher_loss": 0.194134920835495 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5205478072166443, "learning_rate": 2.7563704853858507e-05, "loss": 0.2899, "step": 10734, "teacher_loss": 0.26430654525756836 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.6222261190414429, "learning_rate": 2.7562463893856737e-05, "loss": 0.4603, "step": 10735, "teacher_loss": 0.44234639406204224 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.35669898986816406, "learning_rate": 2.756122264583533e-05, "loss": 0.2462, "step": 10736, "teacher_loss": 0.23389463126659393 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.2117297649383545, "learning_rate": 2.755998110982275e-05, "loss": 0.2162, "step": 10737, "teacher_loss": 0.21667510271072388 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3946285843849182, "learning_rate": 2.755873928584746e-05, "loss": 0.2203, "step": 10738, "teacher_loss": 0.20094534754753113 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.33045852184295654, "learning_rate": 2.7557497173937928e-05, "loss": 0.1902, "step": 10739, "teacher_loss": 0.17463496327400208 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.41973817348480225, "learning_rate": 2.7556254774122638e-05, "loss": 0.4123, "step": 10740, "teacher_loss": 0.41146600246429443 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.32989501953125, "learning_rate": 2.7555012086430072e-05, "loss": 0.234, "step": 10741, "teacher_loss": 0.22339129447937012 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.558845043182373, "learning_rate": 2.755376911088872e-05, "loss": 0.2379, "step": 10742, "teacher_loss": 0.20223954319953918 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 1.1454236507415771, "learning_rate": 2.755252584752708e-05, "loss": 0.2829, "step": 10743, "teacher_loss": 0.18701830506324768 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.7674243450164795, "learning_rate": 2.7551282296373656e-05, "loss": 0.3089, "step": 10744, "teacher_loss": 0.2579955458641052 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5170470476150513, "learning_rate": 2.755003845745696e-05, "loss": 0.4112, "step": 10745, "teacher_loss": 0.3994525671005249 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.43625807762145996, "learning_rate": 2.754879433080551e-05, "loss": 0.2407, "step": 10746, "teacher_loss": 0.21898235380649567 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.19233836233615875, "learning_rate": 2.754754991644783e-05, "loss": 0.1981, "step": 10747, "teacher_loss": 0.19878417253494263 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.8407419919967651, "learning_rate": 2.754630521441245e-05, "loss": 0.2603, "step": 10748, "teacher_loss": 0.1958077996969223 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5829198360443115, "learning_rate": 2.7545060224727902e-05, "loss": 0.3749, "step": 10749, "teacher_loss": 0.35181140899658203 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.2617458999156952, "learning_rate": 2.7543814947422744e-05, "loss": 0.363, "step": 10750, "teacher_loss": 0.3742242753505707 }, { "epoch": 1.94, "eval_exact_match": 79.59318826868495, "eval_f1": 86.78058162372317, "step": 10750 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3319438099861145, "learning_rate": 2.7542569382525508e-05, "loss": 0.2879, "step": 10751, "teacher_loss": 0.2829638421535492 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.6153581738471985, "learning_rate": 2.7541323530064765e-05, "loss": 0.2803, "step": 10752, "teacher_loss": 0.2430334985256195 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.8033252358436584, "learning_rate": 2.7540077390069075e-05, "loss": 0.4027, "step": 10753, "teacher_loss": 0.35822615027427673 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.7388111352920532, "learning_rate": 2.7538830962567008e-05, "loss": 0.3252, "step": 10754, "teacher_loss": 0.2792031168937683 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5877249240875244, "learning_rate": 2.753758424758714e-05, "loss": 0.246, "step": 10755, "teacher_loss": 0.20805487036705017 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.43574514985084534, "learning_rate": 2.753633724515805e-05, "loss": 0.2431, "step": 10756, "teacher_loss": 0.22166277468204498 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5306398868560791, "learning_rate": 2.7535089955308342e-05, "loss": 0.2228, "step": 10757, "teacher_loss": 0.18858368694782257 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.3435639441013336, "learning_rate": 2.75338423780666e-05, "loss": 0.2605, "step": 10758, "teacher_loss": 0.2512151896953583 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.2963387370109558, "learning_rate": 2.7532594513461427e-05, "loss": 0.2535, "step": 10759, "teacher_loss": 0.24868829548358917 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.5550227165222168, "learning_rate": 2.7531346361521435e-05, "loss": 0.6236, "step": 10760, "teacher_loss": 0.6311668157577515 }, { "compression_loss": 0.0, "epoch": 1.94, "label_loss": 0.40955448150634766, "learning_rate": 2.7530097922275248e-05, "loss": 0.5015, "step": 10761, "teacher_loss": 0.5117581486701965 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4119355380535126, "learning_rate": 2.752884919575148e-05, "loss": 0.2049, "step": 10762, "teacher_loss": 0.18187227845191956 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4913174510002136, "learning_rate": 2.752760018197876e-05, "loss": 0.275, "step": 10763, "teacher_loss": 0.2509962022304535 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.6365230083465576, "learning_rate": 2.7526350880985732e-05, "loss": 0.4799, "step": 10764, "teacher_loss": 0.46245259046554565 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.22546356916427612, "learning_rate": 2.7525101292801036e-05, "loss": 0.2152, "step": 10765, "teacher_loss": 0.2140844315290451 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.38395965099334717, "learning_rate": 2.7523851417453322e-05, "loss": 0.2388, "step": 10766, "teacher_loss": 0.22262342274188995 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4739437997341156, "learning_rate": 2.7522601254971234e-05, "loss": 0.2261, "step": 10767, "teacher_loss": 0.1985701322555542 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4453097879886627, "learning_rate": 2.752135080538345e-05, "loss": 0.2165, "step": 10768, "teacher_loss": 0.1910897195339203 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.8906028866767883, "learning_rate": 2.7520100068718632e-05, "loss": 0.5755, "step": 10769, "teacher_loss": 0.5404713749885559 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.384732723236084, "learning_rate": 2.7518849045005458e-05, "loss": 0.2776, "step": 10770, "teacher_loss": 0.2656790018081665 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3922428488731384, "learning_rate": 2.7517597734272605e-05, "loss": 0.3961, "step": 10771, "teacher_loss": 0.39647775888442993 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3590065538883209, "learning_rate": 2.7516346136548764e-05, "loss": 0.3662, "step": 10772, "teacher_loss": 0.3670479655265808 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.5111308693885803, "learning_rate": 2.7515094251862635e-05, "loss": 0.3665, "step": 10773, "teacher_loss": 0.35046809911727905 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.6661297082901001, "learning_rate": 2.751384208024292e-05, "loss": 0.3596, "step": 10774, "teacher_loss": 0.32554882764816284 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.29166215658187866, "learning_rate": 2.7512589621718326e-05, "loss": 0.277, "step": 10775, "teacher_loss": 0.2753344774246216 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.32293498516082764, "learning_rate": 2.751133687631756e-05, "loss": 0.2606, "step": 10776, "teacher_loss": 0.2537005543708801 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.25732457637786865, "learning_rate": 2.751008384406935e-05, "loss": 0.2186, "step": 10777, "teacher_loss": 0.2142890989780426 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.19241492450237274, "learning_rate": 2.7508830525002434e-05, "loss": 0.2555, "step": 10778, "teacher_loss": 0.26250070333480835 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.69423508644104, "learning_rate": 2.750757691914553e-05, "loss": 0.2844, "step": 10779, "teacher_loss": 0.2388879358768463 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.19666212797164917, "learning_rate": 2.750632302652739e-05, "loss": 0.2915, "step": 10780, "teacher_loss": 0.3020234704017639 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.6278557181358337, "learning_rate": 2.7505068847176754e-05, "loss": 0.2753, "step": 10781, "teacher_loss": 0.2361781746149063 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.49890565872192383, "learning_rate": 2.7503814381122384e-05, "loss": 0.2988, "step": 10782, "teacher_loss": 0.2765964865684509 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3144882023334503, "learning_rate": 2.750255962839304e-05, "loss": 0.2128, "step": 10783, "teacher_loss": 0.20154812932014465 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.2659118175506592, "learning_rate": 2.7501304589017487e-05, "loss": 0.541, "step": 10784, "teacher_loss": 0.5715261697769165 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.324421226978302, "learning_rate": 2.75000492630245e-05, "loss": 0.2009, "step": 10785, "teacher_loss": 0.18713781237602234 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4521762430667877, "learning_rate": 2.749879365044286e-05, "loss": 0.2737, "step": 10786, "teacher_loss": 0.25387269258499146 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.23208793997764587, "learning_rate": 2.7497537751301358e-05, "loss": 0.2852, "step": 10787, "teacher_loss": 0.2911258637905121 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.6925598978996277, "learning_rate": 2.7496281565628783e-05, "loss": 0.2722, "step": 10788, "teacher_loss": 0.22546206414699554 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.69293612241745, "learning_rate": 2.749502509345394e-05, "loss": 0.3366, "step": 10789, "teacher_loss": 0.29703542590141296 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.15416736900806427, "learning_rate": 2.7493768334805632e-05, "loss": 0.1944, "step": 10790, "teacher_loss": 0.19892218708992004 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3359658420085907, "learning_rate": 2.7492511289712673e-05, "loss": 0.2324, "step": 10791, "teacher_loss": 0.22084154188632965 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.31044816970825195, "learning_rate": 2.7491253958203884e-05, "loss": 0.2646, "step": 10792, "teacher_loss": 0.25951480865478516 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.14581097662448883, "learning_rate": 2.7489996340308096e-05, "loss": 0.2021, "step": 10793, "teacher_loss": 0.20840641856193542 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.45781636238098145, "learning_rate": 2.7488738436054137e-05, "loss": 0.3512, "step": 10794, "teacher_loss": 0.33938688039779663 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.2801690697669983, "learning_rate": 2.748748024547085e-05, "loss": 0.2789, "step": 10795, "teacher_loss": 0.2787937521934509 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.35359060764312744, "learning_rate": 2.748622176858708e-05, "loss": 0.2518, "step": 10796, "teacher_loss": 0.24047240614891052 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.30956515669822693, "learning_rate": 2.7484963005431677e-05, "loss": 0.2148, "step": 10797, "teacher_loss": 0.2042410671710968 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.544880747795105, "learning_rate": 2.748370395603351e-05, "loss": 0.3008, "step": 10798, "teacher_loss": 0.27367788553237915 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.41636085510253906, "learning_rate": 2.7482444620421432e-05, "loss": 0.2289, "step": 10799, "teacher_loss": 0.20806975662708282 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.8299413919448853, "learning_rate": 2.748118499862433e-05, "loss": 0.2637, "step": 10800, "teacher_loss": 0.2007812261581421 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 1.0994682312011719, "learning_rate": 2.747992509067107e-05, "loss": 0.5186, "step": 10801, "teacher_loss": 0.45409733057022095 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3366830348968506, "learning_rate": 2.747866489659055e-05, "loss": 0.5206, "step": 10802, "teacher_loss": 0.5410189032554626 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.32088106870651245, "learning_rate": 2.747740441641166e-05, "loss": 0.2562, "step": 10803, "teacher_loss": 0.24896365404129028 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4379093050956726, "learning_rate": 2.747614365016329e-05, "loss": 0.3613, "step": 10804, "teacher_loss": 0.35281896591186523 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4277106523513794, "learning_rate": 2.747488259787435e-05, "loss": 0.2055, "step": 10805, "teacher_loss": 0.18086184561252594 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.406033992767334, "learning_rate": 2.7473621259573756e-05, "loss": 0.4108, "step": 10806, "teacher_loss": 0.4112781286239624 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4657803177833557, "learning_rate": 2.7472359635290427e-05, "loss": 0.2463, "step": 10807, "teacher_loss": 0.22186987102031708 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 1.2364765405654907, "learning_rate": 2.747109772505328e-05, "loss": 0.3738, "step": 10808, "teacher_loss": 0.27797022461891174 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.14064404368400574, "learning_rate": 2.7469835528891257e-05, "loss": 0.1236, "step": 10809, "teacher_loss": 0.12167838960886002 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.5955355763435364, "learning_rate": 2.746857304683329e-05, "loss": 0.2747, "step": 10810, "teacher_loss": 0.23910586535930634 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.43910861015319824, "learning_rate": 2.7467310278908327e-05, "loss": 0.3172, "step": 10811, "teacher_loss": 0.30368590354919434 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.2320491373538971, "learning_rate": 2.7466047225145318e-05, "loss": 0.2447, "step": 10812, "teacher_loss": 0.24612921476364136 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 1.0183064937591553, "learning_rate": 2.746478388557322e-05, "loss": 0.6473, "step": 10813, "teacher_loss": 0.6061270236968994 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.4801907539367676, "learning_rate": 2.7463520260221e-05, "loss": 0.3193, "step": 10814, "teacher_loss": 0.3014691174030304 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.20801588892936707, "learning_rate": 2.746225634911763e-05, "loss": 0.1663, "step": 10815, "teacher_loss": 0.16169731318950653 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.7599947452545166, "learning_rate": 2.7460992152292084e-05, "loss": 0.2504, "step": 10816, "teacher_loss": 0.19380278885364532 }, { "compression_loss": 0.0, "epoch": 1.95, "label_loss": 0.3370821475982666, "learning_rate": 2.7459727669773344e-05, "loss": 0.2864, "step": 10817, "teacher_loss": 0.28074395656585693 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.47463536262512207, "learning_rate": 2.7458462901590408e-05, "loss": 0.3831, "step": 10818, "teacher_loss": 0.37289243936538696 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.5084630250930786, "learning_rate": 2.7457197847772272e-05, "loss": 0.2597, "step": 10819, "teacher_loss": 0.23206114768981934 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.33815276622772217, "learning_rate": 2.7455932508347935e-05, "loss": 0.2723, "step": 10820, "teacher_loss": 0.26503363251686096 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.24131813645362854, "learning_rate": 2.7454666883346412e-05, "loss": 0.1955, "step": 10821, "teacher_loss": 0.19040238857269287 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.472001850605011, "learning_rate": 2.7453400972796717e-05, "loss": 0.212, "step": 10822, "teacher_loss": 0.18306519091129303 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.92164146900177, "learning_rate": 2.7452134776727875e-05, "loss": 0.4391, "step": 10823, "teacher_loss": 0.38551491498947144 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 1.0369915962219238, "learning_rate": 2.745086829516892e-05, "loss": 0.4715, "step": 10824, "teacher_loss": 0.40871402621269226 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4346398711204529, "learning_rate": 2.744960152814888e-05, "loss": 0.2741, "step": 10825, "teacher_loss": 0.2562423646450043 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4701068103313446, "learning_rate": 2.7448334475696806e-05, "loss": 0.2143, "step": 10826, "teacher_loss": 0.18583060801029205 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4358345568180084, "learning_rate": 2.744706713784174e-05, "loss": 0.3101, "step": 10827, "teacher_loss": 0.2961447834968567 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.19895631074905396, "learning_rate": 2.7445799514612747e-05, "loss": 0.2121, "step": 10828, "teacher_loss": 0.21353840827941895 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6724644899368286, "learning_rate": 2.7444531606038887e-05, "loss": 0.2638, "step": 10829, "teacher_loss": 0.21841639280319214 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.5389783382415771, "learning_rate": 2.744326341214922e-05, "loss": 0.3087, "step": 10830, "teacher_loss": 0.2831568419933319 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.3820839822292328, "learning_rate": 2.744199493297284e-05, "loss": 0.2053, "step": 10831, "teacher_loss": 0.1856684386730194 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.5639641284942627, "learning_rate": 2.744072616853881e-05, "loss": 0.3486, "step": 10832, "teacher_loss": 0.3246185779571533 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.45832860469818115, "learning_rate": 2.7439457118876235e-05, "loss": 0.2544, "step": 10833, "teacher_loss": 0.23176434636116028 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.24422238767147064, "learning_rate": 2.7438187784014203e-05, "loss": 0.3508, "step": 10834, "teacher_loss": 0.36267563700675964 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.28793585300445557, "learning_rate": 2.7436918163981815e-05, "loss": 0.2053, "step": 10835, "teacher_loss": 0.19616399705410004 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.3634748160839081, "learning_rate": 2.7435648258808176e-05, "loss": 0.2595, "step": 10836, "teacher_loss": 0.24790015816688538 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.2011847198009491, "learning_rate": 2.7434378068522413e-05, "loss": 0.2526, "step": 10837, "teacher_loss": 0.25832095742225647 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.18684807419776917, "learning_rate": 2.743310759315364e-05, "loss": 0.2229, "step": 10838, "teacher_loss": 0.22689178586006165 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6791402697563171, "learning_rate": 2.7431836832730988e-05, "loss": 0.2753, "step": 10839, "teacher_loss": 0.23045684397220612 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6846766471862793, "learning_rate": 2.7430565787283584e-05, "loss": 0.505, "step": 10840, "teacher_loss": 0.4850236475467682 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.16580232977867126, "learning_rate": 2.742929445684058e-05, "loss": 0.3293, "step": 10841, "teacher_loss": 0.3475082814693451 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6000460386276245, "learning_rate": 2.7428022841431118e-05, "loss": 0.2687, "step": 10842, "teacher_loss": 0.2319331169128418 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.39683467149734497, "learning_rate": 2.7426750941084355e-05, "loss": 0.2445, "step": 10843, "teacher_loss": 0.22752338647842407 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4009547829627991, "learning_rate": 2.7425478755829447e-05, "loss": 0.5134, "step": 10844, "teacher_loss": 0.5259145498275757 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.414784699678421, "learning_rate": 2.742420628569557e-05, "loss": 0.3684, "step": 10845, "teacher_loss": 0.3632541298866272 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.38040831685066223, "learning_rate": 2.7422933530711883e-05, "loss": 0.2706, "step": 10846, "teacher_loss": 0.25841739773750305 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.56363445520401, "learning_rate": 2.7421660490907586e-05, "loss": 0.225, "step": 10847, "teacher_loss": 0.18732300400733948 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4187430143356323, "learning_rate": 2.742038716631185e-05, "loss": 0.2192, "step": 10848, "teacher_loss": 0.19706645607948303 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.38193169236183167, "learning_rate": 2.741911355695388e-05, "loss": 0.3127, "step": 10849, "teacher_loss": 0.3050064146518707 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.5706260204315186, "learning_rate": 2.7417839662862865e-05, "loss": 0.5728, "step": 10850, "teacher_loss": 0.5730546712875366 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.21841847896575928, "learning_rate": 2.741656548406802e-05, "loss": 0.2058, "step": 10851, "teacher_loss": 0.204440176486969 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.5540941953659058, "learning_rate": 2.741529102059855e-05, "loss": 0.2392, "step": 10852, "teacher_loss": 0.20417365431785583 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6483820676803589, "learning_rate": 2.7414016272483685e-05, "loss": 0.3773, "step": 10853, "teacher_loss": 0.34717458486557007 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.48529040813446045, "learning_rate": 2.741274123975265e-05, "loss": 0.3896, "step": 10854, "teacher_loss": 0.37894201278686523 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 1.2216346263885498, "learning_rate": 2.7411465922434666e-05, "loss": 0.3905, "step": 10855, "teacher_loss": 0.298186719417572 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.3121795952320099, "learning_rate": 2.7410190320558985e-05, "loss": 0.2768, "step": 10856, "teacher_loss": 0.2728690505027771 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.7875667214393616, "learning_rate": 2.7408914434154844e-05, "loss": 0.4107, "step": 10857, "teacher_loss": 0.3688180148601532 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4278305172920227, "learning_rate": 2.7407638263251503e-05, "loss": 0.3526, "step": 10858, "teacher_loss": 0.3442361354827881 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6039260625839233, "learning_rate": 2.7406361807878215e-05, "loss": 0.3028, "step": 10859, "teacher_loss": 0.2693212628364563 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.15492716431617737, "learning_rate": 2.7405085068064246e-05, "loss": 0.1728, "step": 10860, "teacher_loss": 0.17474587261676788 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.7073568105697632, "learning_rate": 2.7403808043838866e-05, "loss": 0.3633, "step": 10861, "teacher_loss": 0.32507115602493286 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.2879864573478699, "learning_rate": 2.740253073523136e-05, "loss": 0.1946, "step": 10862, "teacher_loss": 0.18417751789093018 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.6933639645576477, "learning_rate": 2.7401253142271008e-05, "loss": 0.2512, "step": 10863, "teacher_loss": 0.20202794671058655 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.3929342031478882, "learning_rate": 2.7399975264987102e-05, "loss": 0.2759, "step": 10864, "teacher_loss": 0.2629064917564392 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.40562641620635986, "learning_rate": 2.739869710340894e-05, "loss": 0.2446, "step": 10865, "teacher_loss": 0.22672992944717407 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.4464614987373352, "learning_rate": 2.7397418657565828e-05, "loss": 0.2636, "step": 10866, "teacher_loss": 0.24329717457294464 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.20394085347652435, "learning_rate": 2.7396139927487074e-05, "loss": 0.2883, "step": 10867, "teacher_loss": 0.2976875901222229 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.19401872158050537, "learning_rate": 2.7394860913202e-05, "loss": 0.1663, "step": 10868, "teacher_loss": 0.1631706804037094 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.7677251100540161, "learning_rate": 2.7393581614739924e-05, "loss": 0.8347, "step": 10869, "teacher_loss": 0.8421406745910645 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.37438052892684937, "learning_rate": 2.739230203213018e-05, "loss": 0.2658, "step": 10870, "teacher_loss": 0.25374093651771545 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.648666262626648, "learning_rate": 2.73910221654021e-05, "loss": 0.2555, "step": 10871, "teacher_loss": 0.21179074048995972 }, { "compression_loss": 0.0, "epoch": 1.96, "label_loss": 0.21512557566165924, "learning_rate": 2.738974201458504e-05, "loss": 0.2627, "step": 10872, "teacher_loss": 0.2679938077926636 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.7019003033638, "learning_rate": 2.738846157970834e-05, "loss": 0.3608, "step": 10873, "teacher_loss": 0.3228452205657959 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.6334228515625, "learning_rate": 2.738718086080136e-05, "loss": 0.677, "step": 10874, "teacher_loss": 0.6818009614944458 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.19164009392261505, "learning_rate": 2.7385899857893453e-05, "loss": 0.2295, "step": 10875, "teacher_loss": 0.2337537258863449 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.28335070610046387, "learning_rate": 2.7384618571014005e-05, "loss": 0.2989, "step": 10876, "teacher_loss": 0.30064892768859863 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.9634113311767578, "learning_rate": 2.7383337000192382e-05, "loss": 0.2854, "step": 10877, "teacher_loss": 0.21010950207710266 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.7090965509414673, "learning_rate": 2.738205514545797e-05, "loss": 0.5564, "step": 10878, "teacher_loss": 0.5394244194030762 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.38687464594841003, "learning_rate": 2.7380773006840154e-05, "loss": 0.4509, "step": 10879, "teacher_loss": 0.45806825160980225 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.16957297921180725, "learning_rate": 2.7379490584368336e-05, "loss": 0.204, "step": 10880, "teacher_loss": 0.20782233774662018 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.43553513288497925, "learning_rate": 2.737820787807191e-05, "loss": 0.2311, "step": 10881, "teacher_loss": 0.2083931416273117 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.18465974926948547, "learning_rate": 2.7376924887980293e-05, "loss": 0.2444, "step": 10882, "teacher_loss": 0.25104832649230957 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2371116578578949, "learning_rate": 2.7375641614122897e-05, "loss": 0.2343, "step": 10883, "teacher_loss": 0.23395338654518127 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.7803940773010254, "learning_rate": 2.737435805652914e-05, "loss": 0.3465, "step": 10884, "teacher_loss": 0.2983270287513733 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.5725279450416565, "learning_rate": 2.7373074215228452e-05, "loss": 0.2961, "step": 10885, "teacher_loss": 0.26542454957962036 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.39034268260002136, "learning_rate": 2.737179009025027e-05, "loss": 0.2055, "step": 10886, "teacher_loss": 0.18501505255699158 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.29216527938842773, "learning_rate": 2.7370505681624033e-05, "loss": 0.2319, "step": 10887, "teacher_loss": 0.22524479031562805 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.33569180965423584, "learning_rate": 2.7369220989379192e-05, "loss": 0.2201, "step": 10888, "teacher_loss": 0.20729121565818787 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 1.540254831314087, "learning_rate": 2.7367936013545196e-05, "loss": 0.381, "step": 10889, "teacher_loss": 0.2521928548812866 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2732278108596802, "learning_rate": 2.736665075415151e-05, "loss": 0.1675, "step": 10890, "teacher_loss": 0.15572570264339447 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 1.0981857776641846, "learning_rate": 2.73653652112276e-05, "loss": 0.3514, "step": 10891, "teacher_loss": 0.26844727993011475 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.3784533441066742, "learning_rate": 2.7364079384802935e-05, "loss": 0.2935, "step": 10892, "teacher_loss": 0.2840586304664612 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.3704400956630707, "learning_rate": 2.736279327490701e-05, "loss": 0.2128, "step": 10893, "teacher_loss": 0.1952662467956543 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.4308822453022003, "learning_rate": 2.7361506881569288e-05, "loss": 0.2534, "step": 10894, "teacher_loss": 0.23368799686431885 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.4873030483722687, "learning_rate": 2.7360220204819276e-05, "loss": 0.3303, "step": 10895, "teacher_loss": 0.31285595893859863 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.21434643864631653, "learning_rate": 2.735893324468648e-05, "loss": 0.1931, "step": 10896, "teacher_loss": 0.19079014658927917 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.5113731622695923, "learning_rate": 2.7357646001200394e-05, "loss": 0.2904, "step": 10897, "teacher_loss": 0.26588648557662964 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.362072229385376, "learning_rate": 2.7356358474390536e-05, "loss": 0.1873, "step": 10898, "teacher_loss": 0.16784429550170898 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2871999442577362, "learning_rate": 2.735507066428643e-05, "loss": 0.2118, "step": 10899, "teacher_loss": 0.20343969762325287 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.4309789538383484, "learning_rate": 2.7353782570917587e-05, "loss": 0.2412, "step": 10900, "teacher_loss": 0.22007720172405243 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2927132844924927, "learning_rate": 2.7352494194313552e-05, "loss": 0.2314, "step": 10901, "teacher_loss": 0.224545419216156 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.4730851352214813, "learning_rate": 2.735120553450386e-05, "loss": 0.2465, "step": 10902, "teacher_loss": 0.2213568389415741 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.9335314035415649, "learning_rate": 2.7349916591518057e-05, "loss": 0.3854, "step": 10903, "teacher_loss": 0.32453060150146484 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.30453547835350037, "learning_rate": 2.734862736538569e-05, "loss": 0.1913, "step": 10904, "teacher_loss": 0.17870302498340607 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.680169403553009, "learning_rate": 2.734733785613632e-05, "loss": 0.2841, "step": 10905, "teacher_loss": 0.2401125431060791 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.6479673981666565, "learning_rate": 2.734604806379952e-05, "loss": 0.5911, "step": 10906, "teacher_loss": 0.584787905216217 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.6036485433578491, "learning_rate": 2.7344757988404845e-05, "loss": 0.2931, "step": 10907, "teacher_loss": 0.25862234830856323 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.3791617751121521, "learning_rate": 2.7343467629981886e-05, "loss": 0.37, "step": 10908, "teacher_loss": 0.36900314688682556 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.38305845856666565, "learning_rate": 2.734217698856022e-05, "loss": 0.2161, "step": 10909, "teacher_loss": 0.19755110144615173 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.8357294797897339, "learning_rate": 2.734088606416944e-05, "loss": 0.3518, "step": 10910, "teacher_loss": 0.29808545112609863 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2530587315559387, "learning_rate": 2.733959485683914e-05, "loss": 0.2364, "step": 10911, "teacher_loss": 0.23452477157115936 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.4362846910953522, "learning_rate": 2.733830336659893e-05, "loss": 0.2377, "step": 10912, "teacher_loss": 0.21564152836799622 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 1.089313268661499, "learning_rate": 2.733701159347841e-05, "loss": 0.2768, "step": 10913, "teacher_loss": 0.18656393885612488 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.3766339421272278, "learning_rate": 2.7335719537507212e-05, "loss": 0.3003, "step": 10914, "teacher_loss": 0.29186469316482544 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.6957297325134277, "learning_rate": 2.733442719871494e-05, "loss": 0.4263, "step": 10915, "teacher_loss": 0.3963565230369568 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.6060894131660461, "learning_rate": 2.733313457713124e-05, "loss": 0.3108, "step": 10916, "teacher_loss": 0.2779746651649475 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.488019198179245, "learning_rate": 2.7331841672785734e-05, "loss": 0.3714, "step": 10917, "teacher_loss": 0.358467161655426 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.739631175994873, "learning_rate": 2.7330548485708072e-05, "loss": 0.406, "step": 10918, "teacher_loss": 0.36892497539520264 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.35032591223716736, "learning_rate": 2.732925501592791e-05, "loss": 0.208, "step": 10919, "teacher_loss": 0.19222550094127655 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.8082783222198486, "learning_rate": 2.7327961263474887e-05, "loss": 0.371, "step": 10920, "teacher_loss": 0.3224300146102905 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.36098629236221313, "learning_rate": 2.7326667228378677e-05, "loss": 0.3273, "step": 10921, "teacher_loss": 0.32356658577919006 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.5517736077308655, "learning_rate": 2.7325372910668948e-05, "loss": 0.3097, "step": 10922, "teacher_loss": 0.2827921509742737 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.15011778473854065, "learning_rate": 2.7324078310375367e-05, "loss": 0.1954, "step": 10923, "teacher_loss": 0.2004351019859314 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.2400776445865631, "learning_rate": 2.732278342752762e-05, "loss": 0.2036, "step": 10924, "teacher_loss": 0.1995181441307068 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.33505213260650635, "learning_rate": 2.7321488262155396e-05, "loss": 0.2269, "step": 10925, "teacher_loss": 0.21483266353607178 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.5862535238265991, "learning_rate": 2.7320192814288386e-05, "loss": 0.3549, "step": 10926, "teacher_loss": 0.3292403221130371 }, { "compression_loss": 0.0, "epoch": 1.97, "label_loss": 0.3221989870071411, "learning_rate": 2.7318897083956295e-05, "loss": 0.2413, "step": 10927, "teacher_loss": 0.23228782415390015 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.28338491916656494, "learning_rate": 2.7317601071188823e-05, "loss": 0.1995, "step": 10928, "teacher_loss": 0.19020111858844757 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 1.0176000595092773, "learning_rate": 2.7316304776015695e-05, "loss": 0.4314, "step": 10929, "teacher_loss": 0.36625435948371887 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.48951399326324463, "learning_rate": 2.7315008198466623e-05, "loss": 0.3323, "step": 10930, "teacher_loss": 0.31481537222862244 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.684956967830658, "learning_rate": 2.7313711338571333e-05, "loss": 0.327, "step": 10931, "teacher_loss": 0.2872406244277954 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.403883159160614, "learning_rate": 2.7312414196359562e-05, "loss": 0.2726, "step": 10932, "teacher_loss": 0.2580595016479492 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5118328332901001, "learning_rate": 2.7311116771861044e-05, "loss": 0.2477, "step": 10933, "teacher_loss": 0.2183288037776947 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.14330704510211945, "learning_rate": 2.7309819065105537e-05, "loss": 0.2447, "step": 10934, "teacher_loss": 0.25598254799842834 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.30741801857948303, "learning_rate": 2.7308521076122782e-05, "loss": 0.2859, "step": 10935, "teacher_loss": 0.28353387117385864 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5244690775871277, "learning_rate": 2.730722280494254e-05, "loss": 0.2406, "step": 10936, "teacher_loss": 0.2090718150138855 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5133647322654724, "learning_rate": 2.7305924251594577e-05, "loss": 0.2748, "step": 10937, "teacher_loss": 0.24834682047367096 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.238912433385849, "learning_rate": 2.730462541610867e-05, "loss": 0.1776, "step": 10938, "teacher_loss": 0.17078456282615662 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4113699793815613, "learning_rate": 2.7303326298514588e-05, "loss": 0.2609, "step": 10939, "teacher_loss": 0.24417494237422943 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.3746951222419739, "learning_rate": 2.7302026898842126e-05, "loss": 0.2841, "step": 10940, "teacher_loss": 0.2740182876586914 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7371665239334106, "learning_rate": 2.7300727217121068e-05, "loss": 0.761, "step": 10941, "teacher_loss": 0.7636754512786865 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7845966815948486, "learning_rate": 2.729942725338122e-05, "loss": 0.3151, "step": 10942, "teacher_loss": 0.262956440448761 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7906073331832886, "learning_rate": 2.7298127007652373e-05, "loss": 0.331, "step": 10943, "teacher_loss": 0.2799391746520996 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4016439914703369, "learning_rate": 2.729682647996435e-05, "loss": 0.2364, "step": 10944, "teacher_loss": 0.21809351444244385 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.29760733246803284, "learning_rate": 2.729552567034696e-05, "loss": 0.2327, "step": 10945, "teacher_loss": 0.22552502155303955 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.45928603410720825, "learning_rate": 2.729422457883003e-05, "loss": 0.2799, "step": 10946, "teacher_loss": 0.26000452041625977 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4973798394203186, "learning_rate": 2.72929232054434e-05, "loss": 0.2796, "step": 10947, "teacher_loss": 0.2553505599498749 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.25807061791419983, "learning_rate": 2.7291621550216887e-05, "loss": 0.2343, "step": 10948, "teacher_loss": 0.23163972795009613 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7279292345046997, "learning_rate": 2.7290319613180348e-05, "loss": 0.8084, "step": 10949, "teacher_loss": 0.8173166513442993 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4489055573940277, "learning_rate": 2.7289017394363625e-05, "loss": 0.307, "step": 10950, "teacher_loss": 0.2911779582500458 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.3300057351589203, "learning_rate": 2.728771489379658e-05, "loss": 0.2444, "step": 10951, "teacher_loss": 0.23492828011512756 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.2552585005760193, "learning_rate": 2.7286412111509075e-05, "loss": 0.2475, "step": 10952, "teacher_loss": 0.2466193586587906 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.290688157081604, "learning_rate": 2.7285109047530975e-05, "loss": 0.2411, "step": 10953, "teacher_loss": 0.23561015725135803 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.3721437454223633, "learning_rate": 2.7283805701892156e-05, "loss": 0.2463, "step": 10954, "teacher_loss": 0.2323523610830307 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.2926291823387146, "learning_rate": 2.7282502074622505e-05, "loss": 0.2198, "step": 10955, "teacher_loss": 0.2116565853357315 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.48973768949508667, "learning_rate": 2.72811981657519e-05, "loss": 0.2423, "step": 10956, "teacher_loss": 0.21477633714675903 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.6451169848442078, "learning_rate": 2.7279893975310246e-05, "loss": 0.2776, "step": 10957, "teacher_loss": 0.2367827147245407 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7494860887527466, "learning_rate": 2.727858950332744e-05, "loss": 0.4126, "step": 10958, "teacher_loss": 0.37520962953567505 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.26051971316337585, "learning_rate": 2.727728474983339e-05, "loss": 0.203, "step": 10959, "teacher_loss": 0.19658610224723816 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4420437216758728, "learning_rate": 2.727597971485801e-05, "loss": 0.262, "step": 10960, "teacher_loss": 0.24203094840049744 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.6790921092033386, "learning_rate": 2.727467439843122e-05, "loss": 0.529, "step": 10961, "teacher_loss": 0.5122976303100586 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.6930454969406128, "learning_rate": 2.7273368800582946e-05, "loss": 0.3163, "step": 10962, "teacher_loss": 0.27444779872894287 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.2740445137023926, "learning_rate": 2.7272062921343123e-05, "loss": 0.4054, "step": 10963, "teacher_loss": 0.4199417233467102 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4202827215194702, "learning_rate": 2.7270756760741692e-05, "loss": 0.2879, "step": 10964, "teacher_loss": 0.2732202410697937 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.7727272510528564, "learning_rate": 2.72694503188086e-05, "loss": 0.2671, "step": 10965, "teacher_loss": 0.21095682680606842 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.8302296996116638, "learning_rate": 2.7268143595573793e-05, "loss": 0.3053, "step": 10966, "teacher_loss": 0.2469996064901352 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.6615298390388489, "learning_rate": 2.7266836591067237e-05, "loss": 0.2609, "step": 10967, "teacher_loss": 0.2163485586643219 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 1.0277924537658691, "learning_rate": 2.72655293053189e-05, "loss": 0.368, "step": 10968, "teacher_loss": 0.29464367032051086 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.32207390666007996, "learning_rate": 2.7264221738358742e-05, "loss": 0.1983, "step": 10969, "teacher_loss": 0.18452855944633484 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.41837024688720703, "learning_rate": 2.726291389021676e-05, "loss": 0.3084, "step": 10970, "teacher_loss": 0.29618021845817566 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5088686347007751, "learning_rate": 2.7261605760922918e-05, "loss": 0.2694, "step": 10971, "teacher_loss": 0.24279162287712097 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.30409446358680725, "learning_rate": 2.7260297350507227e-05, "loss": 0.2762, "step": 10972, "teacher_loss": 0.27307072281837463 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.47880637645721436, "learning_rate": 2.725898865899967e-05, "loss": 0.2337, "step": 10973, "teacher_loss": 0.2064475417137146 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.1640702486038208, "learning_rate": 2.725767968643026e-05, "loss": 0.2275, "step": 10974, "teacher_loss": 0.23455876111984253 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5323233008384705, "learning_rate": 2.7256370432829008e-05, "loss": 0.3299, "step": 10975, "teacher_loss": 0.30745208263397217 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5228419303894043, "learning_rate": 2.7255060898225924e-05, "loss": 0.3088, "step": 10976, "teacher_loss": 0.2850039005279541 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.30222204327583313, "learning_rate": 2.7253751082651038e-05, "loss": 0.2152, "step": 10977, "teacher_loss": 0.20551280677318573 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.46103575825691223, "learning_rate": 2.7252440986134375e-05, "loss": 0.2674, "step": 10978, "teacher_loss": 0.2458699345588684 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.904411792755127, "learning_rate": 2.7251130608705976e-05, "loss": 0.3183, "step": 10979, "teacher_loss": 0.2531786561012268 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.43219244480133057, "learning_rate": 2.7249819950395886e-05, "loss": 0.2839, "step": 10980, "teacher_loss": 0.26740720868110657 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.48594194650650024, "learning_rate": 2.7248509011234154e-05, "loss": 0.2162, "step": 10981, "teacher_loss": 0.1862303465604782 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.5689194202423096, "learning_rate": 2.7247197791250828e-05, "loss": 0.2238, "step": 10982, "teacher_loss": 0.185488760471344 }, { "compression_loss": 0.0, "epoch": 1.98, "label_loss": 0.4455610513687134, "learning_rate": 2.7245886290475974e-05, "loss": 0.2662, "step": 10983, "teacher_loss": 0.24628344178199768 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.3862900137901306, "learning_rate": 2.7244574508939667e-05, "loss": 0.2248, "step": 10984, "teacher_loss": 0.20683839917182922 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.34636855125427246, "learning_rate": 2.7243262446671976e-05, "loss": 0.2222, "step": 10985, "teacher_loss": 0.20837363600730896 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.45351868867874146, "learning_rate": 2.7241950103702983e-05, "loss": 0.2977, "step": 10986, "teacher_loss": 0.28036606311798096 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.3287506699562073, "learning_rate": 2.7240637480062783e-05, "loss": 0.1607, "step": 10987, "teacher_loss": 0.1419958770275116 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.28295910358428955, "learning_rate": 2.723932457578146e-05, "loss": 0.1632, "step": 10988, "teacher_loss": 0.149948388338089 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.3634149730205536, "learning_rate": 2.7238011390889116e-05, "loss": 0.3118, "step": 10989, "teacher_loss": 0.30611008405685425 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.49249735474586487, "learning_rate": 2.723669792541587e-05, "loss": 0.2111, "step": 10990, "teacher_loss": 0.17982915043830872 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.7725654244422913, "learning_rate": 2.7235384179391824e-05, "loss": 0.3167, "step": 10991, "teacher_loss": 0.2660036087036133 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.32238221168518066, "learning_rate": 2.7234070152847104e-05, "loss": 0.2291, "step": 10992, "teacher_loss": 0.21874716877937317 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.40911489725112915, "learning_rate": 2.7232755845811832e-05, "loss": 0.2856, "step": 10993, "teacher_loss": 0.27186620235443115 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.42562103271484375, "learning_rate": 2.7231441258316145e-05, "loss": 0.3217, "step": 10994, "teacher_loss": 0.310102641582489 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.24604177474975586, "learning_rate": 2.7230126390390187e-05, "loss": 0.2381, "step": 10995, "teacher_loss": 0.23723720014095306 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.1614038050174713, "learning_rate": 2.7228811242064092e-05, "loss": 0.2019, "step": 10996, "teacher_loss": 0.20636524260044098 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.22733375430107117, "learning_rate": 2.7227495813368022e-05, "loss": 0.229, "step": 10997, "teacher_loss": 0.22922906279563904 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5421076416969299, "learning_rate": 2.7226180104332134e-05, "loss": 0.2649, "step": 10998, "teacher_loss": 0.23409873247146606 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.6794562339782715, "learning_rate": 2.7224864114986592e-05, "loss": 0.3547, "step": 10999, "teacher_loss": 0.3186277449131012 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5141890645027161, "learning_rate": 2.7223547845361565e-05, "loss": 0.2873, "step": 11000, "teacher_loss": 0.2620975077152252 }, { "epoch": 1.99, "eval_exact_match": 79.30936613055819, "eval_f1": 86.80744384812628, "step": 11000 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5698481798171997, "learning_rate": 2.7222231295487237e-05, "loss": 0.264, "step": 11001, "teacher_loss": 0.23004421591758728 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.2744436264038086, "learning_rate": 2.722091446539379e-05, "loss": 0.1968, "step": 11002, "teacher_loss": 0.1881275624036789 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.2508719861507416, "learning_rate": 2.721959735511141e-05, "loss": 0.2431, "step": 11003, "teacher_loss": 0.24219286441802979 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.6528825759887695, "learning_rate": 2.7218279964670302e-05, "loss": 0.3563, "step": 11004, "teacher_loss": 0.3233593702316284 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.32213282585144043, "learning_rate": 2.7216962294100668e-05, "loss": 0.2341, "step": 11005, "teacher_loss": 0.2243487536907196 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.4476689100265503, "learning_rate": 2.7215644343432718e-05, "loss": 0.2406, "step": 11006, "teacher_loss": 0.21757693588733673 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.20841731131076813, "learning_rate": 2.7214326112696662e-05, "loss": 0.2285, "step": 11007, "teacher_loss": 0.23071825504302979 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.2793912887573242, "learning_rate": 2.721300760192273e-05, "loss": 0.2365, "step": 11008, "teacher_loss": 0.23173384368419647 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.7197442054748535, "learning_rate": 2.7211688811141152e-05, "loss": 0.3045, "step": 11009, "teacher_loss": 0.25840288400650024 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.6929067373275757, "learning_rate": 2.7210369740382166e-05, "loss": 0.3047, "step": 11010, "teacher_loss": 0.2615973949432373 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.36473339796066284, "learning_rate": 2.7209050389676006e-05, "loss": 0.1835, "step": 11011, "teacher_loss": 0.16336104273796082 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5672443509101868, "learning_rate": 2.7207730759052925e-05, "loss": 0.3647, "step": 11012, "teacher_loss": 0.34220004081726074 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5090664625167847, "learning_rate": 2.720641084854318e-05, "loss": 0.2553, "step": 11013, "teacher_loss": 0.227097749710083 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.6856584548950195, "learning_rate": 2.720509065817703e-05, "loss": 0.3141, "step": 11014, "teacher_loss": 0.2728331685066223 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.32951289415359497, "learning_rate": 2.7203770187984746e-05, "loss": 0.222, "step": 11015, "teacher_loss": 0.21004731953144073 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.3552158772945404, "learning_rate": 2.7202449437996596e-05, "loss": 0.234, "step": 11016, "teacher_loss": 0.2205621302127838 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.9253280162811279, "learning_rate": 2.7201128408242866e-05, "loss": 0.3404, "step": 11017, "teacher_loss": 0.275382399559021 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.9888057708740234, "learning_rate": 2.7199807098753846e-05, "loss": 0.3642, "step": 11018, "teacher_loss": 0.2947728633880615 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5136024355888367, "learning_rate": 2.7198485509559825e-05, "loss": 0.1952, "step": 11019, "teacher_loss": 0.15978918969631195 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.3180859684944153, "learning_rate": 2.71971636406911e-05, "loss": 0.2075, "step": 11020, "teacher_loss": 0.19520969688892365 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.44778770208358765, "learning_rate": 2.7195841492177988e-05, "loss": 0.3122, "step": 11021, "teacher_loss": 0.29713016748428345 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 1.3251254558563232, "learning_rate": 2.7194519064050792e-05, "loss": 0.4342, "step": 11022, "teacher_loss": 0.3351704776287079 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5996873378753662, "learning_rate": 2.7193196356339837e-05, "loss": 0.2861, "step": 11023, "teacher_loss": 0.25128084421157837 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.4814158082008362, "learning_rate": 2.7191873369075443e-05, "loss": 0.414, "step": 11024, "teacher_loss": 0.406563401222229 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.42363858222961426, "learning_rate": 2.7190550102287953e-05, "loss": 0.3263, "step": 11025, "teacher_loss": 0.31544697284698486 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.6484353542327881, "learning_rate": 2.7189226556007692e-05, "loss": 0.3682, "step": 11026, "teacher_loss": 0.33706796169281006 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.4444020688533783, "learning_rate": 2.718790273026501e-05, "loss": 0.3315, "step": 11027, "teacher_loss": 0.31896984577178955 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.41422030329704285, "learning_rate": 2.7186578625090266e-05, "loss": 0.3275, "step": 11028, "teacher_loss": 0.31787851452827454 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.4196636974811554, "learning_rate": 2.7185254240513806e-05, "loss": 0.3374, "step": 11029, "teacher_loss": 0.3282148838043213 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5356686115264893, "learning_rate": 2.7183929576566e-05, "loss": 0.2062, "step": 11030, "teacher_loss": 0.16959501802921295 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.40177783370018005, "learning_rate": 2.718260463327722e-05, "loss": 0.3218, "step": 11031, "teacher_loss": 0.3129402697086334 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.32416480779647827, "learning_rate": 2.7181279410677842e-05, "loss": 0.1872, "step": 11032, "teacher_loss": 0.17201584577560425 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.47777456045150757, "learning_rate": 2.7179953908798246e-05, "loss": 0.2928, "step": 11033, "teacher_loss": 0.2722725570201874 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.27770549058914185, "learning_rate": 2.717862812766882e-05, "loss": 0.2192, "step": 11034, "teacher_loss": 0.21273157000541687 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.583075225353241, "learning_rate": 2.7177302067319977e-05, "loss": 0.4403, "step": 11035, "teacher_loss": 0.4244568943977356 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.7338087558746338, "learning_rate": 2.71759757277821e-05, "loss": 0.2622, "step": 11036, "teacher_loss": 0.20981313288211823 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.5577709674835205, "learning_rate": 2.7174649109085605e-05, "loss": 0.4409, "step": 11037, "teacher_loss": 0.42786386609077454 }, { "compression_loss": 0.0, "epoch": 1.99, "label_loss": 0.1353207528591156, "learning_rate": 2.7173322211260906e-05, "loss": 0.1885, "step": 11038, "teacher_loss": 0.19436055421829224 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.24597877264022827, "learning_rate": 2.7171995034338427e-05, "loss": 0.2351, "step": 11039, "teacher_loss": 0.23393619060516357 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3176301419734955, "learning_rate": 2.7170667578348598e-05, "loss": 0.2409, "step": 11040, "teacher_loss": 0.23238921165466309 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6035487651824951, "learning_rate": 2.7169339843321846e-05, "loss": 0.3282, "step": 11041, "teacher_loss": 0.29760265350341797 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3541339933872223, "learning_rate": 2.716801182928862e-05, "loss": 0.2158, "step": 11042, "teacher_loss": 0.20044703781604767 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.42789238691329956, "learning_rate": 2.7166683536279363e-05, "loss": 0.3725, "step": 11043, "teacher_loss": 0.3663749098777771 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3658503592014313, "learning_rate": 2.7165354964324534e-05, "loss": 0.3898, "step": 11044, "teacher_loss": 0.3924328088760376 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.7615195512771606, "learning_rate": 2.7164026113454585e-05, "loss": 0.3247, "step": 11045, "teacher_loss": 0.27612221240997314 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6370681524276733, "learning_rate": 2.7162696983699988e-05, "loss": 0.3435, "step": 11046, "teacher_loss": 0.31082576513290405 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.29654231667518616, "learning_rate": 2.7161367575091217e-05, "loss": 0.2228, "step": 11047, "teacher_loss": 0.21461719274520874 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3899002969264984, "learning_rate": 2.7160037887658743e-05, "loss": 0.2917, "step": 11048, "teacher_loss": 0.280734121799469 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.15593791007995605, "learning_rate": 2.715870792143307e-05, "loss": 0.1782, "step": 11049, "teacher_loss": 0.18067193031311035 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.7147342562675476, "learning_rate": 2.7157377676444664e-05, "loss": 0.3088, "step": 11050, "teacher_loss": 0.26368898153305054 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.25797319412231445, "learning_rate": 2.7156047152724046e-05, "loss": 0.2281, "step": 11051, "teacher_loss": 0.22473329305648804 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.7149232625961304, "learning_rate": 2.715471635030171e-05, "loss": 0.3682, "step": 11052, "teacher_loss": 0.3297004997730255 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3798366189002991, "learning_rate": 2.715338526920817e-05, "loss": 0.2177, "step": 11053, "teacher_loss": 0.19966942071914673 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.36372238397598267, "learning_rate": 2.7152053909473945e-05, "loss": 0.2595, "step": 11054, "teacher_loss": 0.24791128933429718 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.1740797460079193, "learning_rate": 2.715072227112956e-05, "loss": 0.2234, "step": 11055, "teacher_loss": 0.2288346290588379 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.2681766450405121, "learning_rate": 2.7149390354205533e-05, "loss": 0.2768, "step": 11056, "teacher_loss": 0.277803897857666 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4045276939868927, "learning_rate": 2.7148058158732423e-05, "loss": 0.2587, "step": 11057, "teacher_loss": 0.24251475930213928 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.18611067533493042, "learning_rate": 2.7146725684740754e-05, "loss": 0.2105, "step": 11058, "teacher_loss": 0.2132633924484253 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.7857559323310852, "learning_rate": 2.7145392932261085e-05, "loss": 0.3207, "step": 11059, "teacher_loss": 0.2689828872680664 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.35701483488082886, "learning_rate": 2.714405990132397e-05, "loss": 0.2309, "step": 11060, "teacher_loss": 0.21685263514518738 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.35951870679855347, "learning_rate": 2.7142726591959968e-05, "loss": 0.1975, "step": 11061, "teacher_loss": 0.17951303720474243 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.5068052411079407, "learning_rate": 2.7141393004199652e-05, "loss": 0.1986, "step": 11062, "teacher_loss": 0.16437765955924988 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.558967113494873, "learning_rate": 2.71400591380736e-05, "loss": 0.2751, "step": 11063, "teacher_loss": 0.243523508310318 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.9196529388427734, "learning_rate": 2.7138724993612386e-05, "loss": 0.2789, "step": 11064, "teacher_loss": 0.20772764086723328 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3080746531486511, "learning_rate": 2.7137390570846608e-05, "loss": 0.2311, "step": 11065, "teacher_loss": 0.22255653142929077 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4238111078739166, "learning_rate": 2.7136055869806847e-05, "loss": 0.278, "step": 11066, "teacher_loss": 0.26175373792648315 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.12689395248889923, "learning_rate": 2.7134720890523713e-05, "loss": 0.1946, "step": 11067, "teacher_loss": 0.20208588242530823 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.1391545832157135, "learning_rate": 2.713338563302781e-05, "loss": 0.156, "step": 11068, "teacher_loss": 0.15789633989334106 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4829387068748474, "learning_rate": 2.713205009734975e-05, "loss": 0.3892, "step": 11069, "teacher_loss": 0.37878715991973877 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4808875024318695, "learning_rate": 2.713071428352016e-05, "loss": 0.2457, "step": 11070, "teacher_loss": 0.21954245865345 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.1887083202600479, "learning_rate": 2.712937819156966e-05, "loss": 0.2362, "step": 11071, "teacher_loss": 0.24152205884456635 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6208519339561462, "learning_rate": 2.7128041821528884e-05, "loss": 0.4896, "step": 11072, "teacher_loss": 0.4749906659126282 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.34722474217414856, "learning_rate": 2.7126705173428467e-05, "loss": 0.2354, "step": 11073, "teacher_loss": 0.22301414608955383 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.9872570037841797, "learning_rate": 2.712536824729906e-05, "loss": 0.3112, "step": 11074, "teacher_loss": 0.23612657189369202 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4689207673072815, "learning_rate": 2.712403104317132e-05, "loss": 0.2822, "step": 11075, "teacher_loss": 0.261398583650589 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6537783145904541, "learning_rate": 2.712269356107589e-05, "loss": 0.283, "step": 11076, "teacher_loss": 0.24180655181407928 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.5218496322631836, "learning_rate": 2.7121355801043444e-05, "loss": 0.2022, "step": 11077, "teacher_loss": 0.1666392832994461 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.25220993161201477, "learning_rate": 2.7120017763104648e-05, "loss": 0.2699, "step": 11078, "teacher_loss": 0.2718440294265747 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.26389414072036743, "learning_rate": 2.711867944729019e-05, "loss": 0.1613, "step": 11079, "teacher_loss": 0.1499488800764084 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6802481412887573, "learning_rate": 2.711734085363074e-05, "loss": 0.3652, "step": 11080, "teacher_loss": 0.3302406072616577 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.40174198150634766, "learning_rate": 2.7116001982156995e-05, "loss": 0.2603, "step": 11081, "teacher_loss": 0.24454142153263092 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.267318457365036, "learning_rate": 2.711466283289965e-05, "loss": 0.2349, "step": 11082, "teacher_loss": 0.2312580794095993 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.5143426060676575, "learning_rate": 2.7113323405889406e-05, "loss": 0.3014, "step": 11083, "teacher_loss": 0.27775144577026367 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 1.008647084236145, "learning_rate": 2.7111983701156978e-05, "loss": 0.4198, "step": 11084, "teacher_loss": 0.3543638586997986 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.6783154606819153, "learning_rate": 2.7110643718733075e-05, "loss": 0.2506, "step": 11085, "teacher_loss": 0.2030808925628662 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4924120306968689, "learning_rate": 2.7109303458648422e-05, "loss": 0.3429, "step": 11086, "teacher_loss": 0.3262344002723694 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.28460320830345154, "learning_rate": 2.7107962920933748e-05, "loss": 0.1657, "step": 11087, "teacher_loss": 0.15247558057308197 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.18472126126289368, "learning_rate": 2.7106622105619787e-05, "loss": 0.1922, "step": 11088, "teacher_loss": 0.19304703176021576 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.20212475955486298, "learning_rate": 2.7105281012737272e-05, "loss": 0.1817, "step": 11089, "teacher_loss": 0.17938190698623657 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.3637077212333679, "learning_rate": 2.7103939642316963e-05, "loss": 0.3078, "step": 11090, "teacher_loss": 0.30162686109542847 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.4526901841163635, "learning_rate": 2.7102597994389604e-05, "loss": 0.2903, "step": 11091, "teacher_loss": 0.2723066210746765 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.5648419857025146, "learning_rate": 2.7101256068985955e-05, "loss": 0.2764, "step": 11092, "teacher_loss": 0.2443307489156723 }, { "compression_loss": 0.0, "epoch": 2.0, "label_loss": 0.22369495034217834, "learning_rate": 2.7099913866136793e-05, "loss": 0.1419, "step": 11093, "teacher_loss": 0.132841557264328 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.4675179123878479, "learning_rate": 2.7098571385872882e-05, "loss": 0.2337, "step": 11094, "teacher_loss": 0.20773647725582123 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6255444288253784, "learning_rate": 2.7097228628225e-05, "loss": 0.3842, "step": 11095, "teacher_loss": 0.35737138986587524 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.3477962017059326, "learning_rate": 2.7095885593223934e-05, "loss": 0.2113, "step": 11096, "teacher_loss": 0.1961199939250946 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5160927176475525, "learning_rate": 2.7094542280900477e-05, "loss": 0.2901, "step": 11097, "teacher_loss": 0.26497629284858704 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.4052009582519531, "learning_rate": 2.7093198691285433e-05, "loss": 0.2761, "step": 11098, "teacher_loss": 0.26171940565109253 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.17650824785232544, "learning_rate": 2.70918548244096e-05, "loss": 0.2767, "step": 11099, "teacher_loss": 0.28787118196487427 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5297001600265503, "learning_rate": 2.709051068030378e-05, "loss": 0.2931, "step": 11100, "teacher_loss": 0.2667995095252991 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.9438211917877197, "learning_rate": 2.7089166258998808e-05, "loss": 0.725, "step": 11101, "teacher_loss": 0.7007371187210083 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.213974267244339, "learning_rate": 2.7087821560525492e-05, "loss": 0.2452, "step": 11102, "teacher_loss": 0.24863901734352112 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.7154766321182251, "learning_rate": 2.7086476584914675e-05, "loss": 0.3544, "step": 11103, "teacher_loss": 0.31429457664489746 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.14646224677562714, "learning_rate": 2.708513133219718e-05, "loss": 0.1612, "step": 11104, "teacher_loss": 0.16288727521896362 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.7129599452018738, "learning_rate": 2.708378580240386e-05, "loss": 0.5867, "step": 11105, "teacher_loss": 0.5726842284202576 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.7975394129753113, "learning_rate": 2.7082439995565565e-05, "loss": 0.2192, "step": 11106, "teacher_loss": 0.15496890246868134 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.20353181660175323, "learning_rate": 2.708109391171314e-05, "loss": 0.2028, "step": 11107, "teacher_loss": 0.2027340829372406 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.33372199535369873, "learning_rate": 2.7079747550877455e-05, "loss": 0.2718, "step": 11108, "teacher_loss": 0.26488804817199707 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2949153780937195, "learning_rate": 2.7078400913089376e-05, "loss": 0.2564, "step": 11109, "teacher_loss": 0.25209251046180725 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6103252172470093, "learning_rate": 2.7077053998379778e-05, "loss": 0.4434, "step": 11110, "teacher_loss": 0.42488181591033936 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 1.1178202629089355, "learning_rate": 2.707570680677954e-05, "loss": 0.3193, "step": 11111, "teacher_loss": 0.23062273859977722 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5445696115493774, "learning_rate": 2.707435933831955e-05, "loss": 0.3803, "step": 11112, "teacher_loss": 0.3620012402534485 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6099430322647095, "learning_rate": 2.7073011593030697e-05, "loss": 0.385, "step": 11113, "teacher_loss": 0.3600549101829529 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5297979712486267, "learning_rate": 2.707166357094389e-05, "loss": 0.2802, "step": 11114, "teacher_loss": 0.2524702250957489 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.540873110294342, "learning_rate": 2.7070315272090027e-05, "loss": 0.2573, "step": 11115, "teacher_loss": 0.22574713826179504 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.33703333139419556, "learning_rate": 2.7068966696500025e-05, "loss": 0.2914, "step": 11116, "teacher_loss": 0.28633999824523926 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.17114807665348053, "learning_rate": 2.70676178442048e-05, "loss": 0.1954, "step": 11117, "teacher_loss": 0.19810548424720764 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.7181960344314575, "learning_rate": 2.706626871523528e-05, "loss": 0.2806, "step": 11118, "teacher_loss": 0.231967955827713 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2218027412891388, "learning_rate": 2.7064919309622395e-05, "loss": 0.2652, "step": 11119, "teacher_loss": 0.2700613737106323 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.3960804045200348, "learning_rate": 2.7063569627397082e-05, "loss": 0.2457, "step": 11120, "teacher_loss": 0.2289600670337677 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.22098730504512787, "learning_rate": 2.706221966859029e-05, "loss": 0.2429, "step": 11121, "teacher_loss": 0.24536684155464172 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.7551677823066711, "learning_rate": 2.7060869433232957e-05, "loss": 0.4162, "step": 11122, "teacher_loss": 0.3785792589187622 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.8047893047332764, "learning_rate": 2.7059518921356053e-05, "loss": 0.5408, "step": 11123, "teacher_loss": 0.5114735960960388 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.33568909764289856, "learning_rate": 2.7058168132990536e-05, "loss": 0.2191, "step": 11124, "teacher_loss": 0.2061665952205658 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2605346739292145, "learning_rate": 2.7056817068167377e-05, "loss": 0.2017, "step": 11125, "teacher_loss": 0.19519099593162537 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.516914427280426, "learning_rate": 2.705546572691755e-05, "loss": 0.3492, "step": 11126, "teacher_loss": 0.3305789828300476 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2928575277328491, "learning_rate": 2.705411410927204e-05, "loss": 0.3136, "step": 11127, "teacher_loss": 0.3159273862838745 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.39623790979385376, "learning_rate": 2.7052762215261828e-05, "loss": 0.2864, "step": 11128, "teacher_loss": 0.2741583585739136 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.3782995939254761, "learning_rate": 2.705141004491792e-05, "loss": 0.2384, "step": 11129, "teacher_loss": 0.22285538911819458 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.28563278913497925, "learning_rate": 2.705005759827131e-05, "loss": 0.2166, "step": 11130, "teacher_loss": 0.20897230505943298 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.44995877146720886, "learning_rate": 2.7048704875353004e-05, "loss": 0.3323, "step": 11131, "teacher_loss": 0.31926870346069336 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.3307040333747864, "learning_rate": 2.7047351876194022e-05, "loss": 0.2117, "step": 11132, "teacher_loss": 0.19846510887145996 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.42214131355285645, "learning_rate": 2.7045998600825382e-05, "loss": 0.2702, "step": 11133, "teacher_loss": 0.25335264205932617 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6219674348831177, "learning_rate": 2.7044645049278112e-05, "loss": 0.519, "step": 11134, "teacher_loss": 0.5075709223747253 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2580287456512451, "learning_rate": 2.7043291221583237e-05, "loss": 0.2115, "step": 11135, "teacher_loss": 0.20631955564022064 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.11969958990812302, "learning_rate": 2.7041937117771807e-05, "loss": 0.152, "step": 11136, "teacher_loss": 0.15562891960144043 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.9449215531349182, "learning_rate": 2.7040582737874857e-05, "loss": 0.6072, "step": 11137, "teacher_loss": 0.5696402788162231 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6992002725601196, "learning_rate": 2.7039228081923448e-05, "loss": 0.3539, "step": 11138, "teacher_loss": 0.3154977560043335 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.3310987949371338, "learning_rate": 2.7037873149948637e-05, "loss": 0.4145, "step": 11139, "teacher_loss": 0.4237608313560486 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2851220369338989, "learning_rate": 2.7036517941981486e-05, "loss": 0.2197, "step": 11140, "teacher_loss": 0.2124020904302597 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.835666835308075, "learning_rate": 2.7035162458053057e-05, "loss": 0.5865, "step": 11141, "teacher_loss": 0.558761715888977 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.26387423276901245, "learning_rate": 2.7033806698194444e-05, "loss": 0.222, "step": 11142, "teacher_loss": 0.21738894283771515 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.501766562461853, "learning_rate": 2.703245066243672e-05, "loss": 0.3029, "step": 11143, "teacher_loss": 0.28080326318740845 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.2797282338142395, "learning_rate": 2.703109435081098e-05, "loss": 0.1707, "step": 11144, "teacher_loss": 0.1586271971464157 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5871778726577759, "learning_rate": 2.7029737763348316e-05, "loss": 0.477, "step": 11145, "teacher_loss": 0.4648021161556244 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.4689687490463257, "learning_rate": 2.702838090007983e-05, "loss": 0.2785, "step": 11146, "teacher_loss": 0.2573047876358032 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.6790783405303955, "learning_rate": 2.702702376103664e-05, "loss": 0.3453, "step": 11147, "teacher_loss": 0.3082126975059509 }, { "compression_loss": 0.0, "epoch": 2.01, "label_loss": 0.5030022859573364, "learning_rate": 2.7025666346249845e-05, "loss": 0.2926, "step": 11148, "teacher_loss": 0.26927411556243896 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.2587215006351471, "learning_rate": 2.702430865575058e-05, "loss": 0.2026, "step": 11149, "teacher_loss": 0.19634869694709778 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.2933503985404968, "learning_rate": 2.7022950689569968e-05, "loss": 0.2044, "step": 11150, "teacher_loss": 0.19447465240955353 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.20631949603557587, "learning_rate": 2.7021592447739143e-05, "loss": 0.183, "step": 11151, "teacher_loss": 0.18043893575668335 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.4691442847251892, "learning_rate": 2.702023393028925e-05, "loss": 0.2829, "step": 11152, "teacher_loss": 0.2621912360191345 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5620827674865723, "learning_rate": 2.7018875137251424e-05, "loss": 0.4542, "step": 11153, "teacher_loss": 0.442263126373291 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.4237462282180786, "learning_rate": 2.7017516068656832e-05, "loss": 0.4438, "step": 11154, "teacher_loss": 0.44599688053131104 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.10700087249279022, "learning_rate": 2.7016156724536625e-05, "loss": 0.1523, "step": 11155, "teacher_loss": 0.15731433033943176 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3748167157173157, "learning_rate": 2.7014797104921967e-05, "loss": 0.2594, "step": 11156, "teacher_loss": 0.24663017690181732 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6609557867050171, "learning_rate": 2.701343720984404e-05, "loss": 0.2679, "step": 11157, "teacher_loss": 0.22418633103370667 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 1.7564237117767334, "learning_rate": 2.701207703933401e-05, "loss": 0.4517, "step": 11158, "teacher_loss": 0.30674880743026733 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.40274733304977417, "learning_rate": 2.701071659342307e-05, "loss": 0.1648, "step": 11159, "teacher_loss": 0.1384095549583435 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.8992514610290527, "learning_rate": 2.700935587214241e-05, "loss": 0.3257, "step": 11160, "teacher_loss": 0.2620210349559784 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.35065388679504395, "learning_rate": 2.7007994875523222e-05, "loss": 0.2666, "step": 11161, "teacher_loss": 0.25723132491111755 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6221776604652405, "learning_rate": 2.7006633603596712e-05, "loss": 0.2474, "step": 11162, "teacher_loss": 0.20573300123214722 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5098869204521179, "learning_rate": 2.7005272056394096e-05, "loss": 0.2631, "step": 11163, "teacher_loss": 0.23568841814994812 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.40784379839897156, "learning_rate": 2.7003910233946588e-05, "loss": 0.385, "step": 11164, "teacher_loss": 0.38241130113601685 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3961941599845886, "learning_rate": 2.70025481362854e-05, "loss": 0.2764, "step": 11165, "teacher_loss": 0.263124018907547 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.17299503087997437, "learning_rate": 2.7001185763441773e-05, "loss": 0.2039, "step": 11166, "teacher_loss": 0.2072961926460266 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.36186981201171875, "learning_rate": 2.699982311544694e-05, "loss": 0.3029, "step": 11167, "teacher_loss": 0.296355664730072 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.18529130518436432, "learning_rate": 2.699846019233214e-05, "loss": 0.1953, "step": 11168, "teacher_loss": 0.1963636577129364 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6550931930541992, "learning_rate": 2.6997096994128616e-05, "loss": 0.2809, "step": 11169, "teacher_loss": 0.23934370279312134 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6422407627105713, "learning_rate": 2.699573352086763e-05, "loss": 0.2547, "step": 11170, "teacher_loss": 0.2115880697965622 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.37764179706573486, "learning_rate": 2.699436977258044e-05, "loss": 0.2333, "step": 11171, "teacher_loss": 0.21725022792816162 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5187445282936096, "learning_rate": 2.699300574929831e-05, "loss": 0.1997, "step": 11172, "teacher_loss": 0.1642317771911621 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.23255422711372375, "learning_rate": 2.699164145105252e-05, "loss": 0.2217, "step": 11173, "teacher_loss": 0.22048154473304749 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3505420684814453, "learning_rate": 2.699027687787434e-05, "loss": 0.2619, "step": 11174, "teacher_loss": 0.2520436644554138 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5439131855964661, "learning_rate": 2.698891202979506e-05, "loss": 0.2504, "step": 11175, "teacher_loss": 0.2177654504776001 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.49192696809768677, "learning_rate": 2.698754690684597e-05, "loss": 0.25, "step": 11176, "teacher_loss": 0.22315248847007751 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.7508048415184021, "learning_rate": 2.6986181509058376e-05, "loss": 0.4346, "step": 11177, "teacher_loss": 0.39951199293136597 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.9282213449478149, "learning_rate": 2.6984815836463572e-05, "loss": 0.3638, "step": 11178, "teacher_loss": 0.30112171173095703 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.9537012577056885, "learning_rate": 2.6983449889092874e-05, "loss": 0.4629, "step": 11179, "teacher_loss": 0.40836164355278015 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5213973522186279, "learning_rate": 2.69820836669776e-05, "loss": 0.3205, "step": 11180, "teacher_loss": 0.2982255816459656 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6989810466766357, "learning_rate": 2.698071717014907e-05, "loss": 0.3349, "step": 11181, "teacher_loss": 0.29449018836021423 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3838341236114502, "learning_rate": 2.6979350398638616e-05, "loss": 0.2182, "step": 11182, "teacher_loss": 0.19980354607105255 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5215386748313904, "learning_rate": 2.6977983352477574e-05, "loss": 0.2612, "step": 11183, "teacher_loss": 0.23231905698776245 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.20742267370224, "learning_rate": 2.697661603169728e-05, "loss": 0.1814, "step": 11184, "teacher_loss": 0.17850598692893982 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5252453088760376, "learning_rate": 2.6975248436329097e-05, "loss": 0.2202, "step": 11185, "teacher_loss": 0.1863296627998352 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.4949405789375305, "learning_rate": 2.6973880566404364e-05, "loss": 0.3584, "step": 11186, "teacher_loss": 0.3432287573814392 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5451057553291321, "learning_rate": 2.6972512421954453e-05, "loss": 0.2823, "step": 11187, "teacher_loss": 0.25311341881752014 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5243842601776123, "learning_rate": 2.6971144003010725e-05, "loss": 0.558, "step": 11188, "teacher_loss": 0.5617440938949585 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.27836930751800537, "learning_rate": 2.6969775309604558e-05, "loss": 0.2803, "step": 11189, "teacher_loss": 0.2805447578430176 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.555506706237793, "learning_rate": 2.696840634176733e-05, "loss": 0.3163, "step": 11190, "teacher_loss": 0.28967010974884033 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.21295493841171265, "learning_rate": 2.696703709953043e-05, "loss": 0.1706, "step": 11191, "teacher_loss": 0.16592423617839813 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6925020217895508, "learning_rate": 2.6965667582925247e-05, "loss": 0.3616, "step": 11192, "teacher_loss": 0.3248024582862854 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3998020589351654, "learning_rate": 2.696429779198318e-05, "loss": 0.2529, "step": 11193, "teacher_loss": 0.23656976222991943 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6629601716995239, "learning_rate": 2.6962927726735637e-05, "loss": 0.3501, "step": 11194, "teacher_loss": 0.31535208225250244 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.25233015418052673, "learning_rate": 2.6961557387214022e-05, "loss": 0.3444, "step": 11195, "teacher_loss": 0.3546769917011261 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.670173704624176, "learning_rate": 2.6960186773449767e-05, "loss": 0.209, "step": 11196, "teacher_loss": 0.15775738656520844 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3421512544155121, "learning_rate": 2.6958815885474285e-05, "loss": 0.197, "step": 11197, "teacher_loss": 0.18090704083442688 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.3140241205692291, "learning_rate": 2.6957444723319005e-05, "loss": 0.3106, "step": 11198, "teacher_loss": 0.3102579116821289 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.6438047885894775, "learning_rate": 2.6956073287015373e-05, "loss": 0.279, "step": 11199, "teacher_loss": 0.2384609878063202 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5537392497062683, "learning_rate": 2.6954701576594827e-05, "loss": 0.2588, "step": 11200, "teacher_loss": 0.22598521411418915 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.28569895029067993, "learning_rate": 2.695332959208881e-05, "loss": 0.232, "step": 11201, "teacher_loss": 0.2260473668575287 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.5608729124069214, "learning_rate": 2.695195733352879e-05, "loss": 0.2621, "step": 11202, "teacher_loss": 0.2289191484451294 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.29268136620521545, "learning_rate": 2.695058480094622e-05, "loss": 0.2642, "step": 11203, "teacher_loss": 0.2610268294811249 }, { "compression_loss": 0.0, "epoch": 2.02, "label_loss": 0.8299776911735535, "learning_rate": 2.6949211994372566e-05, "loss": 0.3556, "step": 11204, "teacher_loss": 0.3028719127178192 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.461181640625, "learning_rate": 2.6947838913839314e-05, "loss": 0.373, "step": 11205, "teacher_loss": 0.3632575571537018 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.9282543063163757, "learning_rate": 2.6946465559377934e-05, "loss": 0.3246, "step": 11206, "teacher_loss": 0.25754567980766296 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5561931133270264, "learning_rate": 2.6945091931019916e-05, "loss": 0.3541, "step": 11207, "teacher_loss": 0.33169811964035034 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3426342010498047, "learning_rate": 2.6943718028796752e-05, "loss": 0.2104, "step": 11208, "teacher_loss": 0.1957625299692154 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.6001682281494141, "learning_rate": 2.6942343852739942e-05, "loss": 0.3417, "step": 11209, "teacher_loss": 0.3130132555961609 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.7260804772377014, "learning_rate": 2.6940969402880993e-05, "loss": 0.246, "step": 11210, "teacher_loss": 0.19261445105075836 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.4389238953590393, "learning_rate": 2.693959467925142e-05, "loss": 0.2032, "step": 11211, "teacher_loss": 0.17700113356113434 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5990551710128784, "learning_rate": 2.6938219681882733e-05, "loss": 0.3283, "step": 11212, "teacher_loss": 0.29823437333106995 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.27416127920150757, "learning_rate": 2.6936844410806463e-05, "loss": 0.2293, "step": 11213, "teacher_loss": 0.22437047958374023 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.45993781089782715, "learning_rate": 2.6935468866054142e-05, "loss": 0.3432, "step": 11214, "teacher_loss": 0.33028265833854675 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.9485346078872681, "learning_rate": 2.69340930476573e-05, "loss": 0.3582, "step": 11215, "teacher_loss": 0.29263997077941895 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.334402859210968, "learning_rate": 2.693271695564749e-05, "loss": 0.1993, "step": 11216, "teacher_loss": 0.184321328997612 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3555179834365845, "learning_rate": 2.693134059005625e-05, "loss": 0.2669, "step": 11217, "teacher_loss": 0.2570229768753052 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.2551718056201935, "learning_rate": 2.692996395091515e-05, "loss": 0.2988, "step": 11218, "teacher_loss": 0.30368751287460327 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.724045991897583, "learning_rate": 2.6928587038255734e-05, "loss": 0.3357, "step": 11219, "teacher_loss": 0.29252979159355164 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5547541379928589, "learning_rate": 2.692720985210959e-05, "loss": 0.2407, "step": 11220, "teacher_loss": 0.20585399866104126 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.4354609251022339, "learning_rate": 2.692583239250828e-05, "loss": 0.2408, "step": 11221, "teacher_loss": 0.21916991472244263 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.27566632628440857, "learning_rate": 2.692445465948339e-05, "loss": 0.2416, "step": 11222, "teacher_loss": 0.2377694696187973 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5918866395950317, "learning_rate": 2.6923076653066503e-05, "loss": 0.3038, "step": 11223, "teacher_loss": 0.27177971601486206 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5092679262161255, "learning_rate": 2.6921698373289217e-05, "loss": 0.2093, "step": 11224, "teacher_loss": 0.1759694218635559 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.21625277400016785, "learning_rate": 2.692031982018313e-05, "loss": 0.2248, "step": 11225, "teacher_loss": 0.22579748928546906 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.39595478773117065, "learning_rate": 2.691894099377985e-05, "loss": 0.2129, "step": 11226, "teacher_loss": 0.1925710290670395 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.41937726736068726, "learning_rate": 2.691756189411099e-05, "loss": 0.389, "step": 11227, "teacher_loss": 0.38563358783721924 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.8448793888092041, "learning_rate": 2.6916182521208164e-05, "loss": 0.3935, "step": 11228, "teacher_loss": 0.3433764576911926 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.18278658390045166, "learning_rate": 2.6914802875103003e-05, "loss": 0.192, "step": 11229, "teacher_loss": 0.1929853856563568 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.27039605379104614, "learning_rate": 2.691342295582713e-05, "loss": 0.2216, "step": 11230, "teacher_loss": 0.21617646515369415 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3692094683647156, "learning_rate": 2.6912042763412185e-05, "loss": 0.1878, "step": 11231, "teacher_loss": 0.16764050722122192 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.265606164932251, "learning_rate": 2.6910662297889818e-05, "loss": 0.1987, "step": 11232, "teacher_loss": 0.1912379264831543 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3151041269302368, "learning_rate": 2.6909281559291672e-05, "loss": 0.2104, "step": 11233, "teacher_loss": 0.19877415895462036 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.28362464904785156, "learning_rate": 2.6907900547649406e-05, "loss": 0.2001, "step": 11234, "teacher_loss": 0.1907707005739212 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.4875852167606354, "learning_rate": 2.6906519262994683e-05, "loss": 0.3403, "step": 11235, "teacher_loss": 0.32390546798706055 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3244495987892151, "learning_rate": 2.690513770535917e-05, "loss": 0.2258, "step": 11236, "teacher_loss": 0.21487928926944733 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.23239167034626007, "learning_rate": 2.690375587477455e-05, "loss": 0.2147, "step": 11237, "teacher_loss": 0.21272554993629456 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.33954235911369324, "learning_rate": 2.690237377127249e-05, "loss": 0.1878, "step": 11238, "teacher_loss": 0.1709732711315155 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.42640507221221924, "learning_rate": 2.690099139488468e-05, "loss": 0.2295, "step": 11239, "teacher_loss": 0.2075849324464798 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.25878241658210754, "learning_rate": 2.6899608745642823e-05, "loss": 0.1851, "step": 11240, "teacher_loss": 0.17694270610809326 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5628886222839355, "learning_rate": 2.6898225823578616e-05, "loss": 0.2964, "step": 11241, "teacher_loss": 0.26674625277519226 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5313345193862915, "learning_rate": 2.6896842628723766e-05, "loss": 0.2694, "step": 11242, "teacher_loss": 0.24032220244407654 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.2580585777759552, "learning_rate": 2.6895459161109978e-05, "loss": 0.2269, "step": 11243, "teacher_loss": 0.22342915832996368 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3754185438156128, "learning_rate": 2.689407542076898e-05, "loss": 0.2921, "step": 11244, "teacher_loss": 0.2828599214553833 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.29227176308631897, "learning_rate": 2.689269140773249e-05, "loss": 0.24, "step": 11245, "teacher_loss": 0.23417061567306519 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.370419442653656, "learning_rate": 2.689130712203224e-05, "loss": 0.2153, "step": 11246, "teacher_loss": 0.19805026054382324 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.20756079256534576, "learning_rate": 2.688992256369997e-05, "loss": 0.2225, "step": 11247, "teacher_loss": 0.22415073215961456 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.2742537260055542, "learning_rate": 2.688853773276743e-05, "loss": 0.2119, "step": 11248, "teacher_loss": 0.2049185335636139 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.8833494186401367, "learning_rate": 2.6887152629266354e-05, "loss": 0.5107, "step": 11249, "teacher_loss": 0.4692646265029907 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5896503925323486, "learning_rate": 2.6885767253228515e-05, "loss": 0.3619, "step": 11250, "teacher_loss": 0.33654725551605225 }, { "epoch": 2.03, "eval_exact_match": 79.30936613055819, "eval_f1": 86.97385439302845, "step": 11250 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.33625927567481995, "learning_rate": 2.688438160468567e-05, "loss": 0.3439, "step": 11251, "teacher_loss": 0.34474360942840576 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.45378538966178894, "learning_rate": 2.688299568366958e-05, "loss": 0.3777, "step": 11252, "teacher_loss": 0.3692609965801239 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.03842146694660187, "learning_rate": 2.688160949021203e-05, "loss": 0.1806, "step": 11253, "teacher_loss": 0.19644439220428467 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.5745305418968201, "learning_rate": 2.6880223024344798e-05, "loss": 0.3926, "step": 11254, "teacher_loss": 0.37234410643577576 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.37490731477737427, "learning_rate": 2.6878836286099665e-05, "loss": 0.2191, "step": 11255, "teacher_loss": 0.20183223485946655 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.6139047145843506, "learning_rate": 2.6877449275508435e-05, "loss": 0.3365, "step": 11256, "teacher_loss": 0.30570706725120544 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.7160766124725342, "learning_rate": 2.6876061992602903e-05, "loss": 0.9038, "step": 11257, "teacher_loss": 0.9247101545333862 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.6901978254318237, "learning_rate": 2.6874674437414876e-05, "loss": 0.3751, "step": 11258, "teacher_loss": 0.34007206559181213 }, { "compression_loss": 0.0, "epoch": 2.03, "label_loss": 0.3208482265472412, "learning_rate": 2.6873286609976165e-05, "loss": 0.2961, "step": 11259, "teacher_loss": 0.293396532535553 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4595295488834381, "learning_rate": 2.6871898510318588e-05, "loss": 0.5129, "step": 11260, "teacher_loss": 0.5188044309616089 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.24841484427452087, "learning_rate": 2.6870510138473977e-05, "loss": 0.1989, "step": 11261, "teacher_loss": 0.19337168335914612 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6570264101028442, "learning_rate": 2.6869121494474152e-05, "loss": 0.2866, "step": 11262, "teacher_loss": 0.24548864364624023 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5054397583007812, "learning_rate": 2.686773257835096e-05, "loss": 0.2242, "step": 11263, "teacher_loss": 0.1929645538330078 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3450898230075836, "learning_rate": 2.6866343390136245e-05, "loss": 0.1972, "step": 11264, "teacher_loss": 0.18075133860111237 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6953567862510681, "learning_rate": 2.686495392986185e-05, "loss": 0.3408, "step": 11265, "teacher_loss": 0.30141323804855347 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6767400503158569, "learning_rate": 2.686356419755963e-05, "loss": 0.3771, "step": 11266, "teacher_loss": 0.3438029885292053 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.2951080799102783, "learning_rate": 2.6862174193261457e-05, "loss": 0.3589, "step": 11267, "teacher_loss": 0.3660429120063782 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5776809453964233, "learning_rate": 2.6860783916999196e-05, "loss": 0.2764, "step": 11268, "teacher_loss": 0.2429770529270172 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5957028865814209, "learning_rate": 2.6859393368804718e-05, "loss": 0.3518, "step": 11269, "teacher_loss": 0.3247438073158264 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.23803962767124176, "learning_rate": 2.6858002548709903e-05, "loss": 0.2108, "step": 11270, "teacher_loss": 0.20778590440750122 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.629150390625, "learning_rate": 2.6856611456746647e-05, "loss": 0.285, "step": 11271, "teacher_loss": 0.24671012163162231 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3847843110561371, "learning_rate": 2.6855220092946834e-05, "loss": 0.2207, "step": 11272, "teacher_loss": 0.20245277881622314 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5425306558609009, "learning_rate": 2.6853828457342372e-05, "loss": 0.3445, "step": 11273, "teacher_loss": 0.32247036695480347 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3355640172958374, "learning_rate": 2.685243654996516e-05, "loss": 0.2184, "step": 11274, "teacher_loss": 0.20534522831439972 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.31646233797073364, "learning_rate": 2.6851044370847116e-05, "loss": 0.2175, "step": 11275, "teacher_loss": 0.20650577545166016 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.23984137177467346, "learning_rate": 2.6849651920020154e-05, "loss": 0.1868, "step": 11276, "teacher_loss": 0.18094469606876373 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.2742348313331604, "learning_rate": 2.6848259197516197e-05, "loss": 0.2198, "step": 11277, "teacher_loss": 0.21380004286766052 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4496724605560303, "learning_rate": 2.6846866203367185e-05, "loss": 0.2501, "step": 11278, "teacher_loss": 0.22789807617664337 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4974231719970703, "learning_rate": 2.684547293760505e-05, "loss": 0.2596, "step": 11279, "teacher_loss": 0.23316732048988342 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.7012125253677368, "learning_rate": 2.6844079400261735e-05, "loss": 0.354, "step": 11280, "teacher_loss": 0.31540751457214355 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.34276098012924194, "learning_rate": 2.6842685591369187e-05, "loss": 0.1788, "step": 11281, "teacher_loss": 0.16059181094169617 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6426563262939453, "learning_rate": 2.6841291510959363e-05, "loss": 0.4463, "step": 11282, "teacher_loss": 0.42451998591423035 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4309924244880676, "learning_rate": 2.6839897159064228e-05, "loss": 0.2688, "step": 11283, "teacher_loss": 0.25075024366378784 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.11389905214309692, "learning_rate": 2.683850253571575e-05, "loss": 0.1805, "step": 11284, "teacher_loss": 0.1879309117794037 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.8952006101608276, "learning_rate": 2.6837107640945904e-05, "loss": 0.2915, "step": 11285, "teacher_loss": 0.22444184124469757 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 1.0161534547805786, "learning_rate": 2.6835712474786667e-05, "loss": 0.9861, "step": 11286, "teacher_loss": 0.9827646613121033 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4063742756843567, "learning_rate": 2.6834317037270034e-05, "loss": 0.371, "step": 11287, "teacher_loss": 0.36708134412765503 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3163510859012604, "learning_rate": 2.6832921328427985e-05, "loss": 0.3928, "step": 11288, "teacher_loss": 0.4013279676437378 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.22254332900047302, "learning_rate": 2.6831525348292532e-05, "loss": 0.2131, "step": 11289, "teacher_loss": 0.2120695412158966 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3451879620552063, "learning_rate": 2.6830129096895672e-05, "loss": 0.3024, "step": 11290, "teacher_loss": 0.29770058393478394 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6977245807647705, "learning_rate": 2.682873257426942e-05, "loss": 0.2454, "step": 11291, "teacher_loss": 0.19514703750610352 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.25241440534591675, "learning_rate": 2.68273357804458e-05, "loss": 0.2084, "step": 11292, "teacher_loss": 0.2035626471042633 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.7038175463676453, "learning_rate": 2.6825938715456825e-05, "loss": 0.4133, "step": 11293, "teacher_loss": 0.38106220960617065 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6573154926300049, "learning_rate": 2.682454137933453e-05, "loss": 0.3725, "step": 11294, "teacher_loss": 0.34083160758018494 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.29671037197113037, "learning_rate": 2.6823143772110957e-05, "loss": 0.2429, "step": 11295, "teacher_loss": 0.23690149188041687 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 1.121340274810791, "learning_rate": 2.6821745893818145e-05, "loss": 0.4853, "step": 11296, "teacher_loss": 0.414655864238739 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.6127495765686035, "learning_rate": 2.6820347744488143e-05, "loss": 0.3295, "step": 11297, "teacher_loss": 0.29807665944099426 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.2527361214160919, "learning_rate": 2.6818949324153003e-05, "loss": 0.145, "step": 11298, "teacher_loss": 0.13300520181655884 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.598783016204834, "learning_rate": 2.6817550632844792e-05, "loss": 0.3626, "step": 11299, "teacher_loss": 0.3363194465637207 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.2856517434120178, "learning_rate": 2.6816151670595576e-05, "loss": 0.2749, "step": 11300, "teacher_loss": 0.27373206615448 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.19810280203819275, "learning_rate": 2.6814752437437428e-05, "loss": 0.2439, "step": 11301, "teacher_loss": 0.2490319311618805 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.20336779952049255, "learning_rate": 2.6813352933402432e-05, "loss": 0.1824, "step": 11302, "teacher_loss": 0.18003855645656586 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.43892931938171387, "learning_rate": 2.6811953158522668e-05, "loss": 0.2904, "step": 11303, "teacher_loss": 0.2739505171775818 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5004748702049255, "learning_rate": 2.6810553112830235e-05, "loss": 0.2635, "step": 11304, "teacher_loss": 0.23719993233680725 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4722036123275757, "learning_rate": 2.6809152796357225e-05, "loss": 0.2712, "step": 11305, "teacher_loss": 0.24886423349380493 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.4347146153450012, "learning_rate": 2.680775220913575e-05, "loss": 0.1817, "step": 11306, "teacher_loss": 0.153592050075531 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.8150264620780945, "learning_rate": 2.6806351351197923e-05, "loss": 0.3914, "step": 11307, "teacher_loss": 0.34438472986221313 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.2456343173980713, "learning_rate": 2.680495022257585e-05, "loss": 0.1766, "step": 11308, "teacher_loss": 0.1689676195383072 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.3890421390533447, "learning_rate": 2.6803548823301666e-05, "loss": 0.2877, "step": 11309, "teacher_loss": 0.27645498514175415 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.7891196012496948, "learning_rate": 2.6802147153407493e-05, "loss": 0.3037, "step": 11310, "teacher_loss": 0.24971838295459747 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.25211894512176514, "learning_rate": 2.680074521292547e-05, "loss": 0.2279, "step": 11311, "teacher_loss": 0.22523453831672668 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.9736194014549255, "learning_rate": 2.679934300188774e-05, "loss": 0.4806, "step": 11312, "teacher_loss": 0.4257797300815582 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.71315997838974, "learning_rate": 2.6797940520326453e-05, "loss": 0.4972, "step": 11313, "teacher_loss": 0.47322559356689453 }, { "compression_loss": 0.0, "epoch": 2.04, "label_loss": 0.5923274755477905, "learning_rate": 2.679653776827376e-05, "loss": 0.2821, "step": 11314, "teacher_loss": 0.2476511150598526 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7088020443916321, "learning_rate": 2.679513474576183e-05, "loss": 0.3402, "step": 11315, "teacher_loss": 0.2992664575576782 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.8937879800796509, "learning_rate": 2.679373145282282e-05, "loss": 0.3111, "step": 11316, "teacher_loss": 0.2463269829750061 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7397705316543579, "learning_rate": 2.67923278894889e-05, "loss": 0.2984, "step": 11317, "teacher_loss": 0.24934226274490356 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4082082509994507, "learning_rate": 2.6790924055792265e-05, "loss": 0.2565, "step": 11318, "teacher_loss": 0.23964270949363708 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.34883540868759155, "learning_rate": 2.6789519951765092e-05, "loss": 0.2141, "step": 11319, "teacher_loss": 0.19908207654953003 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4200015664100647, "learning_rate": 2.678811557743957e-05, "loss": 0.2762, "step": 11320, "teacher_loss": 0.26026713848114014 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.31462085247039795, "learning_rate": 2.6786710932847905e-05, "loss": 0.232, "step": 11321, "teacher_loss": 0.2228488326072693 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6154340505599976, "learning_rate": 2.6785306018022292e-05, "loss": 0.3919, "step": 11322, "teacher_loss": 0.36701780557632446 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.32675957679748535, "learning_rate": 2.6783900832994948e-05, "loss": 0.1918, "step": 11323, "teacher_loss": 0.17679722607135773 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4512156546115875, "learning_rate": 2.6782495377798087e-05, "loss": 0.2337, "step": 11324, "teacher_loss": 0.2095463126897812 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7122188806533813, "learning_rate": 2.6781089652463936e-05, "loss": 0.2859, "step": 11325, "teacher_loss": 0.2385624200105667 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5092610120773315, "learning_rate": 2.6779683657024718e-05, "loss": 0.2218, "step": 11326, "teacher_loss": 0.189883291721344 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6519548296928406, "learning_rate": 2.677827739151267e-05, "loss": 0.2497, "step": 11327, "teacher_loss": 0.2050163894891739 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7390754222869873, "learning_rate": 2.6776870855960033e-05, "loss": 0.3189, "step": 11328, "teacher_loss": 0.2721847891807556 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.17489303648471832, "learning_rate": 2.6775464050399063e-05, "loss": 0.1754, "step": 11329, "teacher_loss": 0.17546510696411133 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5513505339622498, "learning_rate": 2.6774056974862e-05, "loss": 0.3001, "step": 11330, "teacher_loss": 0.2721668779850006 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.2625054717063904, "learning_rate": 2.677264962938112e-05, "loss": 0.2391, "step": 11331, "teacher_loss": 0.23654712736606598 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4441652297973633, "learning_rate": 2.677124201398867e-05, "loss": 0.2379, "step": 11332, "teacher_loss": 0.21501559019088745 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5385757088661194, "learning_rate": 2.6769834128716938e-05, "loss": 0.3488, "step": 11333, "teacher_loss": 0.3276699483394623 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5285783410072327, "learning_rate": 2.6768425973598194e-05, "loss": 0.244, "step": 11334, "teacher_loss": 0.21239647269248962 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.264853298664093, "learning_rate": 2.676701754866473e-05, "loss": 0.2274, "step": 11335, "teacher_loss": 0.22323128581047058 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.3245318531990051, "learning_rate": 2.676560885394883e-05, "loss": 0.2723, "step": 11336, "teacher_loss": 0.2664865255355835 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6064648628234863, "learning_rate": 2.6764199889482797e-05, "loss": 0.3021, "step": 11337, "teacher_loss": 0.26824620366096497 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.481045126914978, "learning_rate": 2.676279065529893e-05, "loss": 0.2274, "step": 11338, "teacher_loss": 0.199247807264328 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.33887118101119995, "learning_rate": 2.676138115142954e-05, "loss": 0.2772, "step": 11339, "teacher_loss": 0.27035829424858093 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.33583134412765503, "learning_rate": 2.6759971377906943e-05, "loss": 0.1456, "step": 11340, "teacher_loss": 0.12443230301141739 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.19143471121788025, "learning_rate": 2.6758561334763455e-05, "loss": 0.1773, "step": 11341, "teacher_loss": 0.17567458748817444 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6370750069618225, "learning_rate": 2.675715102203142e-05, "loss": 0.4479, "step": 11342, "teacher_loss": 0.4269050359725952 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.3071538805961609, "learning_rate": 2.6755740439743153e-05, "loss": 0.2386, "step": 11343, "teacher_loss": 0.23096255958080292 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4049166738986969, "learning_rate": 2.6754329587931005e-05, "loss": 0.1745, "step": 11344, "teacher_loss": 0.14885716140270233 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6913039088249207, "learning_rate": 2.6752918466627325e-05, "loss": 0.2659, "step": 11345, "teacher_loss": 0.2186136245727539 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7287171483039856, "learning_rate": 2.675150707586446e-05, "loss": 0.2327, "step": 11346, "teacher_loss": 0.17760223150253296 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.1814676821231842, "learning_rate": 2.6750095415674768e-05, "loss": 0.2724, "step": 11347, "teacher_loss": 0.28253573179244995 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5242919921875, "learning_rate": 2.6748683486090616e-05, "loss": 0.3449, "step": 11348, "teacher_loss": 0.3250080943107605 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.540088951587677, "learning_rate": 2.674727128714438e-05, "loss": 0.5124, "step": 11349, "teacher_loss": 0.5093181133270264 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.3999256491661072, "learning_rate": 2.6745858818868434e-05, "loss": 0.2776, "step": 11350, "teacher_loss": 0.2640034854412079 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5296216011047363, "learning_rate": 2.674444608129516e-05, "loss": 0.3481, "step": 11351, "teacher_loss": 0.3279725909233093 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.7622315287590027, "learning_rate": 2.6743033074456945e-05, "loss": 0.3889, "step": 11352, "teacher_loss": 0.3474624752998352 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.30449968576431274, "learning_rate": 2.6741619798386195e-05, "loss": 0.2967, "step": 11353, "teacher_loss": 0.2958603501319885 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4423085153102875, "learning_rate": 2.67402062531153e-05, "loss": 0.2303, "step": 11354, "teacher_loss": 0.20670422911643982 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.2931573987007141, "learning_rate": 2.673879243867668e-05, "loss": 0.2141, "step": 11355, "teacher_loss": 0.20527726411819458 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.42857998609542847, "learning_rate": 2.6737378355102743e-05, "loss": 0.2137, "step": 11356, "teacher_loss": 0.18977797031402588 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 1.5221748352050781, "learning_rate": 2.673596400242591e-05, "loss": 0.4595, "step": 11357, "teacher_loss": 0.3414172828197479 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.3822225332260132, "learning_rate": 2.6734549380678606e-05, "loss": 0.2773, "step": 11358, "teacher_loss": 0.2656382918357849 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.43989866971969604, "learning_rate": 2.6733134489893268e-05, "loss": 0.1877, "step": 11359, "teacher_loss": 0.15964946150779724 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4692853093147278, "learning_rate": 2.673171933010234e-05, "loss": 0.2678, "step": 11360, "teacher_loss": 0.24543660879135132 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.4771975874900818, "learning_rate": 2.6730303901338254e-05, "loss": 0.2474, "step": 11361, "teacher_loss": 0.22184711694717407 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.41034796833992004, "learning_rate": 2.672888820363347e-05, "loss": 0.2598, "step": 11362, "teacher_loss": 0.24309095740318298 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.2897535562515259, "learning_rate": 2.672747223702045e-05, "loss": 0.1623, "step": 11363, "teacher_loss": 0.14814062416553497 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5135263204574585, "learning_rate": 2.6726056001531647e-05, "loss": 0.2283, "step": 11364, "teacher_loss": 0.1965990662574768 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.24716857075691223, "learning_rate": 2.6724639497199536e-05, "loss": 0.2304, "step": 11365, "teacher_loss": 0.2285584658384323 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.3011236786842346, "learning_rate": 2.6723222724056596e-05, "loss": 0.1785, "step": 11366, "teacher_loss": 0.1648693084716797 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.5832270383834839, "learning_rate": 2.6721805682135306e-05, "loss": 0.5169, "step": 11367, "teacher_loss": 0.5095845460891724 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.6850972771644592, "learning_rate": 2.6720388371468155e-05, "loss": 0.3605, "step": 11368, "teacher_loss": 0.3244236707687378 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.47433677315711975, "learning_rate": 2.6718970792087642e-05, "loss": 0.2044, "step": 11369, "teacher_loss": 0.1743718385696411 }, { "compression_loss": 0.0, "epoch": 2.05, "label_loss": 0.80791836977005, "learning_rate": 2.6717552944026258e-05, "loss": 0.4262, "step": 11370, "teacher_loss": 0.38377946615219116 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.7666103839874268, "learning_rate": 2.671613482731652e-05, "loss": 0.8251, "step": 11371, "teacher_loss": 0.8316085934638977 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5949872732162476, "learning_rate": 2.6714716441990937e-05, "loss": 0.2502, "step": 11372, "teacher_loss": 0.21190816164016724 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.3114638924598694, "learning_rate": 2.6713297788082025e-05, "loss": 0.2185, "step": 11373, "teacher_loss": 0.2082090973854065 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.31177061796188354, "learning_rate": 2.671187886562232e-05, "loss": 0.3025, "step": 11374, "teacher_loss": 0.30150407552719116 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4346925616264343, "learning_rate": 2.671045967464434e-05, "loss": 0.3049, "step": 11375, "teacher_loss": 0.2905040383338928 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4796760678291321, "learning_rate": 2.6709040215180633e-05, "loss": 0.2361, "step": 11376, "teacher_loss": 0.20907297730445862 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4441388249397278, "learning_rate": 2.670762048726374e-05, "loss": 0.4339, "step": 11377, "teacher_loss": 0.4327280819416046 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 1.0570833683013916, "learning_rate": 2.6706200490926204e-05, "loss": 0.7004, "step": 11378, "teacher_loss": 0.6607751846313477 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.3764675557613373, "learning_rate": 2.6704780226200593e-05, "loss": 0.2676, "step": 11379, "teacher_loss": 0.25549939274787903 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4097643494606018, "learning_rate": 2.6703359693119468e-05, "loss": 0.2764, "step": 11380, "teacher_loss": 0.26159554719924927 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.17159897089004517, "learning_rate": 2.6701938891715385e-05, "loss": 0.2116, "step": 11381, "teacher_loss": 0.2160591036081314 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.8258722424507141, "learning_rate": 2.6700517822020934e-05, "loss": 0.3238, "step": 11382, "teacher_loss": 0.2680235207080841 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.3728242516517639, "learning_rate": 2.6699096484068686e-05, "loss": 0.2009, "step": 11383, "teacher_loss": 0.18182902038097382 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.8285374641418457, "learning_rate": 2.6697674877891234e-05, "loss": 0.4015, "step": 11384, "teacher_loss": 0.3540651798248291 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.6131064891815186, "learning_rate": 2.6696253003521165e-05, "loss": 0.2731, "step": 11385, "teacher_loss": 0.23526602983474731 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.11068959534168243, "learning_rate": 2.6694830860991087e-05, "loss": 0.2356, "step": 11386, "teacher_loss": 0.24946291744709015 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.6774469017982483, "learning_rate": 2.6693408450333594e-05, "loss": 0.2876, "step": 11387, "teacher_loss": 0.2442491352558136 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.6583279371261597, "learning_rate": 2.669198577158131e-05, "loss": 0.2307, "step": 11388, "teacher_loss": 0.1831304132938385 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.2326776087284088, "learning_rate": 2.669056282476684e-05, "loss": 0.1644, "step": 11389, "teacher_loss": 0.1568393111228943 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.40882301330566406, "learning_rate": 2.668913960992282e-05, "loss": 0.3717, "step": 11390, "teacher_loss": 0.3675907850265503 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.16709105670452118, "learning_rate": 2.6687716127081873e-05, "loss": 0.2232, "step": 11391, "teacher_loss": 0.22937899827957153 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.6514356136322021, "learning_rate": 2.6686292376276637e-05, "loss": 0.2949, "step": 11392, "teacher_loss": 0.2552560269832611 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 1.2707115411758423, "learning_rate": 2.6684868357539754e-05, "loss": 0.6124, "step": 11393, "teacher_loss": 0.5392736792564392 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5533962249755859, "learning_rate": 2.6683444070903867e-05, "loss": 0.3121, "step": 11394, "teacher_loss": 0.2852592468261719 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.2995579242706299, "learning_rate": 2.6682019516401644e-05, "loss": 0.2554, "step": 11395, "teacher_loss": 0.250477135181427 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4474751949310303, "learning_rate": 2.668059469406574e-05, "loss": 0.2759, "step": 11396, "teacher_loss": 0.2568162977695465 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4892094135284424, "learning_rate": 2.667916960392881e-05, "loss": 0.4394, "step": 11397, "teacher_loss": 0.4338518977165222 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.416218101978302, "learning_rate": 2.6677744246023543e-05, "loss": 0.2643, "step": 11398, "teacher_loss": 0.24744123220443726 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5471934080123901, "learning_rate": 2.667631862038261e-05, "loss": 0.2875, "step": 11399, "teacher_loss": 0.2586797773838043 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.23702137172222137, "learning_rate": 2.6674892727038705e-05, "loss": 0.1887, "step": 11400, "teacher_loss": 0.1832950860261917 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5204581022262573, "learning_rate": 2.6673466566024507e-05, "loss": 0.2367, "step": 11401, "teacher_loss": 0.20518356561660767 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 1.1431727409362793, "learning_rate": 2.667204013737272e-05, "loss": 0.5509, "step": 11402, "teacher_loss": 0.48510515689849854 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.48504188656806946, "learning_rate": 2.6670613441116044e-05, "loss": 0.2753, "step": 11403, "teacher_loss": 0.25195983052253723 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.29413798451423645, "learning_rate": 2.66691864772872e-05, "loss": 0.2595, "step": 11404, "teacher_loss": 0.2556458115577698 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.18042083084583282, "learning_rate": 2.666775924591889e-05, "loss": 0.2159, "step": 11405, "teacher_loss": 0.2198476791381836 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.753820538520813, "learning_rate": 2.6666331747043842e-05, "loss": 0.3158, "step": 11406, "teacher_loss": 0.26708540320396423 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4868769645690918, "learning_rate": 2.6664903980694788e-05, "loss": 0.4509, "step": 11407, "teacher_loss": 0.4469580352306366 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4388984143733978, "learning_rate": 2.6663475946904455e-05, "loss": 0.3446, "step": 11408, "teacher_loss": 0.334101140499115 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5309861898422241, "learning_rate": 2.6662047645705594e-05, "loss": 0.2445, "step": 11409, "teacher_loss": 0.21266715228557587 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.3994253873825073, "learning_rate": 2.666061907713094e-05, "loss": 0.1733, "step": 11410, "teacher_loss": 0.14822141826152802 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5392282009124756, "learning_rate": 2.665919024121325e-05, "loss": 0.2879, "step": 11411, "teacher_loss": 0.2599894106388092 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.40017589926719666, "learning_rate": 2.665776113798529e-05, "loss": 0.2138, "step": 11412, "teacher_loss": 0.19310961663722992 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.2381407469511032, "learning_rate": 2.6656331767479812e-05, "loss": 0.2444, "step": 11413, "teacher_loss": 0.24511444568634033 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.506705641746521, "learning_rate": 2.6654902129729598e-05, "loss": 0.244, "step": 11414, "teacher_loss": 0.21480131149291992 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.20453046262264252, "learning_rate": 2.6653472224767418e-05, "loss": 0.2079, "step": 11415, "teacher_loss": 0.20824390649795532 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4179179072380066, "learning_rate": 2.6652042052626065e-05, "loss": 0.1925, "step": 11416, "teacher_loss": 0.16740979254245758 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.4616493880748749, "learning_rate": 2.6650611613338314e-05, "loss": 0.2718, "step": 11417, "teacher_loss": 0.2506926655769348 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.715360164642334, "learning_rate": 2.6649180906936975e-05, "loss": 0.3145, "step": 11418, "teacher_loss": 0.27000635862350464 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.25587835907936096, "learning_rate": 2.664774993345484e-05, "loss": 0.2004, "step": 11419, "teacher_loss": 0.19425319135189056 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.34409594535827637, "learning_rate": 2.6646318692924727e-05, "loss": 0.2402, "step": 11420, "teacher_loss": 0.22863470017910004 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.21863621473312378, "learning_rate": 2.664488718537944e-05, "loss": 0.214, "step": 11421, "teacher_loss": 0.21350552141666412 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.6358727216720581, "learning_rate": 2.66434554108518e-05, "loss": 0.3149, "step": 11422, "teacher_loss": 0.27927178144454956 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.7564515471458435, "learning_rate": 2.664202336937464e-05, "loss": 0.5201, "step": 11423, "teacher_loss": 0.4938763976097107 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.5134033560752869, "learning_rate": 2.664059106098079e-05, "loss": 0.2817, "step": 11424, "teacher_loss": 0.2559245228767395 }, { "compression_loss": 0.0, "epoch": 2.06, "label_loss": 0.9037539958953857, "learning_rate": 2.6639158485703087e-05, "loss": 0.3353, "step": 11425, "teacher_loss": 0.2721256911754608 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.7813578844070435, "learning_rate": 2.6637725643574372e-05, "loss": 0.3245, "step": 11426, "teacher_loss": 0.27375704050064087 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.5056071877479553, "learning_rate": 2.66362925346275e-05, "loss": 0.2533, "step": 11427, "teacher_loss": 0.22531168162822723 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.6999883055686951, "learning_rate": 2.663485915889533e-05, "loss": 0.3655, "step": 11428, "teacher_loss": 0.32831957936286926 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.9246419072151184, "learning_rate": 2.6633425516410723e-05, "loss": 0.3743, "step": 11429, "teacher_loss": 0.3130955100059509 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.2807331383228302, "learning_rate": 2.6631991607206546e-05, "loss": 0.2642, "step": 11430, "teacher_loss": 0.2623119354248047 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.34489190578460693, "learning_rate": 2.663055743131568e-05, "loss": 0.2422, "step": 11431, "teacher_loss": 0.23082560300827026 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.807802677154541, "learning_rate": 2.6629122988770994e-05, "loss": 0.3389, "step": 11432, "teacher_loss": 0.2867452800273895 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.7090919613838196, "learning_rate": 2.662768827960539e-05, "loss": 0.344, "step": 11433, "teacher_loss": 0.30344754457473755 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.5698674917221069, "learning_rate": 2.6626253303851753e-05, "loss": 0.3525, "step": 11434, "teacher_loss": 0.328380823135376 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.7092264294624329, "learning_rate": 2.6624818061542987e-05, "loss": 0.5226, "step": 11435, "teacher_loss": 0.5018178224563599 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.4048399329185486, "learning_rate": 2.6623382552711994e-05, "loss": 0.331, "step": 11436, "teacher_loss": 0.322791188955307 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.24470043182373047, "learning_rate": 2.6621946777391693e-05, "loss": 0.2157, "step": 11437, "teacher_loss": 0.21246306598186493 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3553011417388916, "learning_rate": 2.662051073561499e-05, "loss": 0.2741, "step": 11438, "teacher_loss": 0.26510071754455566 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.21672523021697998, "learning_rate": 2.6619074427414817e-05, "loss": 0.2586, "step": 11439, "teacher_loss": 0.26324760913848877 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.5286556482315063, "learning_rate": 2.6617637852824107e-05, "loss": 0.3539, "step": 11440, "teacher_loss": 0.33448976278305054 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3425612449645996, "learning_rate": 2.6616201011875792e-05, "loss": 0.2759, "step": 11441, "teacher_loss": 0.2684401869773865 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.34516701102256775, "learning_rate": 2.6614763904602812e-05, "loss": 0.2883, "step": 11442, "teacher_loss": 0.2819909453392029 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.469087153673172, "learning_rate": 2.661332653103812e-05, "loss": 0.301, "step": 11443, "teacher_loss": 0.2823309600353241 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.15045467019081116, "learning_rate": 2.661188889121467e-05, "loss": 0.1932, "step": 11444, "teacher_loss": 0.19799047708511353 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.32715171575546265, "learning_rate": 2.661045098516542e-05, "loss": 0.2446, "step": 11445, "teacher_loss": 0.23545153439044952 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.4435218274593353, "learning_rate": 2.660901281292334e-05, "loss": 0.2106, "step": 11446, "teacher_loss": 0.18474188446998596 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.5281405448913574, "learning_rate": 2.66075743745214e-05, "loss": 0.2609, "step": 11447, "teacher_loss": 0.23123279213905334 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3339434266090393, "learning_rate": 2.6606135669992583e-05, "loss": 0.2472, "step": 11448, "teacher_loss": 0.23753131926059723 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3242594003677368, "learning_rate": 2.6604696699369872e-05, "loss": 0.2995, "step": 11449, "teacher_loss": 0.29677414894104004 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.5262978076934814, "learning_rate": 2.6603257462686258e-05, "loss": 0.2556, "step": 11450, "teacher_loss": 0.22556047141551971 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.22574535012245178, "learning_rate": 2.660181795997474e-05, "loss": 0.1648, "step": 11451, "teacher_loss": 0.15807875990867615 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.46097689867019653, "learning_rate": 2.6600378191268317e-05, "loss": 0.3554, "step": 11452, "teacher_loss": 0.34369906783103943 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.4093618392944336, "learning_rate": 2.6598938156600005e-05, "loss": 0.2279, "step": 11453, "teacher_loss": 0.2077624648809433 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.8208404779434204, "learning_rate": 2.6597497856002815e-05, "loss": 0.5395, "step": 11454, "teacher_loss": 0.5082308053970337 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.4899601340293884, "learning_rate": 2.6596057289509773e-05, "loss": 0.2511, "step": 11455, "teacher_loss": 0.22460442781448364 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.597091794013977, "learning_rate": 2.65946164571539e-05, "loss": 0.3034, "step": 11456, "teacher_loss": 0.2707829475402832 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.8050843477249146, "learning_rate": 2.6593175358968236e-05, "loss": 0.2514, "step": 11457, "teacher_loss": 0.18992206454277039 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.31587517261505127, "learning_rate": 2.659173399498582e-05, "loss": 0.18, "step": 11458, "teacher_loss": 0.16488569974899292 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.2840992510318756, "learning_rate": 2.6590292365239695e-05, "loss": 0.2673, "step": 11459, "teacher_loss": 0.26540112495422363 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 1.3859256505966187, "learning_rate": 2.6588850469762916e-05, "loss": 0.3554, "step": 11460, "teacher_loss": 0.24092447757720947 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.426033079624176, "learning_rate": 2.6587408308588544e-05, "loss": 0.2885, "step": 11461, "teacher_loss": 0.2732661962509155 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3164922595024109, "learning_rate": 2.658596588174964e-05, "loss": 0.1829, "step": 11462, "teacher_loss": 0.16805408895015717 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.2843630909919739, "learning_rate": 2.6584523189279272e-05, "loss": 0.2109, "step": 11463, "teacher_loss": 0.20278067886829376 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3334880471229553, "learning_rate": 2.658308023121052e-05, "loss": 0.249, "step": 11464, "teacher_loss": 0.23963509500026703 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.16369962692260742, "learning_rate": 2.6581637007576463e-05, "loss": 0.2105, "step": 11465, "teacher_loss": 0.21565020084381104 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.35381096601486206, "learning_rate": 2.65801935184102e-05, "loss": 0.2575, "step": 11466, "teacher_loss": 0.24675695598125458 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.49458664655685425, "learning_rate": 2.6578749763744815e-05, "loss": 0.2436, "step": 11467, "teacher_loss": 0.2157311737537384 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.7266108989715576, "learning_rate": 2.657730574361341e-05, "loss": 0.2407, "step": 11468, "teacher_loss": 0.18672975897789001 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.1076120063662529, "learning_rate": 2.65758614580491e-05, "loss": 0.1619, "step": 11469, "teacher_loss": 0.16797390580177307 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.45156431198120117, "learning_rate": 2.6574416907084993e-05, "loss": 0.2822, "step": 11470, "teacher_loss": 0.2633805274963379 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.769243061542511, "learning_rate": 2.6572972090754205e-05, "loss": 0.3033, "step": 11471, "teacher_loss": 0.25148072838783264 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.39401692152023315, "learning_rate": 2.6571527009089868e-05, "loss": 0.3032, "step": 11472, "teacher_loss": 0.2931518852710724 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3975682258605957, "learning_rate": 2.657008166212511e-05, "loss": 0.2405, "step": 11473, "teacher_loss": 0.2231028825044632 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.2454376369714737, "learning_rate": 2.656863604989306e-05, "loss": 0.2289, "step": 11474, "teacher_loss": 0.22706955671310425 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.39082401990890503, "learning_rate": 2.656719017242688e-05, "loss": 0.2255, "step": 11475, "teacher_loss": 0.207082137465477 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.36211955547332764, "learning_rate": 2.6565744029759702e-05, "loss": 0.3358, "step": 11476, "teacher_loss": 0.33288872241973877 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.31308960914611816, "learning_rate": 2.6564297621924696e-05, "loss": 0.3274, "step": 11477, "teacher_loss": 0.3289948105812073 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 1.1792408227920532, "learning_rate": 2.656285094895501e-05, "loss": 0.4596, "step": 11478, "teacher_loss": 0.3796197175979614 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.513270378112793, "learning_rate": 2.656140401088383e-05, "loss": 0.2464, "step": 11479, "teacher_loss": 0.21678464114665985 }, { "compression_loss": 0.0, "epoch": 2.07, "label_loss": 0.3803479075431824, "learning_rate": 2.655995680774431e-05, "loss": 0.2962, "step": 11480, "teacher_loss": 0.28682851791381836 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3974624276161194, "learning_rate": 2.6558509339569638e-05, "loss": 0.1999, "step": 11481, "teacher_loss": 0.1779431849718094 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.11776053160429001, "learning_rate": 2.6557061606393008e-05, "loss": 0.202, "step": 11482, "teacher_loss": 0.21130812168121338 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.218589186668396, "learning_rate": 2.65556136082476e-05, "loss": 0.171, "step": 11483, "teacher_loss": 0.1656721830368042 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5219054222106934, "learning_rate": 2.655416534516662e-05, "loss": 0.2548, "step": 11484, "teacher_loss": 0.22517472505569458 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.778640627861023, "learning_rate": 2.6552716817183263e-05, "loss": 0.3712, "step": 11485, "teacher_loss": 0.3259029984474182 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3549985885620117, "learning_rate": 2.6551268024330754e-05, "loss": 0.224, "step": 11486, "teacher_loss": 0.20948973298072815 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5217106938362122, "learning_rate": 2.6549818966642297e-05, "loss": 0.3921, "step": 11487, "teacher_loss": 0.37764373421669006 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.27615150809288025, "learning_rate": 2.654836964415112e-05, "loss": 0.2988, "step": 11488, "teacher_loss": 0.3013116121292114 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.7139608263969421, "learning_rate": 2.6546920056890456e-05, "loss": 0.2958, "step": 11489, "teacher_loss": 0.2493637204170227 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.12833325564861298, "learning_rate": 2.654547020489353e-05, "loss": 0.2583, "step": 11490, "teacher_loss": 0.27271461486816406 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3258852958679199, "learning_rate": 2.654402008819359e-05, "loss": 0.3044, "step": 11491, "teacher_loss": 0.3019777536392212 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3171885907649994, "learning_rate": 2.654256970682388e-05, "loss": 0.234, "step": 11492, "teacher_loss": 0.22480100393295288 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.48541080951690674, "learning_rate": 2.654111906081765e-05, "loss": 0.2637, "step": 11493, "teacher_loss": 0.2390764355659485 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.47897306084632874, "learning_rate": 2.6539668150208163e-05, "loss": 0.192, "step": 11494, "teacher_loss": 0.16007855534553528 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.7920082211494446, "learning_rate": 2.6538216975028685e-05, "loss": 0.3755, "step": 11495, "teacher_loss": 0.3292248845100403 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.48282814025878906, "learning_rate": 2.6536765535312484e-05, "loss": 0.334, "step": 11496, "teacher_loss": 0.31745779514312744 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5188870429992676, "learning_rate": 2.6535313831092836e-05, "loss": 0.2768, "step": 11497, "teacher_loss": 0.24995186924934387 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.31040865182876587, "learning_rate": 2.6533861862403028e-05, "loss": 0.2305, "step": 11498, "teacher_loss": 0.22160229086875916 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5093654990196228, "learning_rate": 2.653240962927635e-05, "loss": 0.2656, "step": 11499, "teacher_loss": 0.23854956030845642 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.30939435958862305, "learning_rate": 2.6530957131746095e-05, "loss": 0.3213, "step": 11500, "teacher_loss": 0.32258930802345276 }, { "epoch": 2.08, "eval_exact_match": 79.49858088930937, "eval_f1": 86.98967637166847, "step": 11500 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.9235019683837891, "learning_rate": 2.652950436984556e-05, "loss": 0.3569, "step": 11501, "teacher_loss": 0.2939265966415405 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.21222499012947083, "learning_rate": 2.6528051343608063e-05, "loss": 0.1878, "step": 11502, "teacher_loss": 0.18513526022434235 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.8145816326141357, "learning_rate": 2.6526598053066904e-05, "loss": 0.4081, "step": 11503, "teacher_loss": 0.36292803287506104 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5495126247406006, "learning_rate": 2.6525144498255417e-05, "loss": 0.2945, "step": 11504, "teacher_loss": 0.266146719455719 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4471229016780853, "learning_rate": 2.6523690679206922e-05, "loss": 0.2837, "step": 11505, "teacher_loss": 0.2655555009841919 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3105457127094269, "learning_rate": 2.6522236595954747e-05, "loss": 0.2031, "step": 11506, "teacher_loss": 0.19117000699043274 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3489554226398468, "learning_rate": 2.6520782248532226e-05, "loss": 0.1961, "step": 11507, "teacher_loss": 0.17906615138053894 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.23212981224060059, "learning_rate": 2.6519327636972716e-05, "loss": 0.2283, "step": 11508, "teacher_loss": 0.22782379388809204 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.8445788621902466, "learning_rate": 2.651787276130956e-05, "loss": 0.3827, "step": 11509, "teacher_loss": 0.3313617706298828 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.31710678339004517, "learning_rate": 2.6516417621576113e-05, "loss": 0.2812, "step": 11510, "teacher_loss": 0.27719205617904663 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.7960541844367981, "learning_rate": 2.651496221780574e-05, "loss": 0.6575, "step": 11511, "teacher_loss": 0.6421504020690918 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4644680321216583, "learning_rate": 2.65135065500318e-05, "loss": 0.1865, "step": 11512, "teacher_loss": 0.15564100444316864 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.7393025159835815, "learning_rate": 2.651205061828768e-05, "loss": 0.378, "step": 11513, "teacher_loss": 0.33785346150398254 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.267619788646698, "learning_rate": 2.6510594422606757e-05, "loss": 0.2544, "step": 11514, "teacher_loss": 0.2529555559158325 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5073104500770569, "learning_rate": 2.6509137963022408e-05, "loss": 0.22, "step": 11515, "teacher_loss": 0.18805575370788574 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.327262282371521, "learning_rate": 2.6507681239568034e-05, "loss": 0.2168, "step": 11516, "teacher_loss": 0.2044813632965088 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4161885976791382, "learning_rate": 2.650622425227703e-05, "loss": 0.3237, "step": 11517, "teacher_loss": 0.3133743405342102 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.2961266040802002, "learning_rate": 2.6504767001182807e-05, "loss": 0.2798, "step": 11518, "teacher_loss": 0.2780037820339203 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.27459368109703064, "learning_rate": 2.650330948631876e-05, "loss": 0.211, "step": 11519, "teacher_loss": 0.20397761464118958 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.44292938709259033, "learning_rate": 2.6501851707718322e-05, "loss": 0.206, "step": 11520, "teacher_loss": 0.1796289086341858 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5749096870422363, "learning_rate": 2.6500393665414906e-05, "loss": 0.7059, "step": 11521, "teacher_loss": 0.7204955816268921 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4794371426105499, "learning_rate": 2.649893535944195e-05, "loss": 0.3728, "step": 11522, "teacher_loss": 0.3610028028488159 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.1794937402009964, "learning_rate": 2.6497476789832873e-05, "loss": 0.2259, "step": 11523, "teacher_loss": 0.23105546832084656 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.7142660021781921, "learning_rate": 2.6496017956621126e-05, "loss": 0.5794, "step": 11524, "teacher_loss": 0.5644403696060181 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4158456325531006, "learning_rate": 2.6494558859840157e-05, "loss": 0.2554, "step": 11525, "teacher_loss": 0.23762312531471252 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4289650619029999, "learning_rate": 2.6493099499523416e-05, "loss": 0.2138, "step": 11526, "teacher_loss": 0.18991005420684814 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.5315886735916138, "learning_rate": 2.6491639875704358e-05, "loss": 0.2412, "step": 11527, "teacher_loss": 0.20890063047409058 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.9457077980041504, "learning_rate": 2.6490179988416453e-05, "loss": 0.2782, "step": 11528, "teacher_loss": 0.2040167599916458 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.31757789850234985, "learning_rate": 2.648871983769317e-05, "loss": 0.1751, "step": 11529, "teacher_loss": 0.1592797189950943 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4163209795951843, "learning_rate": 2.6487259423567988e-05, "loss": 0.2582, "step": 11530, "teacher_loss": 0.2406538426876068 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.3915764093399048, "learning_rate": 2.6485798746074383e-05, "loss": 0.4032, "step": 11531, "teacher_loss": 0.4044739603996277 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 1.0775914192199707, "learning_rate": 2.648433780524586e-05, "loss": 0.2503, "step": 11532, "teacher_loss": 0.15838435292243958 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4765702486038208, "learning_rate": 2.6482876601115892e-05, "loss": 0.3266, "step": 11533, "teacher_loss": 0.3099861741065979 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.4064400792121887, "learning_rate": 2.6481415133717996e-05, "loss": 0.2799, "step": 11534, "teacher_loss": 0.2658911943435669 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.8612875938415527, "learning_rate": 2.6479953403085668e-05, "loss": 0.4432, "step": 11535, "teacher_loss": 0.3967375159263611 }, { "compression_loss": 0.0, "epoch": 2.08, "label_loss": 0.2469785362482071, "learning_rate": 2.647849140925243e-05, "loss": 0.2348, "step": 11536, "teacher_loss": 0.23347502946853638 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.48398667573928833, "learning_rate": 2.6477029152251804e-05, "loss": 0.2074, "step": 11537, "teacher_loss": 0.1766219586133957 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3509359657764435, "learning_rate": 2.6475566632117305e-05, "loss": 0.2066, "step": 11538, "teacher_loss": 0.19056370854377747 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.21700313687324524, "learning_rate": 2.647410384888247e-05, "loss": 0.1886, "step": 11539, "teacher_loss": 0.1853916347026825 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4358169138431549, "learning_rate": 2.6472640802580835e-05, "loss": 0.3463, "step": 11540, "teacher_loss": 0.33630073070526123 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3147571086883545, "learning_rate": 2.6471177493245942e-05, "loss": 0.2904, "step": 11541, "teacher_loss": 0.2876918911933899 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3784254789352417, "learning_rate": 2.646971392091134e-05, "loss": 0.216, "step": 11542, "teacher_loss": 0.19793689250946045 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4209362268447876, "learning_rate": 2.646825008561059e-05, "loss": 0.3424, "step": 11543, "teacher_loss": 0.3336646258831024 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.7893935441970825, "learning_rate": 2.6466785987377248e-05, "loss": 0.5876, "step": 11544, "teacher_loss": 0.5651889443397522 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3821299076080322, "learning_rate": 2.646532162624488e-05, "loss": 0.2026, "step": 11545, "teacher_loss": 0.18260201811790466 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3684558868408203, "learning_rate": 2.646385700224706e-05, "loss": 0.1951, "step": 11546, "teacher_loss": 0.1758539080619812 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.2763667702674866, "learning_rate": 2.6462392115417374e-05, "loss": 0.2194, "step": 11547, "teacher_loss": 0.21307410299777985 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.23371244966983795, "learning_rate": 2.6460926965789403e-05, "loss": 0.2388, "step": 11548, "teacher_loss": 0.23935914039611816 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.397588849067688, "learning_rate": 2.6459461553396734e-05, "loss": 0.2771, "step": 11549, "teacher_loss": 0.26375043392181396 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 1.0016975402832031, "learning_rate": 2.6457995878272972e-05, "loss": 0.3498, "step": 11550, "teacher_loss": 0.27739956974983215 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.622583270072937, "learning_rate": 2.645652994045172e-05, "loss": 0.4905, "step": 11551, "teacher_loss": 0.4758085012435913 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.45356571674346924, "learning_rate": 2.645506373996658e-05, "loss": 0.2727, "step": 11552, "teacher_loss": 0.2525651454925537 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.8043028116226196, "learning_rate": 2.6453597276851173e-05, "loss": 0.4029, "step": 11553, "teacher_loss": 0.3582611680030823 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.6703791618347168, "learning_rate": 2.645213055113912e-05, "loss": 0.3104, "step": 11554, "teacher_loss": 0.2704000473022461 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3875727653503418, "learning_rate": 2.6450663562864052e-05, "loss": 0.293, "step": 11555, "teacher_loss": 0.28245264291763306 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.7905151844024658, "learning_rate": 2.6449196312059596e-05, "loss": 0.4572, "step": 11556, "teacher_loss": 0.42016997933387756 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.6901015043258667, "learning_rate": 2.64477287987594e-05, "loss": 0.3237, "step": 11557, "teacher_loss": 0.282986044883728 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.241962730884552, "learning_rate": 2.6446261022997098e-05, "loss": 0.1986, "step": 11558, "teacher_loss": 0.19382070004940033 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.1880953013896942, "learning_rate": 2.6444792984806352e-05, "loss": 0.1853, "step": 11559, "teacher_loss": 0.1849527508020401 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.5639216899871826, "learning_rate": 2.6443324684220815e-05, "loss": 0.3039, "step": 11560, "teacher_loss": 0.2750260829925537 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.26886558532714844, "learning_rate": 2.6441856121274154e-05, "loss": 0.2468, "step": 11561, "teacher_loss": 0.24436140060424805 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.7171804308891296, "learning_rate": 2.6440387296000037e-05, "loss": 0.2556, "step": 11562, "teacher_loss": 0.20427894592285156 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4552052915096283, "learning_rate": 2.6438918208432136e-05, "loss": 0.3841, "step": 11563, "teacher_loss": 0.37614595890045166 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.2889822721481323, "learning_rate": 2.643744885860414e-05, "loss": 0.2251, "step": 11564, "teacher_loss": 0.21797339618206024 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3351632058620453, "learning_rate": 2.6435979246549727e-05, "loss": 0.1853, "step": 11565, "teacher_loss": 0.16866150498390198 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4389292597770691, "learning_rate": 2.6434509372302602e-05, "loss": 0.2828, "step": 11566, "teacher_loss": 0.26550358533859253 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.8094568252563477, "learning_rate": 2.643303923589646e-05, "loss": 0.6219, "step": 11567, "teacher_loss": 0.6010293960571289 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3604455292224884, "learning_rate": 2.6431568837365e-05, "loss": 0.3207, "step": 11568, "teacher_loss": 0.3162683844566345 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4267175495624542, "learning_rate": 2.6430098176741943e-05, "loss": 0.3261, "step": 11569, "teacher_loss": 0.3148837685585022 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4980096220970154, "learning_rate": 2.6428627254061007e-05, "loss": 0.4433, "step": 11570, "teacher_loss": 0.43725964426994324 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.46014195680618286, "learning_rate": 2.6427156069355915e-05, "loss": 0.3016, "step": 11571, "teacher_loss": 0.2839767336845398 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.5203524231910706, "learning_rate": 2.6425684622660387e-05, "loss": 0.2015, "step": 11572, "teacher_loss": 0.16604149341583252 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.20247912406921387, "learning_rate": 2.642421291400817e-05, "loss": 0.3619, "step": 11573, "teacher_loss": 0.37958797812461853 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.9878513216972351, "learning_rate": 2.6422740943433004e-05, "loss": 0.4417, "step": 11574, "teacher_loss": 0.3810656666755676 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.5010071396827698, "learning_rate": 2.6421268710968634e-05, "loss": 0.2622, "step": 11575, "teacher_loss": 0.235645592212677 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.2708182632923126, "learning_rate": 2.6419796216648815e-05, "loss": 0.3177, "step": 11576, "teacher_loss": 0.32288557291030884 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4682545065879822, "learning_rate": 2.6418323460507307e-05, "loss": 0.3218, "step": 11577, "teacher_loss": 0.30554401874542236 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.606867253780365, "learning_rate": 2.641685044257788e-05, "loss": 0.399, "step": 11578, "teacher_loss": 0.37594592571258545 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3676658868789673, "learning_rate": 2.64153771628943e-05, "loss": 0.1977, "step": 11579, "teacher_loss": 0.17878666520118713 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3219849169254303, "learning_rate": 2.6413903621490343e-05, "loss": 0.2121, "step": 11580, "teacher_loss": 0.19991880655288696 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.5104255676269531, "learning_rate": 2.64124298183998e-05, "loss": 0.2762, "step": 11581, "teacher_loss": 0.2501417398452759 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4044257402420044, "learning_rate": 2.6410955753656454e-05, "loss": 0.2095, "step": 11582, "teacher_loss": 0.18786993622779846 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3525312840938568, "learning_rate": 2.6409481427294105e-05, "loss": 0.2127, "step": 11583, "teacher_loss": 0.1971331238746643 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.6474061012268066, "learning_rate": 2.640800683934656e-05, "loss": 0.5159, "step": 11584, "teacher_loss": 0.5012890100479126 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.7495552897453308, "learning_rate": 2.6406531989847615e-05, "loss": 0.303, "step": 11585, "teacher_loss": 0.2534202039241791 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.1992206871509552, "learning_rate": 2.640505687883109e-05, "loss": 0.234, "step": 11586, "teacher_loss": 0.23788242042064667 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3321964144706726, "learning_rate": 2.6403581506330807e-05, "loss": 0.2551, "step": 11587, "teacher_loss": 0.24657011032104492 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.580051064491272, "learning_rate": 2.6402105872380594e-05, "loss": 0.5082, "step": 11588, "teacher_loss": 0.5001651048660278 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.4092367887496948, "learning_rate": 2.640062997701427e-05, "loss": 0.228, "step": 11589, "teacher_loss": 0.20781967043876648 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.7279612421989441, "learning_rate": 2.6399153820265687e-05, "loss": 0.3014, "step": 11590, "teacher_loss": 0.25404855608940125 }, { "compression_loss": 0.0, "epoch": 2.09, "label_loss": 0.3218912184238434, "learning_rate": 2.6397677402168682e-05, "loss": 0.2654, "step": 11591, "teacher_loss": 0.2590905427932739 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.36744678020477295, "learning_rate": 2.6396200722757107e-05, "loss": 0.3881, "step": 11592, "teacher_loss": 0.39037349820137024 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.4213526248931885, "learning_rate": 2.639472378206482e-05, "loss": 0.1865, "step": 11593, "teacher_loss": 0.16042251884937286 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5628147125244141, "learning_rate": 2.6393246580125672e-05, "loss": 0.4136, "step": 11594, "teacher_loss": 0.39704567193984985 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.11632384359836578, "learning_rate": 2.6391769116973545e-05, "loss": 0.1418, "step": 11595, "teacher_loss": 0.14466771483421326 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.45982855558395386, "learning_rate": 2.6390291392642305e-05, "loss": 0.3066, "step": 11596, "teacher_loss": 0.28962796926498413 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.48725011944770813, "learning_rate": 2.638881340716583e-05, "loss": 0.2133, "step": 11597, "teacher_loss": 0.18290892243385315 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5628359317779541, "learning_rate": 2.6387335160578012e-05, "loss": 0.3256, "step": 11598, "teacher_loss": 0.2992381453514099 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6120871901512146, "learning_rate": 2.638585665291274e-05, "loss": 0.2968, "step": 11599, "teacher_loss": 0.2617568373680115 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.31551745533943176, "learning_rate": 2.6384377884203912e-05, "loss": 0.2168, "step": 11600, "teacher_loss": 0.20583681762218475 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5428146123886108, "learning_rate": 2.638289885448543e-05, "loss": 0.3146, "step": 11601, "teacher_loss": 0.2892981171607971 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.27808085083961487, "learning_rate": 2.63814195637912e-05, "loss": 0.2667, "step": 11602, "teacher_loss": 0.2654552161693573 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.2951381206512451, "learning_rate": 2.637994001215515e-05, "loss": 0.2869, "step": 11603, "teacher_loss": 0.2860385477542877 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 1.4654958248138428, "learning_rate": 2.637846019961119e-05, "loss": 0.5196, "step": 11604, "teacher_loss": 0.4144832491874695 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5902531147003174, "learning_rate": 2.6376980126193256e-05, "loss": 0.3088, "step": 11605, "teacher_loss": 0.27752721309661865 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5012961030006409, "learning_rate": 2.637549979193528e-05, "loss": 0.2773, "step": 11606, "teacher_loss": 0.25240397453308105 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.3962011933326721, "learning_rate": 2.6374019196871193e-05, "loss": 0.259, "step": 11607, "teacher_loss": 0.2437729835510254 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5605190992355347, "learning_rate": 2.6372538341034952e-05, "loss": 0.2608, "step": 11608, "teacher_loss": 0.22749318182468414 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.49389132857322693, "learning_rate": 2.6371057224460497e-05, "loss": 0.1969, "step": 11609, "teacher_loss": 0.16388659179210663 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.22084566950798035, "learning_rate": 2.6369575847181795e-05, "loss": 0.2625, "step": 11610, "teacher_loss": 0.26718103885650635 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.46870607137680054, "learning_rate": 2.636809420923281e-05, "loss": 0.1855, "step": 11611, "teacher_loss": 0.15398085117340088 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.36501044034957886, "learning_rate": 2.6366612310647503e-05, "loss": 0.4166, "step": 11612, "teacher_loss": 0.42231959104537964 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 1.0721452236175537, "learning_rate": 2.6365130151459857e-05, "loss": 0.4061, "step": 11613, "teacher_loss": 0.33210888504981995 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6533260345458984, "learning_rate": 2.636364773170385e-05, "loss": 0.2743, "step": 11614, "teacher_loss": 0.23213700950145721 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.7723872661590576, "learning_rate": 2.636216505141347e-05, "loss": 0.3336, "step": 11615, "teacher_loss": 0.2848455309867859 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.4633684754371643, "learning_rate": 2.6360682110622714e-05, "loss": 0.3084, "step": 11616, "teacher_loss": 0.29114609956741333 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.13535670936107635, "learning_rate": 2.6359198909365578e-05, "loss": 0.1491, "step": 11617, "teacher_loss": 0.15066124498844147 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6626337170600891, "learning_rate": 2.6357715447676063e-05, "loss": 0.3263, "step": 11618, "teacher_loss": 0.28890860080718994 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.2539743483066559, "learning_rate": 2.635623172558819e-05, "loss": 0.1915, "step": 11619, "teacher_loss": 0.18451187014579773 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5255005359649658, "learning_rate": 2.635474774313597e-05, "loss": 0.3201, "step": 11620, "teacher_loss": 0.2972955107688904 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6569070816040039, "learning_rate": 2.6353263500353427e-05, "loss": 0.3822, "step": 11621, "teacher_loss": 0.35166114568710327 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5396597385406494, "learning_rate": 2.635177899727459e-05, "loss": 0.5363, "step": 11622, "teacher_loss": 0.5358933806419373 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.7170656323432922, "learning_rate": 2.6350294233933493e-05, "loss": 0.3642, "step": 11623, "teacher_loss": 0.32500770688056946 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.779828667640686, "learning_rate": 2.6348809210364185e-05, "loss": 0.4942, "step": 11624, "teacher_loss": 0.462502121925354 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5123329758644104, "learning_rate": 2.6347323926600702e-05, "loss": 0.2371, "step": 11625, "teacher_loss": 0.20656032860279083 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.8832279443740845, "learning_rate": 2.6345838382677107e-05, "loss": 0.3167, "step": 11626, "teacher_loss": 0.25376179814338684 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.13777241110801697, "learning_rate": 2.634435257862745e-05, "loss": 0.1507, "step": 11627, "teacher_loss": 0.15217572450637817 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5983982682228088, "learning_rate": 2.63428665144858e-05, "loss": 0.3114, "step": 11628, "teacher_loss": 0.27953118085861206 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.34603428840637207, "learning_rate": 2.6341380190286233e-05, "loss": 0.2081, "step": 11629, "teacher_loss": 0.19276206195354462 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.3325658440589905, "learning_rate": 2.6339893606062823e-05, "loss": 0.1922, "step": 11630, "teacher_loss": 0.17658621072769165 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.2541870176792145, "learning_rate": 2.6338406761849647e-05, "loss": 0.2944, "step": 11631, "teacher_loss": 0.2988821864128113 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.20130598545074463, "learning_rate": 2.63369196576808e-05, "loss": 0.3598, "step": 11632, "teacher_loss": 0.3774341940879822 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.25852763652801514, "learning_rate": 2.6335432293590377e-05, "loss": 0.1703, "step": 11633, "teacher_loss": 0.16053098440170288 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.7965031862258911, "learning_rate": 2.633394466961247e-05, "loss": 0.2626, "step": 11634, "teacher_loss": 0.20329627394676208 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6755613088607788, "learning_rate": 2.6332456785781198e-05, "loss": 0.2316, "step": 11635, "teacher_loss": 0.1823037564754486 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.4504447281360626, "learning_rate": 2.6330968642130665e-05, "loss": 0.2945, "step": 11636, "teacher_loss": 0.2771519124507904 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.3466416299343109, "learning_rate": 2.6329480238694997e-05, "loss": 0.2082, "step": 11637, "teacher_loss": 0.19284410774707794 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.4657096266746521, "learning_rate": 2.6327991575508314e-05, "loss": 0.2554, "step": 11638, "teacher_loss": 0.23197926580905914 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.3914950489997864, "learning_rate": 2.6326502652604745e-05, "loss": 0.2653, "step": 11639, "teacher_loss": 0.2512850761413574 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.4722428023815155, "learning_rate": 2.632501347001843e-05, "loss": 0.1823, "step": 11640, "teacher_loss": 0.15013083815574646 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6649363040924072, "learning_rate": 2.6323524027783513e-05, "loss": 0.3725, "step": 11641, "teacher_loss": 0.3400045931339264 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.39294326305389404, "learning_rate": 2.6322034325934134e-05, "loss": 0.2827, "step": 11642, "teacher_loss": 0.27043092250823975 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.6809706687927246, "learning_rate": 2.6320544364504457e-05, "loss": 0.2533, "step": 11643, "teacher_loss": 0.20577149093151093 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5013149380683899, "learning_rate": 2.6319054143528633e-05, "loss": 0.2624, "step": 11644, "teacher_loss": 0.23586280643939972 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.5766116976737976, "learning_rate": 2.6317563663040842e-05, "loss": 0.1847, "step": 11645, "teacher_loss": 0.14115270972251892 }, { "compression_loss": 0.0, "epoch": 2.1, "label_loss": 0.39663460850715637, "learning_rate": 2.6316072923075246e-05, "loss": 0.4255, "step": 11646, "teacher_loss": 0.42867511510849 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.41966313123703003, "learning_rate": 2.6314581923666022e-05, "loss": 0.2703, "step": 11647, "teacher_loss": 0.25365710258483887 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.6521139740943909, "learning_rate": 2.6313090664847358e-05, "loss": 0.3639, "step": 11648, "teacher_loss": 0.3318377137184143 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.8480049967765808, "learning_rate": 2.6311599146653446e-05, "loss": 0.3856, "step": 11649, "teacher_loss": 0.3342652916908264 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.7971171140670776, "learning_rate": 2.6310107369118476e-05, "loss": 0.3437, "step": 11650, "teacher_loss": 0.29330623149871826 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.4783957004547119, "learning_rate": 2.6308615332276658e-05, "loss": 0.2556, "step": 11651, "teacher_loss": 0.23084627091884613 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.6096468567848206, "learning_rate": 2.6307123036162192e-05, "loss": 0.3317, "step": 11652, "teacher_loss": 0.30079346895217896 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.19338107109069824, "learning_rate": 2.6305630480809294e-05, "loss": 0.2232, "step": 11653, "teacher_loss": 0.22650833427906036 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5757306814193726, "learning_rate": 2.6304137666252185e-05, "loss": 0.2733, "step": 11654, "teacher_loss": 0.2396695613861084 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.24960431456565857, "learning_rate": 2.6302644592525098e-05, "loss": 0.3587, "step": 11655, "teacher_loss": 0.3707889914512634 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.4055192470550537, "learning_rate": 2.630115125966225e-05, "loss": 0.2862, "step": 11656, "teacher_loss": 0.27299222350120544 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.34686267375946045, "learning_rate": 2.629965766769789e-05, "loss": 0.2228, "step": 11657, "teacher_loss": 0.20901940762996674 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.20489606261253357, "learning_rate": 2.6298163816666258e-05, "loss": 0.222, "step": 11658, "teacher_loss": 0.2238956093788147 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.369179904460907, "learning_rate": 2.62966697066016e-05, "loss": 0.3928, "step": 11659, "teacher_loss": 0.39540189504623413 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.3309842050075531, "learning_rate": 2.6295175337538178e-05, "loss": 0.2053, "step": 11660, "teacher_loss": 0.19132784008979797 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 1.2091197967529297, "learning_rate": 2.6293680709510247e-05, "loss": 0.4527, "step": 11661, "teacher_loss": 0.3685988187789917 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5470733046531677, "learning_rate": 2.629218582255208e-05, "loss": 0.4332, "step": 11662, "teacher_loss": 0.4206019639968872 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.802361011505127, "learning_rate": 2.629069067669795e-05, "loss": 0.514, "step": 11663, "teacher_loss": 0.48192915320396423 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.42300403118133545, "learning_rate": 2.628919527198213e-05, "loss": 0.2187, "step": 11664, "teacher_loss": 0.19602477550506592 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.329264760017395, "learning_rate": 2.628769960843891e-05, "loss": 0.2949, "step": 11665, "teacher_loss": 0.29107093811035156 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.7769129872322083, "learning_rate": 2.628620368610258e-05, "loss": 0.3332, "step": 11666, "teacher_loss": 0.2838786542415619 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.796055793762207, "learning_rate": 2.6284707505007442e-05, "loss": 0.2779, "step": 11667, "teacher_loss": 0.22037874162197113 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.42795711755752563, "learning_rate": 2.6283211065187787e-05, "loss": 0.334, "step": 11668, "teacher_loss": 0.3235991597175598 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.78924560546875, "learning_rate": 2.6281714366677935e-05, "loss": 0.3358, "step": 11669, "teacher_loss": 0.28545546531677246 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5016790628433228, "learning_rate": 2.6280217409512196e-05, "loss": 0.2291, "step": 11670, "teacher_loss": 0.19881504774093628 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.4088430404663086, "learning_rate": 2.6278720193724893e-05, "loss": 0.2201, "step": 11671, "teacher_loss": 0.19909584522247314 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.49580615758895874, "learning_rate": 2.627722271935035e-05, "loss": 0.2706, "step": 11672, "teacher_loss": 0.24553239345550537 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.23618587851524353, "learning_rate": 2.62757249864229e-05, "loss": 0.2304, "step": 11673, "teacher_loss": 0.22970931231975555 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.2461511194705963, "learning_rate": 2.6274226994976885e-05, "loss": 0.1661, "step": 11674, "teacher_loss": 0.15725934505462646 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.850604236125946, "learning_rate": 2.6272728745046647e-05, "loss": 0.3856, "step": 11675, "teacher_loss": 0.333961546421051 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.28365811705589294, "learning_rate": 2.6271230236666534e-05, "loss": 0.2551, "step": 11676, "teacher_loss": 0.2519356310367584 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5430283546447754, "learning_rate": 2.6269731469870906e-05, "loss": 0.2612, "step": 11677, "teacher_loss": 0.22983039915561676 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.7752373814582825, "learning_rate": 2.626823244469412e-05, "loss": 0.2978, "step": 11678, "teacher_loss": 0.24476996064186096 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.25934267044067383, "learning_rate": 2.6266733161170553e-05, "loss": 0.1979, "step": 11679, "teacher_loss": 0.1911078691482544 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.41404810547828674, "learning_rate": 2.626523361933457e-05, "loss": 0.2255, "step": 11680, "teacher_loss": 0.2045830339193344 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5188248157501221, "learning_rate": 2.626373381922056e-05, "loss": 0.3525, "step": 11681, "teacher_loss": 0.3340300917625427 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.32895123958587646, "learning_rate": 2.62622337608629e-05, "loss": 0.2506, "step": 11682, "teacher_loss": 0.24193879961967468 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.45395970344543457, "learning_rate": 2.626073344429599e-05, "loss": 0.2051, "step": 11683, "teacher_loss": 0.17749321460723877 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5270957946777344, "learning_rate": 2.625923286955422e-05, "loss": 0.2872, "step": 11684, "teacher_loss": 0.2605031728744507 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.456696093082428, "learning_rate": 2.6257732036671995e-05, "loss": 0.2282, "step": 11685, "teacher_loss": 0.20284131169319153 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.8772329092025757, "learning_rate": 2.625623094568373e-05, "loss": 0.3602, "step": 11686, "teacher_loss": 0.3027776777744293 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.4781636595726013, "learning_rate": 2.6254729596623835e-05, "loss": 0.4559, "step": 11687, "teacher_loss": 0.45338141918182373 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.2834071218967438, "learning_rate": 2.6253227989526737e-05, "loss": 0.2087, "step": 11688, "teacher_loss": 0.20041434466838837 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.7103962302207947, "learning_rate": 2.625172612442686e-05, "loss": 0.2636, "step": 11689, "teacher_loss": 0.2139425426721573 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 1.012774109840393, "learning_rate": 2.6250224001358635e-05, "loss": 0.4744, "step": 11690, "teacher_loss": 0.41453754901885986 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.36150819063186646, "learning_rate": 2.6248721620356504e-05, "loss": 0.2668, "step": 11691, "teacher_loss": 0.2562660276889801 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.8701244592666626, "learning_rate": 2.6247218981454915e-05, "loss": 0.3206, "step": 11692, "teacher_loss": 0.2595844864845276 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.6041348576545715, "learning_rate": 2.6245716084688315e-05, "loss": 0.4924, "step": 11693, "teacher_loss": 0.4800390601158142 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.676291286945343, "learning_rate": 2.6244212930091156e-05, "loss": 0.2538, "step": 11694, "teacher_loss": 0.20687994360923767 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.580389142036438, "learning_rate": 2.6242709517697908e-05, "loss": 0.3332, "step": 11695, "teacher_loss": 0.3057156801223755 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.29815980792045593, "learning_rate": 2.6241205847543047e-05, "loss": 0.2674, "step": 11696, "teacher_loss": 0.26397261023521423 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.8606027364730835, "learning_rate": 2.623970191966103e-05, "loss": 0.3569, "step": 11697, "teacher_loss": 0.3008785843849182 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.4508809447288513, "learning_rate": 2.6238197734086346e-05, "loss": 0.2334, "step": 11698, "teacher_loss": 0.2092401683330536 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.2732694149017334, "learning_rate": 2.6236693290853486e-05, "loss": 0.1813, "step": 11699, "teacher_loss": 0.17109918594360352 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.3855234384536743, "learning_rate": 2.6235188589996933e-05, "loss": 0.3228, "step": 11700, "teacher_loss": 0.31583231687545776 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.5695276260375977, "learning_rate": 2.6233683631551197e-05, "loss": 0.3078, "step": 11701, "teacher_loss": 0.2787608504295349 }, { "compression_loss": 0.0, "epoch": 2.11, "label_loss": 0.8114451169967651, "learning_rate": 2.623217841555077e-05, "loss": 0.3951, "step": 11702, "teacher_loss": 0.34884822368621826 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3266856074333191, "learning_rate": 2.6230672942030172e-05, "loss": 0.285, "step": 11703, "teacher_loss": 0.2803923189640045 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.38466620445251465, "learning_rate": 2.6229167211023913e-05, "loss": 0.1891, "step": 11704, "teacher_loss": 0.1673886775970459 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.21358650922775269, "learning_rate": 2.6227661222566516e-05, "loss": 0.2028, "step": 11705, "teacher_loss": 0.20161615312099457 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.21007519960403442, "learning_rate": 2.6226154976692513e-05, "loss": 0.1669, "step": 11706, "teacher_loss": 0.16212603449821472 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.44750118255615234, "learning_rate": 2.6224648473436432e-05, "loss": 0.4428, "step": 11707, "teacher_loss": 0.44226568937301636 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4854198098182678, "learning_rate": 2.6223141712832813e-05, "loss": 0.2205, "step": 11708, "teacher_loss": 0.19101554155349731 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.40673476457595825, "learning_rate": 2.62216346949162e-05, "loss": 0.1805, "step": 11709, "teacher_loss": 0.15531401336193085 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.2709745168685913, "learning_rate": 2.6220127419721157e-05, "loss": 0.2087, "step": 11710, "teacher_loss": 0.20177140831947327 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.5460803508758545, "learning_rate": 2.6218619887282227e-05, "loss": 0.2401, "step": 11711, "teacher_loss": 0.20607876777648926 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.32501888275146484, "learning_rate": 2.6217112097633977e-05, "loss": 0.2582, "step": 11712, "teacher_loss": 0.2508161664009094 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.27705562114715576, "learning_rate": 2.6215604050810977e-05, "loss": 0.2049, "step": 11713, "teacher_loss": 0.19687145948410034 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.44990891218185425, "learning_rate": 2.62140957468478e-05, "loss": 0.2435, "step": 11714, "teacher_loss": 0.22052612900733948 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3055342137813568, "learning_rate": 2.6212587185779036e-05, "loss": 0.1773, "step": 11715, "teacher_loss": 0.16301541030406952 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4425654411315918, "learning_rate": 2.6211078367639262e-05, "loss": 0.444, "step": 11716, "teacher_loss": 0.44413354992866516 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.2734777629375458, "learning_rate": 2.6209569292463074e-05, "loss": 0.4187, "step": 11717, "teacher_loss": 0.43479156494140625 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3884049654006958, "learning_rate": 2.620805996028507e-05, "loss": 0.2121, "step": 11718, "teacher_loss": 0.19250774383544922 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 1.1643576622009277, "learning_rate": 2.6206550371139853e-05, "loss": 0.4344, "step": 11719, "teacher_loss": 0.35330379009246826 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4396713078022003, "learning_rate": 2.6205040525062036e-05, "loss": 0.2801, "step": 11720, "teacher_loss": 0.2623355984687805 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.7217541337013245, "learning_rate": 2.6203530422086234e-05, "loss": 0.2726, "step": 11721, "teacher_loss": 0.22271960973739624 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.5310426354408264, "learning_rate": 2.620202006224707e-05, "loss": 0.3088, "step": 11722, "teacher_loss": 0.2840694189071655 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4304261803627014, "learning_rate": 2.6200509445579167e-05, "loss": 0.2749, "step": 11723, "teacher_loss": 0.257620245218277 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4293491244316101, "learning_rate": 2.6198998572117168e-05, "loss": 0.2135, "step": 11724, "teacher_loss": 0.18951985239982605 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.5550104379653931, "learning_rate": 2.6197487441895705e-05, "loss": 0.2482, "step": 11725, "teacher_loss": 0.21409985423088074 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.348992258310318, "learning_rate": 2.6195976054949432e-05, "loss": 0.2874, "step": 11726, "teacher_loss": 0.2805972099304199 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.33255815505981445, "learning_rate": 2.619446441131299e-05, "loss": 0.198, "step": 11727, "teacher_loss": 0.1829938143491745 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.18563006818294525, "learning_rate": 2.6192952511021045e-05, "loss": 0.2228, "step": 11728, "teacher_loss": 0.2268821746110916 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.5308411121368408, "learning_rate": 2.6191440354108257e-05, "loss": 0.4484, "step": 11729, "teacher_loss": 0.4392498731613159 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.2919258773326874, "learning_rate": 2.6189927940609297e-05, "loss": 0.2097, "step": 11730, "teacher_loss": 0.2005467414855957 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.44020524621009827, "learning_rate": 2.6188415270558833e-05, "loss": 0.4633, "step": 11731, "teacher_loss": 0.4659165143966675 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.7232972979545593, "learning_rate": 2.6186902343991556e-05, "loss": 0.304, "step": 11732, "teacher_loss": 0.25739309191703796 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.32231825590133667, "learning_rate": 2.6185389160942147e-05, "loss": 0.1691, "step": 11733, "teacher_loss": 0.1520366668701172 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3430689871311188, "learning_rate": 2.61838757214453e-05, "loss": 0.2391, "step": 11734, "teacher_loss": 0.22759392857551575 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.207975834608078, "learning_rate": 2.6182362025535714e-05, "loss": 0.2289, "step": 11735, "teacher_loss": 0.23123089969158173 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.44402527809143066, "learning_rate": 2.6180848073248092e-05, "loss": 0.2475, "step": 11736, "teacher_loss": 0.22569337487220764 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.6652697324752808, "learning_rate": 2.6179333864617147e-05, "loss": 0.303, "step": 11737, "teacher_loss": 0.262746661901474 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.13566963374614716, "learning_rate": 2.6177819399677593e-05, "loss": 0.2328, "step": 11738, "teacher_loss": 0.24359755218029022 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.36873966455459595, "learning_rate": 2.6176304678464154e-05, "loss": 0.2398, "step": 11739, "teacher_loss": 0.22551871836185455 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4505552649497986, "learning_rate": 2.6174789701011557e-05, "loss": 0.3263, "step": 11740, "teacher_loss": 0.31244975328445435 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4136297404766083, "learning_rate": 2.6173274467354533e-05, "loss": 0.3431, "step": 11741, "teacher_loss": 0.335308313369751 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.20684108138084412, "learning_rate": 2.617175897752783e-05, "loss": 0.2296, "step": 11742, "teacher_loss": 0.23208823800086975 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.6587156653404236, "learning_rate": 2.6170243231566183e-05, "loss": 0.3759, "step": 11743, "teacher_loss": 0.34444117546081543 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.48333656787872314, "learning_rate": 2.6168727229504352e-05, "loss": 0.3458, "step": 11744, "teacher_loss": 0.33054959774017334 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3705843389034271, "learning_rate": 2.6167210971377092e-05, "loss": 0.2402, "step": 11745, "teacher_loss": 0.22572600841522217 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.4790026545524597, "learning_rate": 2.6165694457219162e-05, "loss": 0.317, "step": 11746, "teacher_loss": 0.29898345470428467 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.17654390633106232, "learning_rate": 2.6164177687065337e-05, "loss": 0.2396, "step": 11747, "teacher_loss": 0.2465890794992447 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.6947540044784546, "learning_rate": 2.6162660660950386e-05, "loss": 0.2313, "step": 11748, "teacher_loss": 0.17977529764175415 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.37498557567596436, "learning_rate": 2.6161143378909096e-05, "loss": 0.3694, "step": 11749, "teacher_loss": 0.36881938576698303 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 1.3527212142944336, "learning_rate": 2.6159625840976253e-05, "loss": 0.3997, "step": 11750, "teacher_loss": 0.2938268184661865 }, { "epoch": 2.12, "eval_exact_match": 79.75402081362347, "eval_f1": 87.17598981359633, "step": 11750 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.19763246178627014, "learning_rate": 2.615810804718665e-05, "loss": 0.1434, "step": 11751, "teacher_loss": 0.13740935921669006 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3919205069541931, "learning_rate": 2.6156589997575072e-05, "loss": 0.3446, "step": 11752, "teacher_loss": 0.339397668838501 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.35283082723617554, "learning_rate": 2.6155071692176348e-05, "loss": 0.2074, "step": 11753, "teacher_loss": 0.19122996926307678 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.33845284581184387, "learning_rate": 2.6153553131025268e-05, "loss": 0.236, "step": 11754, "teacher_loss": 0.22462353110313416 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.5809452533721924, "learning_rate": 2.6152034314156656e-05, "loss": 0.2105, "step": 11755, "teacher_loss": 0.1693117767572403 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.654186487197876, "learning_rate": 2.6150515241605334e-05, "loss": 0.342, "step": 11756, "teacher_loss": 0.307292640209198 }, { "compression_loss": 0.0, "epoch": 2.12, "label_loss": 0.3123341500759125, "learning_rate": 2.6148995913406123e-05, "loss": 0.3114, "step": 11757, "teacher_loss": 0.311333030462265 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.7260026335716248, "learning_rate": 2.6147476329593867e-05, "loss": 0.4434, "step": 11758, "teacher_loss": 0.41197606921195984 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.5350506901741028, "learning_rate": 2.61459564902034e-05, "loss": 0.2817, "step": 11759, "teacher_loss": 0.25350135564804077 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.6239803433418274, "learning_rate": 2.6144436395269566e-05, "loss": 0.2973, "step": 11760, "teacher_loss": 0.26102250814437866 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.47594326734542847, "learning_rate": 2.614291604482722e-05, "loss": 0.38, "step": 11761, "teacher_loss": 0.36938387155532837 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.5479391813278198, "learning_rate": 2.6141395438911216e-05, "loss": 0.233, "step": 11762, "teacher_loss": 0.19797459244728088 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.290363073348999, "learning_rate": 2.613987457755642e-05, "loss": 0.2177, "step": 11763, "teacher_loss": 0.20961186289787292 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.6466919183731079, "learning_rate": 2.6138353460797695e-05, "loss": 0.3123, "step": 11764, "teacher_loss": 0.2751414179801941 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.7366952896118164, "learning_rate": 2.6136832088669927e-05, "loss": 0.3022, "step": 11765, "teacher_loss": 0.25387638807296753 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.7886039018630981, "learning_rate": 2.6135310461207984e-05, "loss": 0.6488, "step": 11766, "teacher_loss": 0.6333101391792297 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.31554722785949707, "learning_rate": 2.613378857844676e-05, "loss": 0.2088, "step": 11767, "teacher_loss": 0.19695937633514404 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.45015841722488403, "learning_rate": 2.613226644042114e-05, "loss": 0.2931, "step": 11768, "teacher_loss": 0.27564364671707153 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.4002137780189514, "learning_rate": 2.6130744047166034e-05, "loss": 0.255, "step": 11769, "teacher_loss": 0.23888307809829712 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3668873906135559, "learning_rate": 2.6129221398716333e-05, "loss": 0.2605, "step": 11770, "teacher_loss": 0.24871167540550232 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 1.1099088191986084, "learning_rate": 2.6127698495106955e-05, "loss": 0.514, "step": 11771, "teacher_loss": 0.4477843940258026 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.4650160074234009, "learning_rate": 2.6126175336372808e-05, "loss": 0.3301, "step": 11772, "teacher_loss": 0.3151111304759979 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3635333478450775, "learning_rate": 2.6124651922548825e-05, "loss": 0.2498, "step": 11773, "teacher_loss": 0.23721134662628174 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3595234751701355, "learning_rate": 2.6123128253669926e-05, "loss": 0.3037, "step": 11774, "teacher_loss": 0.29753702878952026 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.4180101752281189, "learning_rate": 2.6121604329771043e-05, "loss": 0.2116, "step": 11775, "teacher_loss": 0.18871405720710754 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3541082739830017, "learning_rate": 2.6120080150887118e-05, "loss": 0.2568, "step": 11776, "teacher_loss": 0.24599777162075043 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.5516923069953918, "learning_rate": 2.6118555717053097e-05, "loss": 0.325, "step": 11777, "teacher_loss": 0.29986101388931274 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.33248043060302734, "learning_rate": 2.611703102830392e-05, "loss": 0.2227, "step": 11778, "teacher_loss": 0.21045032143592834 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.14766092598438263, "learning_rate": 2.6115506084674564e-05, "loss": 0.2137, "step": 11779, "teacher_loss": 0.22106239199638367 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.2766382694244385, "learning_rate": 2.6113980886199974e-05, "loss": 0.1942, "step": 11780, "teacher_loss": 0.1850774884223938 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.7963391542434692, "learning_rate": 2.6112455432915122e-05, "loss": 0.278, "step": 11781, "teacher_loss": 0.22037044167518616 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.7411714792251587, "learning_rate": 2.6110929724854987e-05, "loss": 0.2547, "step": 11782, "teacher_loss": 0.20061588287353516 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3920944333076477, "learning_rate": 2.610940376205454e-05, "loss": 0.2193, "step": 11783, "teacher_loss": 0.20012560486793518 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.570035457611084, "learning_rate": 2.6107877544548776e-05, "loss": 0.2094, "step": 11784, "teacher_loss": 0.16934117674827576 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.8104901313781738, "learning_rate": 2.6106351072372688e-05, "loss": 0.5458, "step": 11785, "teacher_loss": 0.5163378715515137 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3749673068523407, "learning_rate": 2.6104824345561264e-05, "loss": 0.2612, "step": 11786, "teacher_loss": 0.2485107183456421 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.43200820684432983, "learning_rate": 2.6103297364149506e-05, "loss": 0.3293, "step": 11787, "teacher_loss": 0.3178657591342926 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.6073095798492432, "learning_rate": 2.610177012817244e-05, "loss": 0.2934, "step": 11788, "teacher_loss": 0.25854456424713135 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.4115005135536194, "learning_rate": 2.6100242637665064e-05, "loss": 0.3139, "step": 11789, "teacher_loss": 0.3030462861061096 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.36522796750068665, "learning_rate": 2.6098714892662405e-05, "loss": 0.2811, "step": 11790, "teacher_loss": 0.2717519700527191 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.21329163014888763, "learning_rate": 2.609718689319949e-05, "loss": 0.2455, "step": 11791, "teacher_loss": 0.24907556176185608 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3402857780456543, "learning_rate": 2.6095658639311348e-05, "loss": 0.2211, "step": 11792, "teacher_loss": 0.20783844590187073 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.5102912187576294, "learning_rate": 2.609413013103302e-05, "loss": 0.3836, "step": 11793, "teacher_loss": 0.36948350071907043 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.19637587666511536, "learning_rate": 2.6092601368399553e-05, "loss": 0.1778, "step": 11794, "teacher_loss": 0.17571675777435303 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.486598402261734, "learning_rate": 2.6091072351445993e-05, "loss": 0.3317, "step": 11795, "teacher_loss": 0.31446194648742676 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.24823614954948425, "learning_rate": 2.6089543080207395e-05, "loss": 0.1949, "step": 11796, "teacher_loss": 0.18901577591896057 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.6965166926383972, "learning_rate": 2.6088013554718825e-05, "loss": 0.4815, "step": 11797, "teacher_loss": 0.45760977268218994 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.6267510652542114, "learning_rate": 2.6086483775015345e-05, "loss": 0.4206, "step": 11798, "teacher_loss": 0.39770960807800293 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.24668660759925842, "learning_rate": 2.6084953741132033e-05, "loss": 0.171, "step": 11799, "teacher_loss": 0.16258426010608673 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.4710262417793274, "learning_rate": 2.6083423453103966e-05, "loss": 0.2121, "step": 11800, "teacher_loss": 0.18337951600551605 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.32312625646591187, "learning_rate": 2.6081892910966228e-05, "loss": 0.2235, "step": 11801, "teacher_loss": 0.21241521835327148 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.218858003616333, "learning_rate": 2.608036211475391e-05, "loss": 0.3959, "step": 11802, "teacher_loss": 0.41560009121894836 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.49183446168899536, "learning_rate": 2.607883106450211e-05, "loss": 0.2768, "step": 11803, "teacher_loss": 0.2529560625553131 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3915698230266571, "learning_rate": 2.607729976024593e-05, "loss": 0.201, "step": 11804, "teacher_loss": 0.17986398935317993 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.44814667105674744, "learning_rate": 2.6075768202020483e-05, "loss": 0.2395, "step": 11805, "teacher_loss": 0.21630804240703583 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.2833609879016876, "learning_rate": 2.6074236389860873e-05, "loss": 0.372, "step": 11806, "teacher_loss": 0.3818890154361725 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.2659551799297333, "learning_rate": 2.6072704323802223e-05, "loss": 0.1641, "step": 11807, "teacher_loss": 0.15273050963878632 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.5055026412010193, "learning_rate": 2.6071172003879667e-05, "loss": 0.3614, "step": 11808, "teacher_loss": 0.3453558683395386 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.13391844928264618, "learning_rate": 2.6069639430128323e-05, "loss": 0.2053, "step": 11809, "teacher_loss": 0.21320462226867676 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.9493470191955566, "learning_rate": 2.606810660258334e-05, "loss": 0.6174, "step": 11810, "teacher_loss": 0.5805208683013916 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.16981101036071777, "learning_rate": 2.6066573521279856e-05, "loss": 0.1785, "step": 11811, "teacher_loss": 0.1795172393321991 }, { "compression_loss": 0.0, "epoch": 2.13, "label_loss": 0.3450591266155243, "learning_rate": 2.606504018625302e-05, "loss": 0.2282, "step": 11812, "teacher_loss": 0.2152593731880188 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4901118576526642, "learning_rate": 2.6063506597537985e-05, "loss": 0.3053, "step": 11813, "teacher_loss": 0.284729927778244 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4742254912853241, "learning_rate": 2.6061972755169916e-05, "loss": 0.2247, "step": 11814, "teacher_loss": 0.19695082306861877 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6243545413017273, "learning_rate": 2.606043865918398e-05, "loss": 0.2953, "step": 11815, "teacher_loss": 0.2587278187274933 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5326356291770935, "learning_rate": 2.605890430961534e-05, "loss": 0.1761, "step": 11816, "teacher_loss": 0.13653619587421417 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.2755795419216156, "learning_rate": 2.6057369706499184e-05, "loss": 0.1885, "step": 11817, "teacher_loss": 0.17877671122550964 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.8960363864898682, "learning_rate": 2.605583484987069e-05, "loss": 0.4244, "step": 11818, "teacher_loss": 0.372050404548645 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3315697908401489, "learning_rate": 2.605429973976505e-05, "loss": 0.388, "step": 11819, "teacher_loss": 0.39432376623153687 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.42155885696411133, "learning_rate": 2.6052764376217463e-05, "loss": 0.3903, "step": 11820, "teacher_loss": 0.38677978515625 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 1.0188727378845215, "learning_rate": 2.6051228759263124e-05, "loss": 0.2918, "step": 11821, "teacher_loss": 0.21098877489566803 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3923536539077759, "learning_rate": 2.6049692888937246e-05, "loss": 0.2795, "step": 11822, "teacher_loss": 0.2670028805732727 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3829669654369354, "learning_rate": 2.604815676527504e-05, "loss": 0.2397, "step": 11823, "teacher_loss": 0.22377602756023407 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5543052554130554, "learning_rate": 2.6046620388311718e-05, "loss": 0.4596, "step": 11824, "teacher_loss": 0.44908061623573303 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3174785077571869, "learning_rate": 2.6045083758082513e-05, "loss": 0.278, "step": 11825, "teacher_loss": 0.27361518144607544 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.7123744487762451, "learning_rate": 2.604354687462265e-05, "loss": 0.3581, "step": 11826, "teacher_loss": 0.31871408224105835 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6533921360969543, "learning_rate": 2.604200973796737e-05, "loss": 0.2568, "step": 11827, "teacher_loss": 0.21268978714942932 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5691380500793457, "learning_rate": 2.604047234815191e-05, "loss": 0.2173, "step": 11828, "teacher_loss": 0.17817473411560059 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3701571226119995, "learning_rate": 2.6038934705211523e-05, "loss": 0.2193, "step": 11829, "teacher_loss": 0.202579528093338 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6815152168273926, "learning_rate": 2.603739680918146e-05, "loss": 0.2964, "step": 11830, "teacher_loss": 0.25364288687705994 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6011703014373779, "learning_rate": 2.6035858660096975e-05, "loss": 0.3749, "step": 11831, "teacher_loss": 0.34974128007888794 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.9137331247329712, "learning_rate": 2.603432025799334e-05, "loss": 0.3293, "step": 11832, "teacher_loss": 0.2643439769744873 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.7757933735847473, "learning_rate": 2.6032781602905828e-05, "loss": 0.2992, "step": 11833, "teacher_loss": 0.24629098176956177 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.30560919642448425, "learning_rate": 2.603124269486971e-05, "loss": 0.3159, "step": 11834, "teacher_loss": 0.31700631976127625 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5418651103973389, "learning_rate": 2.6029703533920267e-05, "loss": 0.3041, "step": 11835, "teacher_loss": 0.2777123749256134 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.27882617712020874, "learning_rate": 2.6028164120092793e-05, "loss": 0.2174, "step": 11836, "teacher_loss": 0.21056890487670898 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3406464159488678, "learning_rate": 2.6026624453422583e-05, "loss": 0.2651, "step": 11837, "teacher_loss": 0.25672852993011475 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3227473795413971, "learning_rate": 2.602508453394493e-05, "loss": 0.3608, "step": 11838, "teacher_loss": 0.3650456666946411 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.23000863194465637, "learning_rate": 2.6023544361695147e-05, "loss": 0.2017, "step": 11839, "teacher_loss": 0.19850674271583557 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.45348310470581055, "learning_rate": 2.602200393670854e-05, "loss": 0.2381, "step": 11840, "teacher_loss": 0.21413525938987732 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.2759324312210083, "learning_rate": 2.6020463259020424e-05, "loss": 0.306, "step": 11841, "teacher_loss": 0.30939337611198425 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.7710205316543579, "learning_rate": 2.601892232866613e-05, "loss": 0.3008, "step": 11842, "teacher_loss": 0.24857556819915771 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.2691395878791809, "learning_rate": 2.6017381145680984e-05, "loss": 0.1745, "step": 11843, "teacher_loss": 0.16393698751926422 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.31048572063446045, "learning_rate": 2.601583971010032e-05, "loss": 0.3619, "step": 11844, "teacher_loss": 0.3676021695137024 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.8053724765777588, "learning_rate": 2.6014298021959482e-05, "loss": 0.3691, "step": 11845, "teacher_loss": 0.320635586977005 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.2519734501838684, "learning_rate": 2.6012756081293807e-05, "loss": 0.1334, "step": 11846, "teacher_loss": 0.12018124759197235 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4166175127029419, "learning_rate": 2.6011213888138658e-05, "loss": 0.3661, "step": 11847, "teacher_loss": 0.3604452610015869 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6790676116943359, "learning_rate": 2.6009671442529385e-05, "loss": 0.271, "step": 11848, "teacher_loss": 0.22569167613983154 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5332669019699097, "learning_rate": 2.6008128744501353e-05, "loss": 0.496, "step": 11849, "teacher_loss": 0.4918893575668335 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.17910028994083405, "learning_rate": 2.6006585794089937e-05, "loss": 0.1583, "step": 11850, "teacher_loss": 0.1560365855693817 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.35121622681617737, "learning_rate": 2.6005042591330506e-05, "loss": 0.2388, "step": 11851, "teacher_loss": 0.22635364532470703 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.24981184303760529, "learning_rate": 2.6003499136258446e-05, "loss": 0.2528, "step": 11852, "teacher_loss": 0.25310081243515015 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4699033200740814, "learning_rate": 2.6001955428909136e-05, "loss": 0.287, "step": 11853, "teacher_loss": 0.2666309177875519 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5475835800170898, "learning_rate": 2.600041146931798e-05, "loss": 0.2037, "step": 11854, "teacher_loss": 0.16545702517032623 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6502259373664856, "learning_rate": 2.5998867257520363e-05, "loss": 0.3642, "step": 11855, "teacher_loss": 0.33246415853500366 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.14808988571166992, "learning_rate": 2.59973227935517e-05, "loss": 0.2296, "step": 11856, "teacher_loss": 0.2386157363653183 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4042413830757141, "learning_rate": 2.5995778077447393e-05, "loss": 0.4354, "step": 11857, "teacher_loss": 0.4388611316680908 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.18771639466285706, "learning_rate": 2.599423310924287e-05, "loss": 0.2521, "step": 11858, "teacher_loss": 0.2592393159866333 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.2643285393714905, "learning_rate": 2.599268788897354e-05, "loss": 0.2295, "step": 11859, "teacher_loss": 0.2256062626838684 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5425845980644226, "learning_rate": 2.599114241667483e-05, "loss": 0.4539, "step": 11860, "teacher_loss": 0.44408178329467773 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.40210098028182983, "learning_rate": 2.5989596692382182e-05, "loss": 0.2596, "step": 11861, "teacher_loss": 0.24373117089271545 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.6213204860687256, "learning_rate": 2.598805071613103e-05, "loss": 0.3646, "step": 11862, "teacher_loss": 0.3361297845840454 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 1.0089659690856934, "learning_rate": 2.598650448795682e-05, "loss": 0.4561, "step": 11863, "teacher_loss": 0.3946387767791748 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.3167431950569153, "learning_rate": 2.5984958007894995e-05, "loss": 0.2141, "step": 11864, "teacher_loss": 0.20265866816043854 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.31868553161621094, "learning_rate": 2.598341127598103e-05, "loss": 0.3728, "step": 11865, "teacher_loss": 0.37881243228912354 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.17840471863746643, "learning_rate": 2.5981864292250362e-05, "loss": 0.2437, "step": 11866, "teacher_loss": 0.2510058879852295 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.5543650388717651, "learning_rate": 2.5980317056738477e-05, "loss": 0.3757, "step": 11867, "teacher_loss": 0.35587215423583984 }, { "compression_loss": 0.0, "epoch": 2.14, "label_loss": 0.4384593963623047, "learning_rate": 2.597876956948084e-05, "loss": 0.2459, "step": 11868, "teacher_loss": 0.2244565188884735 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.573606014251709, "learning_rate": 2.5977221830512935e-05, "loss": 0.237, "step": 11869, "teacher_loss": 0.19965162873268127 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5061002969741821, "learning_rate": 2.5975673839870246e-05, "loss": 0.3205, "step": 11870, "teacher_loss": 0.29989537596702576 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.4440218210220337, "learning_rate": 2.597412559758826e-05, "loss": 0.3027, "step": 11871, "teacher_loss": 0.2870480418205261 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.15619751811027527, "learning_rate": 2.5972577103702477e-05, "loss": 0.1645, "step": 11872, "teacher_loss": 0.16544204950332642 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.4934988021850586, "learning_rate": 2.5971028358248396e-05, "loss": 0.2573, "step": 11873, "teacher_loss": 0.2310059368610382 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.9463531374931335, "learning_rate": 2.5969479361261533e-05, "loss": 0.6353, "step": 11874, "teacher_loss": 0.6007082462310791 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.8638978600502014, "learning_rate": 2.5967930112777393e-05, "loss": 0.2894, "step": 11875, "teacher_loss": 0.22558185458183289 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3971202075481415, "learning_rate": 2.5966380612831496e-05, "loss": 0.2495, "step": 11876, "teacher_loss": 0.23309175670146942 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.6802642345428467, "learning_rate": 2.596483086145938e-05, "loss": 0.4248, "step": 11877, "teacher_loss": 0.3964252471923828 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7412111163139343, "learning_rate": 2.5963280858696558e-05, "loss": 0.2801, "step": 11878, "teacher_loss": 0.22889423370361328 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7711864113807678, "learning_rate": 2.5961730604578583e-05, "loss": 0.3508, "step": 11879, "teacher_loss": 0.30411767959594727 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.28763967752456665, "learning_rate": 2.596018009914098e-05, "loss": 0.3535, "step": 11880, "teacher_loss": 0.36086538434028625 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.4015776515007019, "learning_rate": 2.5958629342419315e-05, "loss": 0.233, "step": 11881, "teacher_loss": 0.21422609686851501 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 1.0421007871627808, "learning_rate": 2.5957078334449132e-05, "loss": 0.3312, "step": 11882, "teacher_loss": 0.25220659375190735 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.259662389755249, "learning_rate": 2.5955527075265995e-05, "loss": 0.2113, "step": 11883, "teacher_loss": 0.2059817612171173 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5402747392654419, "learning_rate": 2.595397556490547e-05, "loss": 0.2503, "step": 11884, "teacher_loss": 0.2180628627538681 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.26193585991859436, "learning_rate": 2.5952423803403126e-05, "loss": 0.222, "step": 11885, "teacher_loss": 0.21754762530326843 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.23677082359790802, "learning_rate": 2.5950871790794537e-05, "loss": 0.1911, "step": 11886, "teacher_loss": 0.18603497743606567 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7284072637557983, "learning_rate": 2.5949319527115292e-05, "loss": 0.3807, "step": 11887, "teacher_loss": 0.34205758571624756 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7798953652381897, "learning_rate": 2.5947767012400985e-05, "loss": 0.3782, "step": 11888, "teacher_loss": 0.3336077928543091 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5711961984634399, "learning_rate": 2.59462142466872e-05, "loss": 0.3253, "step": 11889, "teacher_loss": 0.2979753017425537 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7904107570648193, "learning_rate": 2.5944661230009533e-05, "loss": 0.3771, "step": 11890, "teacher_loss": 0.33119094371795654 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5503579378128052, "learning_rate": 2.5943107962403603e-05, "loss": 0.28, "step": 11891, "teacher_loss": 0.24999725818634033 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5006875991821289, "learning_rate": 2.594155444390502e-05, "loss": 0.2232, "step": 11892, "teacher_loss": 0.19231688976287842 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.4584958553314209, "learning_rate": 2.5940000674549398e-05, "loss": 0.2258, "step": 11893, "teacher_loss": 0.1999293863773346 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.4369381070137024, "learning_rate": 2.5938446654372357e-05, "loss": 0.2421, "step": 11894, "teacher_loss": 0.22049680352210999 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3314973711967468, "learning_rate": 2.593689238340953e-05, "loss": 0.285, "step": 11895, "teacher_loss": 0.27979743480682373 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.125652015209198, "learning_rate": 2.5935337861696548e-05, "loss": 0.2056, "step": 11896, "teacher_loss": 0.21443699300289154 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.29903966188430786, "learning_rate": 2.5933783089269062e-05, "loss": 0.1968, "step": 11897, "teacher_loss": 0.1854916512966156 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5764154195785522, "learning_rate": 2.5932228066162704e-05, "loss": 0.2746, "step": 11898, "teacher_loss": 0.24106967449188232 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5139930248260498, "learning_rate": 2.5930672792413138e-05, "loss": 0.2555, "step": 11899, "teacher_loss": 0.22674795985221863 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.20115339756011963, "learning_rate": 2.5929117268056016e-05, "loss": 0.2646, "step": 11900, "teacher_loss": 0.27169069647789 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5948539972305298, "learning_rate": 2.5927561493127e-05, "loss": 0.2505, "step": 11901, "teacher_loss": 0.21224600076675415 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3971590995788574, "learning_rate": 2.5926005467661763e-05, "loss": 0.2606, "step": 11902, "teacher_loss": 0.24547350406646729 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5255863070487976, "learning_rate": 2.5924449191695976e-05, "loss": 0.351, "step": 11903, "teacher_loss": 0.33161526918411255 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7192397117614746, "learning_rate": 2.5922892665265325e-05, "loss": 0.3365, "step": 11904, "teacher_loss": 0.29401519894599915 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5191932916641235, "learning_rate": 2.5921335888405493e-05, "loss": 0.2447, "step": 11905, "teacher_loss": 0.21425354480743408 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.1840202510356903, "learning_rate": 2.5919778861152172e-05, "loss": 0.2149, "step": 11906, "teacher_loss": 0.21835049986839294 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.753964900970459, "learning_rate": 2.5918221583541065e-05, "loss": 0.5318, "step": 11907, "teacher_loss": 0.5071197748184204 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.2728998363018036, "learning_rate": 2.5916664055607866e-05, "loss": 0.2123, "step": 11908, "teacher_loss": 0.2055220901966095 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.38073262572288513, "learning_rate": 2.5915106277388293e-05, "loss": 0.321, "step": 11909, "teacher_loss": 0.3143864870071411 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5783706307411194, "learning_rate": 2.591354824891806e-05, "loss": 0.2702, "step": 11910, "teacher_loss": 0.23591876029968262 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3509276807308197, "learning_rate": 2.591198997023288e-05, "loss": 0.1896, "step": 11911, "teacher_loss": 0.17167195677757263 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.17924711108207703, "learning_rate": 2.5910431441368493e-05, "loss": 0.1613, "step": 11912, "teacher_loss": 0.15934190154075623 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.2755981683731079, "learning_rate": 2.5908872662360617e-05, "loss": 0.248, "step": 11913, "teacher_loss": 0.24497190117835999 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.7753876447677612, "learning_rate": 2.5907313633245007e-05, "loss": 0.2482, "step": 11914, "teacher_loss": 0.18959349393844604 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.6539809703826904, "learning_rate": 2.590575435405739e-05, "loss": 0.3603, "step": 11915, "teacher_loss": 0.3276401162147522 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.43185028433799744, "learning_rate": 2.5904194824833524e-05, "loss": 0.3145, "step": 11916, "teacher_loss": 0.3014155328273773 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3070920407772064, "learning_rate": 2.5902635045609164e-05, "loss": 0.2265, "step": 11917, "teacher_loss": 0.21750569343566895 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.5546842813491821, "learning_rate": 2.590107501642007e-05, "loss": 0.2936, "step": 11918, "teacher_loss": 0.2646271586418152 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.22234024107456207, "learning_rate": 2.5899514737302007e-05, "loss": 0.241, "step": 11919, "teacher_loss": 0.24312138557434082 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.538908839225769, "learning_rate": 2.589795420829075e-05, "loss": 0.3404, "step": 11920, "teacher_loss": 0.31835097074508667 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.29677772521972656, "learning_rate": 2.589639342942208e-05, "loss": 0.2134, "step": 11921, "teacher_loss": 0.20415177941322327 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.6847197413444519, "learning_rate": 2.5894832400731778e-05, "loss": 0.2459, "step": 11922, "teacher_loss": 0.1971079707145691 }, { "compression_loss": 0.0, "epoch": 2.15, "label_loss": 0.3087640404701233, "learning_rate": 2.5893271122255625e-05, "loss": 0.2382, "step": 11923, "teacher_loss": 0.23032930493354797 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5024212598800659, "learning_rate": 2.5891709594029437e-05, "loss": 0.3413, "step": 11924, "teacher_loss": 0.32343506813049316 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.35786259174346924, "learning_rate": 2.5890147816088994e-05, "loss": 0.1907, "step": 11925, "teacher_loss": 0.17213281989097595 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5734649896621704, "learning_rate": 2.5888585788470116e-05, "loss": 0.3247, "step": 11926, "teacher_loss": 0.297050803899765 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.4436054229736328, "learning_rate": 2.5887023511208606e-05, "loss": 0.4209, "step": 11927, "teacher_loss": 0.41833817958831787 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.47309499979019165, "learning_rate": 2.5885460984340293e-05, "loss": 0.4554, "step": 11928, "teacher_loss": 0.45345252752304077 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.19974543154239655, "learning_rate": 2.5883898207900997e-05, "loss": 0.1923, "step": 11929, "teacher_loss": 0.19141972064971924 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.46143269538879395, "learning_rate": 2.5882335181926546e-05, "loss": 0.2373, "step": 11930, "teacher_loss": 0.2124168425798416 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.250038206577301, "learning_rate": 2.5880771906452775e-05, "loss": 0.1986, "step": 11931, "teacher_loss": 0.192842036485672 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.15559667348861694, "learning_rate": 2.5879208381515525e-05, "loss": 0.1781, "step": 11932, "teacher_loss": 0.1806262731552124 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.6108611822128296, "learning_rate": 2.587764460715065e-05, "loss": 0.3194, "step": 11933, "teacher_loss": 0.2870543301105499 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5526113510131836, "learning_rate": 2.5876080583393993e-05, "loss": 0.2412, "step": 11934, "teacher_loss": 0.20656481385231018 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5092881321907043, "learning_rate": 2.5874516310281414e-05, "loss": 0.2897, "step": 11935, "teacher_loss": 0.2652827501296997 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5741344094276428, "learning_rate": 2.5872951787848784e-05, "loss": 0.4561, "step": 11936, "teacher_loss": 0.4430006742477417 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.23296701908111572, "learning_rate": 2.5871387016131965e-05, "loss": 0.1774, "step": 11937, "teacher_loss": 0.17124879360198975 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.3152272701263428, "learning_rate": 2.5869821995166834e-05, "loss": 0.2424, "step": 11938, "teacher_loss": 0.23431754112243652 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.268160879611969, "learning_rate": 2.586825672498928e-05, "loss": 0.318, "step": 11939, "teacher_loss": 0.32356932759284973 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.42559123039245605, "learning_rate": 2.586669120563518e-05, "loss": 0.2329, "step": 11940, "teacher_loss": 0.21149706840515137 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.506427526473999, "learning_rate": 2.5865125437140432e-05, "loss": 0.3253, "step": 11941, "teacher_loss": 0.3051733374595642 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5129289031028748, "learning_rate": 2.5863559419540933e-05, "loss": 0.2933, "step": 11942, "teacher_loss": 0.26892921328544617 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.1196107342839241, "learning_rate": 2.586199315287259e-05, "loss": 0.2705, "step": 11943, "teacher_loss": 0.2872978448867798 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.6892185211181641, "learning_rate": 2.5860426637171307e-05, "loss": 0.3259, "step": 11944, "teacher_loss": 0.28552478551864624 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.20203818380832672, "learning_rate": 2.5858859872473e-05, "loss": 0.1576, "step": 11945, "teacher_loss": 0.15264225006103516 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.23604559898376465, "learning_rate": 2.58572928588136e-05, "loss": 0.1924, "step": 11946, "teacher_loss": 0.18756867945194244 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.38689279556274414, "learning_rate": 2.5855725596229018e-05, "loss": 0.2158, "step": 11947, "teacher_loss": 0.19682970643043518 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.7924243211746216, "learning_rate": 2.5854158084755197e-05, "loss": 0.2519, "step": 11948, "teacher_loss": 0.19183319807052612 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5708785057067871, "learning_rate": 2.585259032442808e-05, "loss": 0.3662, "step": 11949, "teacher_loss": 0.34349310398101807 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.7700195908546448, "learning_rate": 2.58510223152836e-05, "loss": 0.3334, "step": 11950, "teacher_loss": 0.2849034368991852 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.17075885832309723, "learning_rate": 2.584945405735771e-05, "loss": 0.2494, "step": 11951, "teacher_loss": 0.2581344544887543 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5903782844543457, "learning_rate": 2.5847885550686364e-05, "loss": 0.3831, "step": 11952, "teacher_loss": 0.360016793012619 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.8014289140701294, "learning_rate": 2.5846316795305533e-05, "loss": 0.3588, "step": 11953, "teacher_loss": 0.30960702896118164 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.2461264729499817, "learning_rate": 2.5844747791251174e-05, "loss": 0.3334, "step": 11954, "teacher_loss": 0.34310024976730347 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.34152668714523315, "learning_rate": 2.5843178538559262e-05, "loss": 0.2386, "step": 11955, "teacher_loss": 0.22713598608970642 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.6526594161987305, "learning_rate": 2.5841609037265778e-05, "loss": 0.3606, "step": 11956, "teacher_loss": 0.3281291723251343 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.1829129457473755, "learning_rate": 2.5840039287406702e-05, "loss": 0.3092, "step": 11957, "teacher_loss": 0.3231821060180664 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.7171481847763062, "learning_rate": 2.5838469289018025e-05, "loss": 0.471, "step": 11958, "teacher_loss": 0.4436776041984558 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5656023621559143, "learning_rate": 2.5836899042135746e-05, "loss": 0.2522, "step": 11959, "teacher_loss": 0.21734771132469177 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.808519721031189, "learning_rate": 2.5835328546795854e-05, "loss": 0.3398, "step": 11960, "teacher_loss": 0.2877105474472046 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.31676119565963745, "learning_rate": 2.583375780303437e-05, "loss": 0.1583, "step": 11961, "teacher_loss": 0.1407456398010254 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.24294674396514893, "learning_rate": 2.5832186810887302e-05, "loss": 0.1866, "step": 11962, "teacher_loss": 0.18029829859733582 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.4894581437110901, "learning_rate": 2.5830615570390666e-05, "loss": 0.3261, "step": 11963, "teacher_loss": 0.30794578790664673 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5461469888687134, "learning_rate": 2.5829044081580487e-05, "loss": 0.2955, "step": 11964, "teacher_loss": 0.2676633298397064 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5455873608589172, "learning_rate": 2.5827472344492792e-05, "loss": 0.2572, "step": 11965, "teacher_loss": 0.22513779997825623 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.39715510606765747, "learning_rate": 2.5825900359163623e-05, "loss": 0.2804, "step": 11966, "teacher_loss": 0.2674432396888733 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.3988390862941742, "learning_rate": 2.5824328125629016e-05, "loss": 0.2931, "step": 11967, "teacher_loss": 0.28136301040649414 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.42058253288269043, "learning_rate": 2.5822755643925014e-05, "loss": 0.3378, "step": 11968, "teacher_loss": 0.32863694429397583 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.40050947666168213, "learning_rate": 2.582118291408767e-05, "loss": 0.2073, "step": 11969, "teacher_loss": 0.1858624517917633 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.3080638647079468, "learning_rate": 2.5819609936153052e-05, "loss": 0.1945, "step": 11970, "teacher_loss": 0.18190079927444458 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.30709108710289, "learning_rate": 2.5818036710157218e-05, "loss": 0.2327, "step": 11971, "teacher_loss": 0.22445183992385864 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.5737199783325195, "learning_rate": 2.5816463236136236e-05, "loss": 0.3478, "step": 11972, "teacher_loss": 0.3226962387561798 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.18862685561180115, "learning_rate": 2.581488951412618e-05, "loss": 0.2282, "step": 11973, "teacher_loss": 0.2325660139322281 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.415749728679657, "learning_rate": 2.581331554416313e-05, "loss": 0.3714, "step": 11974, "teacher_loss": 0.3664560914039612 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.4448944926261902, "learning_rate": 2.5811741326283177e-05, "loss": 0.2499, "step": 11975, "teacher_loss": 0.2282048463821411 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.34253832697868347, "learning_rate": 2.5810166860522407e-05, "loss": 0.246, "step": 11976, "teacher_loss": 0.23528921604156494 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.6029420495033264, "learning_rate": 2.580859214691693e-05, "loss": 0.2501, "step": 11977, "teacher_loss": 0.21086959540843964 }, { "compression_loss": 0.0, "epoch": 2.16, "label_loss": 0.3937985301017761, "learning_rate": 2.5807017185502833e-05, "loss": 0.2822, "step": 11978, "teacher_loss": 0.2698172330856323 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.8012281656265259, "learning_rate": 2.5805441976316234e-05, "loss": 0.3212, "step": 11979, "teacher_loss": 0.2678791880607605 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.30180203914642334, "learning_rate": 2.580386651939325e-05, "loss": 0.188, "step": 11980, "teacher_loss": 0.17537912726402283 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3701208233833313, "learning_rate": 2.5802290814769996e-05, "loss": 0.239, "step": 11981, "teacher_loss": 0.22443421185016632 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.2737005949020386, "learning_rate": 2.58007148624826e-05, "loss": 0.3264, "step": 11982, "teacher_loss": 0.3322793245315552 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.9327365756034851, "learning_rate": 2.57991386625672e-05, "loss": 0.3687, "step": 11983, "teacher_loss": 0.30606523156166077 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.27122196555137634, "learning_rate": 2.579756221505992e-05, "loss": 0.2085, "step": 11984, "teacher_loss": 0.20148402452468872 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.27522778511047363, "learning_rate": 2.5795985519996915e-05, "loss": 0.2656, "step": 11985, "teacher_loss": 0.26449668407440186 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3158515393733978, "learning_rate": 2.5794408577414334e-05, "loss": 0.2554, "step": 11986, "teacher_loss": 0.2487136721611023 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3624435067176819, "learning_rate": 2.5792831387348322e-05, "loss": 0.3121, "step": 11987, "teacher_loss": 0.30649369955062866 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.16503341495990753, "learning_rate": 2.5791253949835045e-05, "loss": 0.1692, "step": 11988, "teacher_loss": 0.16961508989334106 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.58681321144104, "learning_rate": 2.5789676264910668e-05, "loss": 0.2586, "step": 11989, "teacher_loss": 0.22211971879005432 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.7148056030273438, "learning_rate": 2.578809833261137e-05, "loss": 0.3244, "step": 11990, "teacher_loss": 0.28103333711624146 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3362712860107422, "learning_rate": 2.5786520152973316e-05, "loss": 0.3379, "step": 11991, "teacher_loss": 0.338096559047699 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.41751572489738464, "learning_rate": 2.5784941726032695e-05, "loss": 0.2056, "step": 11992, "teacher_loss": 0.1820676475763321 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.7120903730392456, "learning_rate": 2.5783363051825694e-05, "loss": 0.6266, "step": 11993, "teacher_loss": 0.617125391960144 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.2720344662666321, "learning_rate": 2.5781784130388512e-05, "loss": 0.2203, "step": 11994, "teacher_loss": 0.214506596326828 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.2760605812072754, "learning_rate": 2.5780204961757345e-05, "loss": 0.2038, "step": 11995, "teacher_loss": 0.1957758665084839 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4183105528354645, "learning_rate": 2.5778625545968397e-05, "loss": 0.2128, "step": 11996, "teacher_loss": 0.18993742763996124 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.48098820447921753, "learning_rate": 2.5777045883057882e-05, "loss": 0.268, "step": 11997, "teacher_loss": 0.24437211453914642 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.6892327070236206, "learning_rate": 2.5775465973062016e-05, "loss": 0.2954, "step": 11998, "teacher_loss": 0.2516239881515503 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.509902834892273, "learning_rate": 2.577388581601702e-05, "loss": 0.2216, "step": 11999, "teacher_loss": 0.18952716886997223 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5059928894042969, "learning_rate": 2.577230541195913e-05, "loss": 0.2591, "step": 12000, "teacher_loss": 0.2316424399614334 }, { "epoch": 2.17, "eval_exact_match": 79.47019867549669, "eval_f1": 87.12672142557732, "step": 12000 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.34276318550109863, "learning_rate": 2.577072476092457e-05, "loss": 0.342, "step": 12001, "teacher_loss": 0.34192001819610596 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.6022912263870239, "learning_rate": 2.576914386294958e-05, "loss": 0.3775, "step": 12002, "teacher_loss": 0.3525382876396179 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.10767114162445068, "learning_rate": 2.5767562718070416e-05, "loss": 0.1679, "step": 12003, "teacher_loss": 0.17463470995426178 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3117823600769043, "learning_rate": 2.5765981326323317e-05, "loss": 0.3082, "step": 12004, "teacher_loss": 0.30777716636657715 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.22859103977680206, "learning_rate": 2.5764399687744543e-05, "loss": 0.2343, "step": 12005, "teacher_loss": 0.23494787514209747 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.45248425006866455, "learning_rate": 2.5762817802370357e-05, "loss": 0.2488, "step": 12006, "teacher_loss": 0.2261640429496765 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4681066870689392, "learning_rate": 2.5761235670237034e-05, "loss": 0.2143, "step": 12007, "teacher_loss": 0.18613725900650024 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.624732494354248, "learning_rate": 2.5759653291380833e-05, "loss": 0.3549, "step": 12008, "teacher_loss": 0.32493099570274353 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3321205675601959, "learning_rate": 2.575807066583805e-05, "loss": 0.2587, "step": 12009, "teacher_loss": 0.2505359947681427 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.36879605054855347, "learning_rate": 2.5756487793644953e-05, "loss": 0.2633, "step": 12010, "teacher_loss": 0.2516269087791443 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.43571197986602783, "learning_rate": 2.5754904674837845e-05, "loss": 0.2555, "step": 12011, "teacher_loss": 0.2354586273431778 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.18305779993534088, "learning_rate": 2.5753321309453015e-05, "loss": 0.2356, "step": 12012, "teacher_loss": 0.2414005994796753 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4291021525859833, "learning_rate": 2.575173769752677e-05, "loss": 0.2353, "step": 12013, "teacher_loss": 0.21376332640647888 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.23948828876018524, "learning_rate": 2.5750153839095413e-05, "loss": 0.1863, "step": 12014, "teacher_loss": 0.1803765594959259 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.2899554371833801, "learning_rate": 2.574856973419526e-05, "loss": 0.272, "step": 12015, "teacher_loss": 0.26999813318252563 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4110201597213745, "learning_rate": 2.5746985382862628e-05, "loss": 0.3345, "step": 12016, "teacher_loss": 0.3260200619697571 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.341378390789032, "learning_rate": 2.5745400785133842e-05, "loss": 0.2767, "step": 12017, "teacher_loss": 0.26946961879730225 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.7312158346176147, "learning_rate": 2.5743815941045236e-05, "loss": 0.7181, "step": 12018, "teacher_loss": 0.7166070938110352 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.26973846554756165, "learning_rate": 2.574223085063314e-05, "loss": 0.2004, "step": 12019, "teacher_loss": 0.19272738695144653 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4913783073425293, "learning_rate": 2.5740645513933895e-05, "loss": 0.2782, "step": 12020, "teacher_loss": 0.25453346967697144 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5462285876274109, "learning_rate": 2.5739059930983853e-05, "loss": 0.2697, "step": 12021, "teacher_loss": 0.23893225193023682 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.41651853919029236, "learning_rate": 2.573747410181937e-05, "loss": 0.3523, "step": 12022, "teacher_loss": 0.3452187180519104 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.9229111671447754, "learning_rate": 2.5735888026476785e-05, "loss": 0.3941, "step": 12023, "teacher_loss": 0.33535003662109375 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5272068977355957, "learning_rate": 2.5734301704992486e-05, "loss": 0.3146, "step": 12024, "teacher_loss": 0.2909737229347229 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.4346199631690979, "learning_rate": 2.5732715137402828e-05, "loss": 0.274, "step": 12025, "teacher_loss": 0.256185919046402 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3124018609523773, "learning_rate": 2.573112832374419e-05, "loss": 0.239, "step": 12026, "teacher_loss": 0.23080243170261383 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.6257033348083496, "learning_rate": 2.5729541264052957e-05, "loss": 0.3644, "step": 12027, "teacher_loss": 0.3354036808013916 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.9220696687698364, "learning_rate": 2.5727953958365507e-05, "loss": 0.3085, "step": 12028, "teacher_loss": 0.24028854072093964 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5452176332473755, "learning_rate": 2.5726366406718237e-05, "loss": 0.3183, "step": 12029, "teacher_loss": 0.2931378483772278 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5197912454605103, "learning_rate": 2.5724778609147547e-05, "loss": 0.3316, "step": 12030, "teacher_loss": 0.3106488883495331 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.5562528371810913, "learning_rate": 2.5723190565689833e-05, "loss": 0.3332, "step": 12031, "teacher_loss": 0.30836087465286255 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.3790109157562256, "learning_rate": 2.5721602276381518e-05, "loss": 0.3352, "step": 12032, "teacher_loss": 0.3302847146987915 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.36089059710502625, "learning_rate": 2.5720013741259e-05, "loss": 0.3289, "step": 12033, "teacher_loss": 0.3252910077571869 }, { "compression_loss": 0.0, "epoch": 2.17, "label_loss": 0.7168359756469727, "learning_rate": 2.571842496035871e-05, "loss": 0.362, "step": 12034, "teacher_loss": 0.3225597143173218 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3117433190345764, "learning_rate": 2.5716835933717072e-05, "loss": 0.1797, "step": 12035, "teacher_loss": 0.16502366960048676 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.6041591763496399, "learning_rate": 2.5715246661370515e-05, "loss": 0.4696, "step": 12036, "teacher_loss": 0.4546201825141907 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3809165954589844, "learning_rate": 2.571365714335548e-05, "loss": 0.3571, "step": 12037, "teacher_loss": 0.3544117212295532 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.2490965873003006, "learning_rate": 2.5712067379708404e-05, "loss": 0.1948, "step": 12038, "teacher_loss": 0.18877166509628296 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.7868797183036804, "learning_rate": 2.5710477370465747e-05, "loss": 0.2954, "step": 12039, "teacher_loss": 0.24081027507781982 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.9049237370491028, "learning_rate": 2.570888711566395e-05, "loss": 0.2798, "step": 12040, "teacher_loss": 0.21031169593334198 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4199647307395935, "learning_rate": 2.570729661533948e-05, "loss": 0.2221, "step": 12041, "teacher_loss": 0.20015479624271393 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3062474727630615, "learning_rate": 2.57057058695288e-05, "loss": 0.1614, "step": 12042, "teacher_loss": 0.14530816674232483 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4632222652435303, "learning_rate": 2.570411487826838e-05, "loss": 0.3317, "step": 12043, "teacher_loss": 0.3170440196990967 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.6057695746421814, "learning_rate": 2.57025236415947e-05, "loss": 0.3649, "step": 12044, "teacher_loss": 0.3381814956665039 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.7713839411735535, "learning_rate": 2.570093215954424e-05, "loss": 0.4217, "step": 12045, "teacher_loss": 0.38281798362731934 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.48428618907928467, "learning_rate": 2.5699340432153495e-05, "loss": 0.2533, "step": 12046, "teacher_loss": 0.2276536226272583 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4086078703403473, "learning_rate": 2.5697748459458945e-05, "loss": 0.1828, "step": 12047, "teacher_loss": 0.15768729150295258 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.20795422792434692, "learning_rate": 2.5696156241497095e-05, "loss": 0.2027, "step": 12048, "teacher_loss": 0.20213884115219116 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.8001163005828857, "learning_rate": 2.5694563778304455e-05, "loss": 0.3824, "step": 12049, "teacher_loss": 0.33596912026405334 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.8652982115745544, "learning_rate": 2.5692971069917532e-05, "loss": 0.5746, "step": 12050, "teacher_loss": 0.5423203110694885 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.48111557960510254, "learning_rate": 2.5691378116372843e-05, "loss": 0.2543, "step": 12051, "teacher_loss": 0.2291422337293625 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.496149480342865, "learning_rate": 2.5689784917706905e-05, "loss": 0.2703, "step": 12052, "teacher_loss": 0.24520540237426758 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.7141072750091553, "learning_rate": 2.5688191473956247e-05, "loss": 0.2283, "step": 12053, "teacher_loss": 0.17433977127075195 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.544466495513916, "learning_rate": 2.5686597785157405e-05, "loss": 0.3349, "step": 12054, "teacher_loss": 0.311573326587677 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.5184895396232605, "learning_rate": 2.5685003851346917e-05, "loss": 0.3071, "step": 12055, "teacher_loss": 0.2836421728134155 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.20427894592285156, "learning_rate": 2.5683409672561328e-05, "loss": 0.2829, "step": 12056, "teacher_loss": 0.29160743951797485 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 1.2334232330322266, "learning_rate": 2.568181524883718e-05, "loss": 0.347, "step": 12057, "teacher_loss": 0.2484818696975708 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.6628396511077881, "learning_rate": 2.568022058021104e-05, "loss": 0.4199, "step": 12058, "teacher_loss": 0.3928550183773041 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.5072532296180725, "learning_rate": 2.5678625666719457e-05, "loss": 0.4238, "step": 12059, "teacher_loss": 0.4145410656929016 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.21382319927215576, "learning_rate": 2.567703050839901e-05, "loss": 0.2293, "step": 12060, "teacher_loss": 0.23103821277618408 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.21607086062431335, "learning_rate": 2.567543510528626e-05, "loss": 0.1608, "step": 12061, "teacher_loss": 0.15470048785209656 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.5792874097824097, "learning_rate": 2.5673839457417793e-05, "loss": 0.3208, "step": 12062, "teacher_loss": 0.2920577824115753 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4855174124240875, "learning_rate": 2.5672243564830188e-05, "loss": 0.2983, "step": 12063, "teacher_loss": 0.27746179699897766 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.43412184715270996, "learning_rate": 2.5670647427560036e-05, "loss": 0.2588, "step": 12064, "teacher_loss": 0.23928947746753693 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.33610790967941284, "learning_rate": 2.566905104564393e-05, "loss": 0.2536, "step": 12065, "teacher_loss": 0.2444775402545929 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.17196375131607056, "learning_rate": 2.5667454419118467e-05, "loss": 0.2121, "step": 12066, "teacher_loss": 0.21656128764152527 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.45180654525756836, "learning_rate": 2.566585754802026e-05, "loss": 0.2003, "step": 12067, "teacher_loss": 0.1723524034023285 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.5299785137176514, "learning_rate": 2.5664260432385916e-05, "loss": 0.3177, "step": 12068, "teacher_loss": 0.29414108395576477 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3738633990287781, "learning_rate": 2.5662663072252056e-05, "loss": 0.1975, "step": 12069, "teacher_loss": 0.1778697371482849 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3002810478210449, "learning_rate": 2.5661065467655298e-05, "loss": 0.1912, "step": 12070, "teacher_loss": 0.1791335493326187 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.14524048566818237, "learning_rate": 2.5659467618632275e-05, "loss": 0.1556, "step": 12071, "teacher_loss": 0.1567300260066986 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.31826430559158325, "learning_rate": 2.5657869525219617e-05, "loss": 0.2144, "step": 12072, "teacher_loss": 0.20284314453601837 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3811996579170227, "learning_rate": 2.5656271187453962e-05, "loss": 0.2823, "step": 12073, "teacher_loss": 0.2713657021522522 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.8714183568954468, "learning_rate": 2.565467260537196e-05, "loss": 0.4472, "step": 12074, "teacher_loss": 0.4000872075557709 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.29336345195770264, "learning_rate": 2.5653073779010262e-05, "loss": 0.208, "step": 12075, "teacher_loss": 0.19855134189128876 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.33302801847457886, "learning_rate": 2.5651474708405518e-05, "loss": 0.1924, "step": 12076, "teacher_loss": 0.1767941415309906 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.13110433518886566, "learning_rate": 2.5649875393594396e-05, "loss": 0.2092, "step": 12077, "teacher_loss": 0.21785868704319 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.21930095553398132, "learning_rate": 2.5648275834613562e-05, "loss": 0.2903, "step": 12078, "teacher_loss": 0.2981743812561035 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4509267210960388, "learning_rate": 2.564667603149969e-05, "loss": 0.2, "step": 12079, "teacher_loss": 0.1720849871635437 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.38413649797439575, "learning_rate": 2.5645075984289447e-05, "loss": 0.2386, "step": 12080, "teacher_loss": 0.2224786877632141 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4200987219810486, "learning_rate": 2.5643475693019536e-05, "loss": 0.2892, "step": 12081, "teacher_loss": 0.2747001647949219 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4053970277309418, "learning_rate": 2.5641875157726636e-05, "loss": 0.285, "step": 12082, "teacher_loss": 0.2716303765773773 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.386601984500885, "learning_rate": 2.5640274378447444e-05, "loss": 0.2419, "step": 12083, "teacher_loss": 0.22576940059661865 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.3250811696052551, "learning_rate": 2.5638673355218664e-05, "loss": 0.2471, "step": 12084, "teacher_loss": 0.23847800493240356 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.11539055407047272, "learning_rate": 2.5637072088076995e-05, "loss": 0.1532, "step": 12085, "teacher_loss": 0.15744438767433167 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.4190255105495453, "learning_rate": 2.5635470577059163e-05, "loss": 0.2857, "step": 12086, "teacher_loss": 0.27085593342781067 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.9595630764961243, "learning_rate": 2.5633868822201873e-05, "loss": 0.3785, "step": 12087, "teacher_loss": 0.3139900267124176 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.28952646255493164, "learning_rate": 2.563226682354185e-05, "loss": 0.2066, "step": 12088, "teacher_loss": 0.19733980298042297 }, { "compression_loss": 0.0, "epoch": 2.18, "label_loss": 0.6147408485412598, "learning_rate": 2.5630664581115827e-05, "loss": 0.3599, "step": 12089, "teacher_loss": 0.33156412839889526 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.794202983379364, "learning_rate": 2.5629062094960543e-05, "loss": 0.4562, "step": 12090, "teacher_loss": 0.4186851382255554 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 1.1200573444366455, "learning_rate": 2.562745936511273e-05, "loss": 0.4182, "step": 12091, "teacher_loss": 0.3402583599090576 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5985281467437744, "learning_rate": 2.562585639160913e-05, "loss": 0.339, "step": 12092, "teacher_loss": 0.3101271390914917 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.2703246474266052, "learning_rate": 2.5624253174486513e-05, "loss": 0.1877, "step": 12093, "teacher_loss": 0.1785367876291275 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.16954196989536285, "learning_rate": 2.5622649713781615e-05, "loss": 0.1536, "step": 12094, "teacher_loss": 0.15188385546207428 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3344150185585022, "learning_rate": 2.5621046009531208e-05, "loss": 0.1925, "step": 12095, "teacher_loss": 0.17671418190002441 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3000280261039734, "learning_rate": 2.5619442061772065e-05, "loss": 0.251, "step": 12096, "teacher_loss": 0.24557159841060638 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5467036962509155, "learning_rate": 2.5617837870540946e-05, "loss": 0.2217, "step": 12097, "teacher_loss": 0.185600146651268 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.4630429744720459, "learning_rate": 2.5616233435874648e-05, "loss": 0.2249, "step": 12098, "teacher_loss": 0.19844374060630798 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.35714492201805115, "learning_rate": 2.561462875780994e-05, "loss": 0.2282, "step": 12099, "teacher_loss": 0.21391957998275757 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.8228484392166138, "learning_rate": 2.561302383638362e-05, "loss": 0.4099, "step": 12100, "teacher_loss": 0.36405158042907715 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3310806155204773, "learning_rate": 2.561141867163248e-05, "loss": 0.1932, "step": 12101, "teacher_loss": 0.17791973054409027 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.8997209072113037, "learning_rate": 2.5609813263593332e-05, "loss": 0.2585, "step": 12102, "teacher_loss": 0.1873003989458084 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.48904651403427124, "learning_rate": 2.5608207612302966e-05, "loss": 0.2845, "step": 12103, "teacher_loss": 0.2617371082305908 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.40799176692962646, "learning_rate": 2.5606601717798212e-05, "loss": 0.2837, "step": 12104, "teacher_loss": 0.2698557674884796 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.45061129331588745, "learning_rate": 2.560499558011588e-05, "loss": 0.2876, "step": 12105, "teacher_loss": 0.2694329023361206 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.7518491744995117, "learning_rate": 2.560338919929279e-05, "loss": 0.3946, "step": 12106, "teacher_loss": 0.3548893332481384 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5422612428665161, "learning_rate": 2.560178257536578e-05, "loss": 0.3216, "step": 12107, "teacher_loss": 0.29713425040245056 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5565462708473206, "learning_rate": 2.5600175708371686e-05, "loss": 0.2897, "step": 12108, "teacher_loss": 0.2600501775741577 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 1.0550949573516846, "learning_rate": 2.559856859834734e-05, "loss": 0.3515, "step": 12109, "teacher_loss": 0.27333563566207886 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.9548097848892212, "learning_rate": 2.559696124532959e-05, "loss": 0.4254, "step": 12110, "teacher_loss": 0.3665444552898407 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5140724778175354, "learning_rate": 2.5595353649355292e-05, "loss": 0.2482, "step": 12111, "teacher_loss": 0.21863387525081635 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.772221565246582, "learning_rate": 2.5593745810461302e-05, "loss": 0.5363, "step": 12112, "teacher_loss": 0.5100698471069336 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3502851724624634, "learning_rate": 2.5592137728684477e-05, "loss": 0.3064, "step": 12113, "teacher_loss": 0.3014754354953766 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.28992676734924316, "learning_rate": 2.55905294040617e-05, "loss": 0.2912, "step": 12114, "teacher_loss": 0.29134488105773926 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.15178117156028748, "learning_rate": 2.5588920836629827e-05, "loss": 0.182, "step": 12115, "teacher_loss": 0.1853661835193634 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.15197540819644928, "learning_rate": 2.5587312026425752e-05, "loss": 0.154, "step": 12116, "teacher_loss": 0.15419799089431763 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.34311234951019287, "learning_rate": 2.5585702973486354e-05, "loss": 0.3549, "step": 12117, "teacher_loss": 0.35615992546081543 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3055446743965149, "learning_rate": 2.558409367784852e-05, "loss": 0.1864, "step": 12118, "teacher_loss": 0.17313650250434875 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.40892112255096436, "learning_rate": 2.5582484139549156e-05, "loss": 0.2881, "step": 12119, "teacher_loss": 0.27466607093811035 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.4413086175918579, "learning_rate": 2.5580874358625157e-05, "loss": 0.2409, "step": 12120, "teacher_loss": 0.21865665912628174 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.17831552028656006, "learning_rate": 2.5579264335113433e-05, "loss": 0.1819, "step": 12121, "teacher_loss": 0.18225814402103424 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.16371086239814758, "learning_rate": 2.5577654069050897e-05, "loss": 0.1663, "step": 12122, "teacher_loss": 0.1666153073310852 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.33577772974967957, "learning_rate": 2.5576043560474462e-05, "loss": 0.2234, "step": 12123, "teacher_loss": 0.21088311076164246 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5169893503189087, "learning_rate": 2.5574432809421057e-05, "loss": 0.3836, "step": 12124, "teacher_loss": 0.3687896132469177 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.5519869327545166, "learning_rate": 2.5572821815927615e-05, "loss": 0.3357, "step": 12125, "teacher_loss": 0.31165921688079834 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.33798593282699585, "learning_rate": 2.5571210580031063e-05, "loss": 0.2455, "step": 12126, "teacher_loss": 0.2352066934108734 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.7340861558914185, "learning_rate": 2.556959910176835e-05, "loss": 0.2699, "step": 12127, "teacher_loss": 0.21831554174423218 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.39911675453186035, "learning_rate": 2.556798738117642e-05, "loss": 0.27, "step": 12128, "teacher_loss": 0.2556079924106598 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.46826624870300293, "learning_rate": 2.5566375418292223e-05, "loss": 0.2859, "step": 12129, "teacher_loss": 0.26564115285873413 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.4208621382713318, "learning_rate": 2.5564763213152716e-05, "loss": 0.295, "step": 12130, "teacher_loss": 0.2809777557849884 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.7827037572860718, "learning_rate": 2.5563150765794864e-05, "loss": 0.3718, "step": 12131, "teacher_loss": 0.32617440819740295 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.2826290726661682, "learning_rate": 2.5561538076255635e-05, "loss": 0.2284, "step": 12132, "teacher_loss": 0.22239045798778534 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.379263699054718, "learning_rate": 2.5559925144572008e-05, "loss": 0.3705, "step": 12133, "teacher_loss": 0.36947742104530334 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.4460013508796692, "learning_rate": 2.555831197078095e-05, "loss": 0.2506, "step": 12134, "teacher_loss": 0.22890590131282806 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.2931884229183197, "learning_rate": 2.5556698554919453e-05, "loss": 0.2403, "step": 12135, "teacher_loss": 0.2344590574502945 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.3257597088813782, "learning_rate": 2.5555084897024515e-05, "loss": 0.2206, "step": 12136, "teacher_loss": 0.20887988805770874 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.32765859365463257, "learning_rate": 2.5553470997133125e-05, "loss": 0.2559, "step": 12137, "teacher_loss": 0.2478812038898468 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.6798316240310669, "learning_rate": 2.5551856855282284e-05, "loss": 0.2863, "step": 12138, "teacher_loss": 0.2425793558359146 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.4339195787906647, "learning_rate": 2.5550242471509e-05, "loss": 0.3771, "step": 12139, "teacher_loss": 0.37080326676368713 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.20960810780525208, "learning_rate": 2.554862784585029e-05, "loss": 0.222, "step": 12140, "teacher_loss": 0.22340822219848633 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.19935759902000427, "learning_rate": 2.554701297834317e-05, "loss": 0.2393, "step": 12141, "teacher_loss": 0.24370066821575165 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.6644387245178223, "learning_rate": 2.5545397869024666e-05, "loss": 0.3751, "step": 12142, "teacher_loss": 0.3429903984069824 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.44039052724838257, "learning_rate": 2.5543782517931802e-05, "loss": 0.4038, "step": 12143, "teacher_loss": 0.3997552692890167 }, { "compression_loss": 0.0, "epoch": 2.19, "label_loss": 0.39276933670043945, "learning_rate": 2.554216692510162e-05, "loss": 0.2379, "step": 12144, "teacher_loss": 0.22066599130630493 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.34065189957618713, "learning_rate": 2.5540551090571153e-05, "loss": 0.2422, "step": 12145, "teacher_loss": 0.2312653511762619 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5802285671234131, "learning_rate": 2.5538935014377452e-05, "loss": 0.2676, "step": 12146, "teacher_loss": 0.2328624427318573 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5580289363861084, "learning_rate": 2.553731869655757e-05, "loss": 0.3375, "step": 12147, "teacher_loss": 0.31304556131362915 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6319280862808228, "learning_rate": 2.5535702137148562e-05, "loss": 0.419, "step": 12148, "teacher_loss": 0.3952864110469818 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.2756175100803375, "learning_rate": 2.5534085336187494e-05, "loss": 0.1954, "step": 12149, "teacher_loss": 0.1864548623561859 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6401157379150391, "learning_rate": 2.5532468293711435e-05, "loss": 0.2689, "step": 12150, "teacher_loss": 0.22761327028274536 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.4783114492893219, "learning_rate": 2.553085100975745e-05, "loss": 0.3794, "step": 12151, "teacher_loss": 0.36843979358673096 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5988929867744446, "learning_rate": 2.5529233484362628e-05, "loss": 0.2333, "step": 12152, "teacher_loss": 0.1926494836807251 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.9094337224960327, "learning_rate": 2.5527615717564046e-05, "loss": 0.3233, "step": 12153, "teacher_loss": 0.25820791721343994 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.17630723118782043, "learning_rate": 2.5525997709398802e-05, "loss": 0.1679, "step": 12154, "teacher_loss": 0.1669875979423523 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5125003457069397, "learning_rate": 2.552437945990399e-05, "loss": 0.2524, "step": 12155, "teacher_loss": 0.2234780490398407 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.35562562942504883, "learning_rate": 2.5522760969116707e-05, "loss": 0.3187, "step": 12156, "teacher_loss": 0.31456565856933594 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.4955211877822876, "learning_rate": 2.5521142237074066e-05, "loss": 0.3079, "step": 12157, "teacher_loss": 0.2870635986328125 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6698476076126099, "learning_rate": 2.551952326381318e-05, "loss": 0.2409, "step": 12158, "teacher_loss": 0.19325101375579834 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.19754254817962646, "learning_rate": 2.5517904049371165e-05, "loss": 0.1932, "step": 12159, "teacher_loss": 0.19276052713394165 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.38089612126350403, "learning_rate": 2.551628459378514e-05, "loss": 0.2733, "step": 12160, "teacher_loss": 0.26136380434036255 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6516788601875305, "learning_rate": 2.5514664897092242e-05, "loss": 0.3308, "step": 12161, "teacher_loss": 0.2951948940753937 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.3119394779205322, "learning_rate": 2.5513044959329602e-05, "loss": 0.1577, "step": 12162, "teacher_loss": 0.1405426561832428 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.2910478115081787, "learning_rate": 2.5511424780534363e-05, "loss": 0.2653, "step": 12163, "teacher_loss": 0.26248979568481445 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.8668642044067383, "learning_rate": 2.5509804360743663e-05, "loss": 0.4213, "step": 12164, "teacher_loss": 0.3718433380126953 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.9940638542175293, "learning_rate": 2.5508183699994663e-05, "loss": 0.4573, "step": 12165, "teacher_loss": 0.39761459827423096 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.41105973720550537, "learning_rate": 2.5506562798324514e-05, "loss": 0.2394, "step": 12166, "teacher_loss": 0.2203579545021057 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 1.1164674758911133, "learning_rate": 2.5504941655770383e-05, "loss": 0.2955, "step": 12167, "teacher_loss": 0.20424652099609375 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.2605484127998352, "learning_rate": 2.5503320272369433e-05, "loss": 0.2504, "step": 12168, "teacher_loss": 0.24932171404361725 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.2466181516647339, "learning_rate": 2.5501698648158842e-05, "loss": 0.2516, "step": 12169, "teacher_loss": 0.2521243095397949 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.20357359945774078, "learning_rate": 2.550007678317578e-05, "loss": 0.1979, "step": 12170, "teacher_loss": 0.19724902510643005 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.594035804271698, "learning_rate": 2.5498454677457447e-05, "loss": 0.3659, "step": 12171, "teacher_loss": 0.34059953689575195 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.39862537384033203, "learning_rate": 2.549683233104102e-05, "loss": 0.1825, "step": 12172, "teacher_loss": 0.15850132703781128 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.3801894187927246, "learning_rate": 2.54952097439637e-05, "loss": 0.3092, "step": 12173, "teacher_loss": 0.301271915435791 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.20888887345790863, "learning_rate": 2.549358691626269e-05, "loss": 0.1734, "step": 12174, "teacher_loss": 0.1695026457309723 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6259768605232239, "learning_rate": 2.5491963847975185e-05, "loss": 0.3332, "step": 12175, "teacher_loss": 0.30065059661865234 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.40156078338623047, "learning_rate": 2.549034053913841e-05, "loss": 0.3423, "step": 12176, "teacher_loss": 0.3357672393321991 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.29409000277519226, "learning_rate": 2.548871698978958e-05, "loss": 0.2437, "step": 12177, "teacher_loss": 0.2380896657705307 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.502200186252594, "learning_rate": 2.548709319996591e-05, "loss": 0.2285, "step": 12178, "teacher_loss": 0.19810648262500763 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.2712060511112213, "learning_rate": 2.548546916970464e-05, "loss": 0.1867, "step": 12179, "teacher_loss": 0.1773313283920288 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5376166105270386, "learning_rate": 2.5483844899043e-05, "loss": 0.4442, "step": 12180, "teacher_loss": 0.43385449051856995 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.37323909997940063, "learning_rate": 2.5482220388018227e-05, "loss": 0.2298, "step": 12181, "teacher_loss": 0.21388836205005646 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.26472529768943787, "learning_rate": 2.548059563666757e-05, "loss": 0.1785, "step": 12182, "teacher_loss": 0.16889676451683044 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.7561064958572388, "learning_rate": 2.5478970645028274e-05, "loss": 0.4525, "step": 12183, "teacher_loss": 0.4187861382961273 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.369077205657959, "learning_rate": 2.5477345413137597e-05, "loss": 0.3052, "step": 12184, "teacher_loss": 0.29804956912994385 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.4446794390678406, "learning_rate": 2.5475719941032807e-05, "loss": 0.2919, "step": 12185, "teacher_loss": 0.27488404512405396 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.38614439964294434, "learning_rate": 2.5474094228751165e-05, "loss": 0.2017, "step": 12186, "teacher_loss": 0.1811678111553192 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.35596877336502075, "learning_rate": 2.5472468276329947e-05, "loss": 0.2945, "step": 12187, "teacher_loss": 0.2877189517021179 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.636809766292572, "learning_rate": 2.5470842083806424e-05, "loss": 0.2759, "step": 12188, "teacher_loss": 0.2358129769563675 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.3739003837108612, "learning_rate": 2.546921565121789e-05, "loss": 0.2272, "step": 12189, "teacher_loss": 0.2108791172504425 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.3810468316078186, "learning_rate": 2.546758897860163e-05, "loss": 0.2265, "step": 12190, "teacher_loss": 0.20934978127479553 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5666791796684265, "learning_rate": 2.5465962065994938e-05, "loss": 0.2648, "step": 12191, "teacher_loss": 0.23131319880485535 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 1.2998859882354736, "learning_rate": 2.5464334913435112e-05, "loss": 0.3545, "step": 12192, "teacher_loss": 0.24944192171096802 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6995748281478882, "learning_rate": 2.546270752095946e-05, "loss": 0.2771, "step": 12193, "teacher_loss": 0.23019832372665405 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.27509185671806335, "learning_rate": 2.5461079888605296e-05, "loss": 0.1971, "step": 12194, "teacher_loss": 0.18840250372886658 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.8868666887283325, "learning_rate": 2.545945201640993e-05, "loss": 0.3226, "step": 12195, "teacher_loss": 0.2598605155944824 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6806921362876892, "learning_rate": 2.5457823904410693e-05, "loss": 0.3553, "step": 12196, "teacher_loss": 0.3191791772842407 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.5159366130828857, "learning_rate": 2.5456195552644907e-05, "loss": 0.3266, "step": 12197, "teacher_loss": 0.3055843412876129 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.22315362095832825, "learning_rate": 2.5454566961149904e-05, "loss": 0.2582, "step": 12198, "teacher_loss": 0.2620655298233032 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.6639368534088135, "learning_rate": 2.545293812996303e-05, "loss": 0.2666, "step": 12199, "teacher_loss": 0.2224433273077011 }, { "compression_loss": 0.0, "epoch": 2.2, "label_loss": 0.3419730067253113, "learning_rate": 2.545130905912162e-05, "loss": 0.2959, "step": 12200, "teacher_loss": 0.2907707691192627 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.25674182176589966, "learning_rate": 2.5449679748663027e-05, "loss": 0.2321, "step": 12201, "teacher_loss": 0.2293972373008728 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.4763478636741638, "learning_rate": 2.5448050198624606e-05, "loss": 0.2808, "step": 12202, "teacher_loss": 0.25910478830337524 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.2921602725982666, "learning_rate": 2.544642040904372e-05, "loss": 0.2109, "step": 12203, "teacher_loss": 0.2018449902534485 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.30316439270973206, "learning_rate": 2.544479037995774e-05, "loss": 0.219, "step": 12204, "teacher_loss": 0.2096112072467804 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.18290674686431885, "learning_rate": 2.5443160111404024e-05, "loss": 0.1857, "step": 12205, "teacher_loss": 0.1859687864780426 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.578205943107605, "learning_rate": 2.5441529603419963e-05, "loss": 0.2439, "step": 12206, "teacher_loss": 0.20679515600204468 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6500942707061768, "learning_rate": 2.543989885604293e-05, "loss": 0.3032, "step": 12207, "teacher_loss": 0.2647073268890381 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.7271655797958374, "learning_rate": 2.5438267869310317e-05, "loss": 0.3424, "step": 12208, "teacher_loss": 0.29963570833206177 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.2669234275817871, "learning_rate": 2.5436636643259515e-05, "loss": 0.2364, "step": 12209, "teacher_loss": 0.2330320179462433 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.30647045373916626, "learning_rate": 2.543500517792793e-05, "loss": 0.1983, "step": 12210, "teacher_loss": 0.18623371422290802 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5310353636741638, "learning_rate": 2.543337347335296e-05, "loss": 0.2795, "step": 12211, "teacher_loss": 0.25157874822616577 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.8132265210151672, "learning_rate": 2.5431741529572017e-05, "loss": 0.4545, "step": 12212, "teacher_loss": 0.4146061837673187 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.35578739643096924, "learning_rate": 2.5430109346622518e-05, "loss": 0.232, "step": 12213, "teacher_loss": 0.21820631623268127 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.4285944402217865, "learning_rate": 2.542847692454188e-05, "loss": 0.2554, "step": 12214, "teacher_loss": 0.23618894815444946 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.3426033854484558, "learning_rate": 2.5426844263367533e-05, "loss": 0.3584, "step": 12215, "teacher_loss": 0.3601352274417877 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6546006202697754, "learning_rate": 2.542521136313691e-05, "loss": 0.2752, "step": 12216, "teacher_loss": 0.23302070796489716 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.33751142024993896, "learning_rate": 2.5423578223887445e-05, "loss": 0.2075, "step": 12217, "teacher_loss": 0.193055659532547 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.2591434121131897, "learning_rate": 2.5421944845656585e-05, "loss": 0.2836, "step": 12218, "teacher_loss": 0.286365270614624 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5391420722007751, "learning_rate": 2.5420311228481775e-05, "loss": 0.2913, "step": 12219, "teacher_loss": 0.26380959153175354 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.167050302028656, "learning_rate": 2.541867737240047e-05, "loss": 0.2161, "step": 12220, "teacher_loss": 0.2215423583984375 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.4708930253982544, "learning_rate": 2.541704327745013e-05, "loss": 0.2599, "step": 12221, "teacher_loss": 0.23643046617507935 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.9012323617935181, "learning_rate": 2.541540894366822e-05, "loss": 0.3429, "step": 12222, "teacher_loss": 0.28090861439704895 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.27898362278938293, "learning_rate": 2.5413774371092208e-05, "loss": 0.2377, "step": 12223, "teacher_loss": 0.2330804020166397 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6240382790565491, "learning_rate": 2.5412139559759575e-05, "loss": 0.362, "step": 12224, "teacher_loss": 0.33287495374679565 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6844403743743896, "learning_rate": 2.5410504509707796e-05, "loss": 0.3733, "step": 12225, "teacher_loss": 0.33878186345100403 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 1.0009214878082275, "learning_rate": 2.5408869220974364e-05, "loss": 0.7458, "step": 12226, "teacher_loss": 0.717411994934082 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.1303437352180481, "learning_rate": 2.5407233693596767e-05, "loss": 0.1787, "step": 12227, "teacher_loss": 0.18411031365394592 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.3079826235771179, "learning_rate": 2.5405597927612504e-05, "loss": 0.2932, "step": 12228, "teacher_loss": 0.291509747505188 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.19590017199516296, "learning_rate": 2.5403961923059077e-05, "loss": 0.1753, "step": 12229, "teacher_loss": 0.1729845106601715 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.20978929102420807, "learning_rate": 2.5402325679973995e-05, "loss": 0.2061, "step": 12230, "teacher_loss": 0.2056489884853363 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.322479784488678, "learning_rate": 2.5400689198394776e-05, "loss": 0.3062, "step": 12231, "teacher_loss": 0.30440637469291687 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.577194333076477, "learning_rate": 2.5399052478358934e-05, "loss": 0.2818, "step": 12232, "teacher_loss": 0.24897003173828125 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.9230172634124756, "learning_rate": 2.5397415519903998e-05, "loss": 0.4761, "step": 12233, "teacher_loss": 0.42638739943504333 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5293896794319153, "learning_rate": 2.53957783230675e-05, "loss": 0.247, "step": 12234, "teacher_loss": 0.21563827991485596 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.3310241401195526, "learning_rate": 2.5394140887886967e-05, "loss": 0.2422, "step": 12235, "teacher_loss": 0.23234772682189941 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.29574957489967346, "learning_rate": 2.539250321439995e-05, "loss": 0.2906, "step": 12236, "teacher_loss": 0.2899932265281677 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.7096019983291626, "learning_rate": 2.5390865302643993e-05, "loss": 0.2405, "step": 12237, "teacher_loss": 0.18833017349243164 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.17499977350234985, "learning_rate": 2.5389227152656646e-05, "loss": 0.1554, "step": 12238, "teacher_loss": 0.15326133370399475 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6122881174087524, "learning_rate": 2.538758876447547e-05, "loss": 0.3549, "step": 12239, "teacher_loss": 0.3263135552406311 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.2680577337741852, "learning_rate": 2.538595013813803e-05, "loss": 0.1792, "step": 12240, "teacher_loss": 0.16934826970100403 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5762743949890137, "learning_rate": 2.5384311273681885e-05, "loss": 0.3814, "step": 12241, "teacher_loss": 0.3597896695137024 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.3890647888183594, "learning_rate": 2.538267217114462e-05, "loss": 0.2509, "step": 12242, "teacher_loss": 0.23554500937461853 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.47311797738075256, "learning_rate": 2.538103283056382e-05, "loss": 0.1978, "step": 12243, "teacher_loss": 0.16722692549228668 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.6457678079605103, "learning_rate": 2.537939325197705e-05, "loss": 0.2454, "step": 12244, "teacher_loss": 0.20094084739685059 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.4834545850753784, "learning_rate": 2.5377753435421916e-05, "loss": 0.231, "step": 12245, "teacher_loss": 0.20297545194625854 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.9597777128219604, "learning_rate": 2.5376113380936003e-05, "loss": 0.3049, "step": 12246, "teacher_loss": 0.23213037848472595 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5074963569641113, "learning_rate": 2.5374473088556927e-05, "loss": 0.3856, "step": 12247, "teacher_loss": 0.3720594644546509 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.33791038393974304, "learning_rate": 2.537283255832229e-05, "loss": 0.235, "step": 12248, "teacher_loss": 0.22358958423137665 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.5721325874328613, "learning_rate": 2.5371191790269692e-05, "loss": 0.3622, "step": 12249, "teacher_loss": 0.3388659358024597 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.27837905287742615, "learning_rate": 2.5369550784436767e-05, "loss": 0.2977, "step": 12250, "teacher_loss": 0.29985642433166504 }, { "epoch": 2.21, "eval_exact_match": 79.29044465468307, "eval_f1": 86.76596337668238, "step": 12250 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.9296122789382935, "learning_rate": 2.536790954086113e-05, "loss": 0.3225, "step": 12251, "teacher_loss": 0.2550719976425171 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.2560519874095917, "learning_rate": 2.536626805958041e-05, "loss": 0.1957, "step": 12252, "teacher_loss": 0.1890290528535843 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.8779016733169556, "learning_rate": 2.536462634063225e-05, "loss": 0.2906, "step": 12253, "teacher_loss": 0.22538354992866516 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 1.0749298334121704, "learning_rate": 2.5362984384054276e-05, "loss": 0.9309, "step": 12254, "teacher_loss": 0.9148869514465332 }, { "compression_loss": 0.0, "epoch": 2.21, "label_loss": 0.3358515799045563, "learning_rate": 2.536134218988414e-05, "loss": 0.1955, "step": 12255, "teacher_loss": 0.1799592226743698 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5111252069473267, "learning_rate": 2.53596997581595e-05, "loss": 0.2038, "step": 12256, "teacher_loss": 0.1696714460849762 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4583056569099426, "learning_rate": 2.5358057088917998e-05, "loss": 0.2966, "step": 12257, "teacher_loss": 0.27862897515296936 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.33745095133781433, "learning_rate": 2.5356414182197304e-05, "loss": 0.2372, "step": 12258, "teacher_loss": 0.22602951526641846 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 1.1009776592254639, "learning_rate": 2.5354771038035083e-05, "loss": 0.5399, "step": 12259, "teacher_loss": 0.4775271713733673 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4117460250854492, "learning_rate": 2.5353127656469006e-05, "loss": 0.2163, "step": 12260, "teacher_loss": 0.19461394846439362 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.3260696232318878, "learning_rate": 2.5351484037536752e-05, "loss": 0.2238, "step": 12261, "teacher_loss": 0.21238702535629272 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.2323688566684723, "learning_rate": 2.5349840181276006e-05, "loss": 0.2343, "step": 12262, "teacher_loss": 0.2345256507396698 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.27624520659446716, "learning_rate": 2.5348196087724453e-05, "loss": 0.2016, "step": 12263, "teacher_loss": 0.19335487484931946 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5712655782699585, "learning_rate": 2.534655175691979e-05, "loss": 0.223, "step": 12264, "teacher_loss": 0.18433691561222076 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.26065075397491455, "learning_rate": 2.5344907188899715e-05, "loss": 0.2413, "step": 12265, "teacher_loss": 0.23919284343719482 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5144751071929932, "learning_rate": 2.5343262383701935e-05, "loss": 0.2358, "step": 12266, "teacher_loss": 0.20479834079742432 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.2731708884239197, "learning_rate": 2.5341617341364162e-05, "loss": 0.25, "step": 12267, "teacher_loss": 0.24739684164524078 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.9103981256484985, "learning_rate": 2.5339972061924107e-05, "loss": 0.3201, "step": 12268, "teacher_loss": 0.2545044720172882 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4443921446800232, "learning_rate": 2.5338326545419492e-05, "loss": 0.2583, "step": 12269, "teacher_loss": 0.23766474425792694 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.908003568649292, "learning_rate": 2.5336680791888046e-05, "loss": 0.443, "step": 12270, "teacher_loss": 0.39138558506965637 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5844656229019165, "learning_rate": 2.5335034801367504e-05, "loss": 0.3635, "step": 12271, "teacher_loss": 0.3389894366264343 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5441825985908508, "learning_rate": 2.533338857389559e-05, "loss": 0.3986, "step": 12272, "teacher_loss": 0.38241827487945557 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.696174144744873, "learning_rate": 2.533174210951007e-05, "loss": 0.755, "step": 12273, "teacher_loss": 0.7615283727645874 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4746894836425781, "learning_rate": 2.533009540824867e-05, "loss": 0.2785, "step": 12274, "teacher_loss": 0.2566780149936676 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.37983238697052, "learning_rate": 2.5328448470149155e-05, "loss": 0.2736, "step": 12275, "teacher_loss": 0.26174378395080566 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.38325080275535583, "learning_rate": 2.5326801295249287e-05, "loss": 0.2921, "step": 12276, "teacher_loss": 0.28199535608291626 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4038093090057373, "learning_rate": 2.532515388358682e-05, "loss": 0.3282, "step": 12277, "teacher_loss": 0.3197876214981079 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.477444052696228, "learning_rate": 2.532350623519954e-05, "loss": 0.2698, "step": 12278, "teacher_loss": 0.2467428743839264 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 1.0434633493423462, "learning_rate": 2.5321858350125206e-05, "loss": 0.6004, "step": 12279, "teacher_loss": 0.5511615872383118 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.6344360113143921, "learning_rate": 2.532021022840161e-05, "loss": 0.3336, "step": 12280, "teacher_loss": 0.3002076745033264 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.3277432322502136, "learning_rate": 2.5318561870066536e-05, "loss": 0.3299, "step": 12281, "teacher_loss": 0.3301534652709961 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.23082366585731506, "learning_rate": 2.5316913275157772e-05, "loss": 0.2516, "step": 12282, "teacher_loss": 0.25393566489219666 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.30696696043014526, "learning_rate": 2.531526444371312e-05, "loss": 0.2247, "step": 12283, "teacher_loss": 0.21550793945789337 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.3932054936885834, "learning_rate": 2.531361537577038e-05, "loss": 0.2081, "step": 12284, "teacher_loss": 0.18748819828033447 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.3584969639778137, "learning_rate": 2.5311966071367362e-05, "loss": 0.2614, "step": 12285, "teacher_loss": 0.25062295794487 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.23466432094573975, "learning_rate": 2.5310316530541883e-05, "loss": 0.195, "step": 12286, "teacher_loss": 0.19064068794250488 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.2840055227279663, "learning_rate": 2.530866675333175e-05, "loss": 0.2686, "step": 12287, "teacher_loss": 0.2669365704059601 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 1.2123680114746094, "learning_rate": 2.5307016739774802e-05, "loss": 0.3259, "step": 12288, "teacher_loss": 0.22741109132766724 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.3435225486755371, "learning_rate": 2.5305366489908858e-05, "loss": 0.269, "step": 12289, "teacher_loss": 0.26074591279029846 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4015759229660034, "learning_rate": 2.5303716003771757e-05, "loss": 0.251, "step": 12290, "teacher_loss": 0.23431888222694397 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.13438430428504944, "learning_rate": 2.5302065281401344e-05, "loss": 0.1647, "step": 12291, "teacher_loss": 0.1680353432893753 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.688004732131958, "learning_rate": 2.530041432283546e-05, "loss": 0.4842, "step": 12292, "teacher_loss": 0.46154218912124634 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.81205153465271, "learning_rate": 2.5298763128111956e-05, "loss": 0.5582, "step": 12293, "teacher_loss": 0.5300028324127197 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.45525839924812317, "learning_rate": 2.529711169726869e-05, "loss": 0.2474, "step": 12294, "teacher_loss": 0.22434329986572266 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.20514178276062012, "learning_rate": 2.5295460030343528e-05, "loss": 0.2127, "step": 12295, "teacher_loss": 0.21359267830848694 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.24290992319583893, "learning_rate": 2.529380812737433e-05, "loss": 0.2109, "step": 12296, "teacher_loss": 0.20738664269447327 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.47009631991386414, "learning_rate": 2.5292155988398984e-05, "loss": 0.2567, "step": 12297, "teacher_loss": 0.2329806685447693 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5115929841995239, "learning_rate": 2.529050361345535e-05, "loss": 0.3712, "step": 12298, "teacher_loss": 0.3556061089038849 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4043690860271454, "learning_rate": 2.5288851002581323e-05, "loss": 0.2584, "step": 12299, "teacher_loss": 0.24222835898399353 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.8033109903335571, "learning_rate": 2.528719815581479e-05, "loss": 0.4493, "step": 12300, "teacher_loss": 0.4099869430065155 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.6267677545547485, "learning_rate": 2.528554507319365e-05, "loss": 0.3262, "step": 12301, "teacher_loss": 0.29279619455337524 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.412370502948761, "learning_rate": 2.528389175475579e-05, "loss": 0.2608, "step": 12302, "teacher_loss": 0.24394631385803223 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.11264127492904663, "learning_rate": 2.5282238200539134e-05, "loss": 0.1502, "step": 12303, "teacher_loss": 0.15435829758644104 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.2367410659790039, "learning_rate": 2.5280584410581575e-05, "loss": 0.2112, "step": 12304, "teacher_loss": 0.20840021967887878 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5079933404922485, "learning_rate": 2.5278930384921046e-05, "loss": 0.36, "step": 12305, "teacher_loss": 0.34352245926856995 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.5174824595451355, "learning_rate": 2.527727612359546e-05, "loss": 0.2778, "step": 12306, "teacher_loss": 0.25112849473953247 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.20662257075309753, "learning_rate": 2.5275621626642743e-05, "loss": 0.229, "step": 12307, "teacher_loss": 0.23151341080665588 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.4123964309692383, "learning_rate": 2.5273966894100833e-05, "loss": 0.1731, "step": 12308, "teacher_loss": 0.1465543806552887 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.26879632472991943, "learning_rate": 2.527231192600766e-05, "loss": 0.2096, "step": 12309, "teacher_loss": 0.20302796363830566 }, { "compression_loss": 0.0, "epoch": 2.22, "label_loss": 0.36674627661705017, "learning_rate": 2.5270656722401175e-05, "loss": 0.2694, "step": 12310, "teacher_loss": 0.25857865810394287 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 1.0059449672698975, "learning_rate": 2.5269001283319325e-05, "loss": 0.3877, "step": 12311, "teacher_loss": 0.31898999214172363 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.603652834892273, "learning_rate": 2.526734560880006e-05, "loss": 0.3063, "step": 12312, "teacher_loss": 0.2732313275337219 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3125823736190796, "learning_rate": 2.526568969888135e-05, "loss": 0.2279, "step": 12313, "teacher_loss": 0.2184869349002838 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.9605036973953247, "learning_rate": 2.526403355360115e-05, "loss": 0.3873, "step": 12314, "teacher_loss": 0.32365259528160095 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.689501166343689, "learning_rate": 2.526237717299743e-05, "loss": 0.2791, "step": 12315, "teacher_loss": 0.23346194624900818 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.6650179028511047, "learning_rate": 2.5260720557108177e-05, "loss": 0.3886, "step": 12316, "teacher_loss": 0.3578924536705017 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.5012112259864807, "learning_rate": 2.5259063705971362e-05, "loss": 0.3322, "step": 12317, "teacher_loss": 0.31341585516929626 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.7076420783996582, "learning_rate": 2.5257406619624973e-05, "loss": 0.3736, "step": 12318, "teacher_loss": 0.33649519085884094 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.1985689401626587, "learning_rate": 2.5255749298107005e-05, "loss": 0.2025, "step": 12319, "teacher_loss": 0.20289430022239685 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.5653092861175537, "learning_rate": 2.525409174145545e-05, "loss": 0.4016, "step": 12320, "teacher_loss": 0.38338807225227356 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2578890919685364, "learning_rate": 2.525243394970832e-05, "loss": 0.3023, "step": 12321, "teacher_loss": 0.3072129786014557 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2701598107814789, "learning_rate": 2.5250775922903617e-05, "loss": 0.2705, "step": 12322, "teacher_loss": 0.2705652713775635 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.40176039934158325, "learning_rate": 2.524911766107935e-05, "loss": 0.3017, "step": 12323, "teacher_loss": 0.2906301021575928 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.34677693247795105, "learning_rate": 2.524745916427355e-05, "loss": 0.2365, "step": 12324, "teacher_loss": 0.22424519062042236 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.46364545822143555, "learning_rate": 2.5245800432524233e-05, "loss": 0.2939, "step": 12325, "teacher_loss": 0.2750820219516754 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.7258146405220032, "learning_rate": 2.524414146586943e-05, "loss": 0.2242, "step": 12326, "teacher_loss": 0.1684824824333191 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.17541539669036865, "learning_rate": 2.5242482264347177e-05, "loss": 0.2386, "step": 12327, "teacher_loss": 0.24560345709323883 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2925964891910553, "learning_rate": 2.5240822827995513e-05, "loss": 0.2453, "step": 12328, "teacher_loss": 0.24004411697387695 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.49576184153556824, "learning_rate": 2.5239163156852486e-05, "loss": 0.2554, "step": 12329, "teacher_loss": 0.22866977751255035 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3057345747947693, "learning_rate": 2.5237503250956145e-05, "loss": 0.2151, "step": 12330, "teacher_loss": 0.20503583550453186 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2105225920677185, "learning_rate": 2.523584311034455e-05, "loss": 0.2559, "step": 12331, "teacher_loss": 0.2609631419181824 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.5388713479042053, "learning_rate": 2.523418273505576e-05, "loss": 0.4055, "step": 12332, "teacher_loss": 0.3907148241996765 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2054351270198822, "learning_rate": 2.5232522125127847e-05, "loss": 0.2211, "step": 12333, "teacher_loss": 0.22279176115989685 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.19322121143341064, "learning_rate": 2.523086128059888e-05, "loss": 0.1828, "step": 12334, "teacher_loss": 0.18162457644939423 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.16752223670482635, "learning_rate": 2.522920020150693e-05, "loss": 0.2175, "step": 12335, "teacher_loss": 0.22306691110134125 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.4404308795928955, "learning_rate": 2.52275388878901e-05, "loss": 0.2492, "step": 12336, "teacher_loss": 0.22796694934368134 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3641465902328491, "learning_rate": 2.5225877339786456e-05, "loss": 0.2439, "step": 12337, "teacher_loss": 0.23058444261550903 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.10579530894756317, "learning_rate": 2.5224215557234108e-05, "loss": 0.2283, "step": 12338, "teacher_loss": 0.24196499586105347 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.20032191276550293, "learning_rate": 2.5222553540271152e-05, "loss": 0.1729, "step": 12339, "teacher_loss": 0.16986048221588135 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.22834090888500214, "learning_rate": 2.522089128893569e-05, "loss": 0.2164, "step": 12340, "teacher_loss": 0.21511945128440857 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2949870824813843, "learning_rate": 2.5219228803265837e-05, "loss": 0.1955, "step": 12341, "teacher_loss": 0.18441908061504364 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.28125235438346863, "learning_rate": 2.5217566083299708e-05, "loss": 0.3397, "step": 12342, "teacher_loss": 0.3462051451206207 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.5554111003875732, "learning_rate": 2.521590312907542e-05, "loss": 0.3863, "step": 12343, "teacher_loss": 0.36748817563056946 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.4666186571121216, "learning_rate": 2.5214239940631098e-05, "loss": 0.4264, "step": 12344, "teacher_loss": 0.4218958020210266 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.6386550068855286, "learning_rate": 2.5212576518004884e-05, "loss": 0.3441, "step": 12345, "teacher_loss": 0.31139230728149414 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3177204728126526, "learning_rate": 2.5210912861234904e-05, "loss": 0.2426, "step": 12346, "teacher_loss": 0.23427358269691467 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.5241358876228333, "learning_rate": 2.5209248970359312e-05, "loss": 0.3523, "step": 12347, "teacher_loss": 0.3332063853740692 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.36660993099212646, "learning_rate": 2.5207584845416245e-05, "loss": 0.2527, "step": 12348, "teacher_loss": 0.24004098773002625 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3544429540634155, "learning_rate": 2.5205920486443863e-05, "loss": 0.1726, "step": 12349, "teacher_loss": 0.15240055322647095 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2600035071372986, "learning_rate": 2.5204255893480326e-05, "loss": 0.1647, "step": 12350, "teacher_loss": 0.15409821271896362 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.22153496742248535, "learning_rate": 2.520259106656379e-05, "loss": 0.2229, "step": 12351, "teacher_loss": 0.22299712896347046 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.6594671607017517, "learning_rate": 2.5200926005732437e-05, "loss": 0.3902, "step": 12352, "teacher_loss": 0.36033380031585693 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3865515887737274, "learning_rate": 2.5199260711024425e-05, "loss": 0.2841, "step": 12353, "teacher_loss": 0.27273595333099365 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2779424488544464, "learning_rate": 2.519759518247795e-05, "loss": 0.2179, "step": 12354, "teacher_loss": 0.2112237960100174 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.4998508393764496, "learning_rate": 2.5195929420131194e-05, "loss": 0.3598, "step": 12355, "teacher_loss": 0.3441901206970215 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.9949327707290649, "learning_rate": 2.5194263424022338e-05, "loss": 0.3591, "step": 12356, "teacher_loss": 0.2884986698627472 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.2694770395755768, "learning_rate": 2.519259719418959e-05, "loss": 0.298, "step": 12357, "teacher_loss": 0.30119824409484863 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3596166670322418, "learning_rate": 2.519093073067115e-05, "loss": 0.2584, "step": 12358, "teacher_loss": 0.2471758872270584 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.3671939969062805, "learning_rate": 2.5189264033505216e-05, "loss": 0.213, "step": 12359, "teacher_loss": 0.19588702917099 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.6233421564102173, "learning_rate": 2.5187597102730014e-05, "loss": 0.2771, "step": 12360, "teacher_loss": 0.23864424228668213 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 1.1287585496902466, "learning_rate": 2.5185929938383748e-05, "loss": 0.8695, "step": 12361, "teacher_loss": 0.840638279914856 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.12235197424888611, "learning_rate": 2.5184262540504654e-05, "loss": 0.1577, "step": 12362, "teacher_loss": 0.1616610586643219 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.43008720874786377, "learning_rate": 2.518259490913095e-05, "loss": 0.2776, "step": 12363, "teacher_loss": 0.2606595754623413 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.10917875170707703, "learning_rate": 2.518092704430087e-05, "loss": 0.2169, "step": 12364, "teacher_loss": 0.22888991236686707 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.7224491834640503, "learning_rate": 2.5179258946052664e-05, "loss": 0.3426, "step": 12365, "teacher_loss": 0.3003872036933899 }, { "compression_loss": 0.0, "epoch": 2.23, "label_loss": 0.46664172410964966, "learning_rate": 2.5177590614424564e-05, "loss": 0.1992, "step": 12366, "teacher_loss": 0.1694466471672058 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.25799527764320374, "learning_rate": 2.5175922049454826e-05, "loss": 0.1859, "step": 12367, "teacher_loss": 0.1779094785451889 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.22151395678520203, "learning_rate": 2.5174253251181707e-05, "loss": 0.1945, "step": 12368, "teacher_loss": 0.1915152221918106 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.28556329011917114, "learning_rate": 2.5172584219643466e-05, "loss": 0.2379, "step": 12369, "teacher_loss": 0.23260748386383057 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.7169944047927856, "learning_rate": 2.5170914954878366e-05, "loss": 0.3269, "step": 12370, "teacher_loss": 0.283573716878891 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.26275110244750977, "learning_rate": 2.516924545692468e-05, "loss": 0.2019, "step": 12371, "teacher_loss": 0.1951197236776352 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.8241665363311768, "learning_rate": 2.5167575725820684e-05, "loss": 0.3122, "step": 12372, "teacher_loss": 0.25532424449920654 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4831430912017822, "learning_rate": 2.516590576160466e-05, "loss": 0.2744, "step": 12373, "teacher_loss": 0.2511821985244751 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 1.5642801523208618, "learning_rate": 2.5164235564314896e-05, "loss": 0.3503, "step": 12374, "teacher_loss": 0.21542152762413025 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 1.031935214996338, "learning_rate": 2.5162565133989685e-05, "loss": 0.4997, "step": 12375, "teacher_loss": 0.44060200452804565 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.38501325249671936, "learning_rate": 2.516089447066732e-05, "loss": 0.2633, "step": 12376, "teacher_loss": 0.24980224668979645 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.34710633754730225, "learning_rate": 2.5159223574386117e-05, "loss": 0.2682, "step": 12377, "teacher_loss": 0.2593998610973358 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.5922276973724365, "learning_rate": 2.5157552445184372e-05, "loss": 0.2502, "step": 12378, "teacher_loss": 0.21220096945762634 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.2799423933029175, "learning_rate": 2.5155881083100402e-05, "loss": 0.2017, "step": 12379, "teacher_loss": 0.19295164942741394 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.27575406432151794, "learning_rate": 2.515420948817253e-05, "loss": 0.249, "step": 12380, "teacher_loss": 0.24604164063930511 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.7159192562103271, "learning_rate": 2.5152537660439078e-05, "loss": 0.3301, "step": 12381, "teacher_loss": 0.2872636914253235 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.43319171667099, "learning_rate": 2.5150865599938377e-05, "loss": 0.4257, "step": 12382, "teacher_loss": 0.42484402656555176 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4488913118839264, "learning_rate": 2.514919330670876e-05, "loss": 0.2145, "step": 12383, "teacher_loss": 0.188436821103096 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.7564199566841125, "learning_rate": 2.514752078078857e-05, "loss": 0.3163, "step": 12384, "teacher_loss": 0.2674005925655365 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.5323621034622192, "learning_rate": 2.5145848022216157e-05, "loss": 0.2552, "step": 12385, "teacher_loss": 0.22435428202152252 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4655833840370178, "learning_rate": 2.5144175031029863e-05, "loss": 0.2853, "step": 12386, "teacher_loss": 0.2652449905872345 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.27367931604385376, "learning_rate": 2.514250180726805e-05, "loss": 0.2329, "step": 12387, "teacher_loss": 0.22837644815444946 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.2601550221443176, "learning_rate": 2.5140828350969076e-05, "loss": 0.1812, "step": 12388, "teacher_loss": 0.17247986793518066 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.352405309677124, "learning_rate": 2.5139154662171316e-05, "loss": 0.2368, "step": 12389, "teacher_loss": 0.22398895025253296 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4569525420665741, "learning_rate": 2.5137480740913136e-05, "loss": 0.1855, "step": 12390, "teacher_loss": 0.15537753701210022 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.598187267780304, "learning_rate": 2.513580658723292e-05, "loss": 0.1885, "step": 12391, "teacher_loss": 0.14298535883426666 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3036641478538513, "learning_rate": 2.5134132201169042e-05, "loss": 0.1919, "step": 12392, "teacher_loss": 0.17942848801612854 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.298325777053833, "learning_rate": 2.51324575827599e-05, "loss": 0.2851, "step": 12393, "teacher_loss": 0.28361421823501587 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.1908550262451172, "learning_rate": 2.5130782732043885e-05, "loss": 0.1807, "step": 12394, "teacher_loss": 0.17959949374198914 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.08722509443759918, "learning_rate": 2.5129107649059395e-05, "loss": 0.22, "step": 12395, "teacher_loss": 0.23478442430496216 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.5926755666732788, "learning_rate": 2.5127432333844836e-05, "loss": 0.35, "step": 12396, "teacher_loss": 0.32299649715423584 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.981011152267456, "learning_rate": 2.5125756786438615e-05, "loss": 0.4414, "step": 12397, "teacher_loss": 0.3814009428024292 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3214312195777893, "learning_rate": 2.5124081006879148e-05, "loss": 0.2711, "step": 12398, "teacher_loss": 0.265512615442276 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.6141905784606934, "learning_rate": 2.512240499520486e-05, "loss": 0.2893, "step": 12399, "teacher_loss": 0.253153920173645 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.6215115785598755, "learning_rate": 2.5120728751454175e-05, "loss": 0.2918, "step": 12400, "teacher_loss": 0.25519269704818726 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.5350840091705322, "learning_rate": 2.511905227566552e-05, "loss": 0.2205, "step": 12401, "teacher_loss": 0.18551771342754364 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.28319722414016724, "learning_rate": 2.5117375567877335e-05, "loss": 0.1994, "step": 12402, "teacher_loss": 0.190045565366745 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3871760964393616, "learning_rate": 2.5115698628128065e-05, "loss": 0.2039, "step": 12403, "teacher_loss": 0.18352742493152618 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.38654810190200806, "learning_rate": 2.511402145645615e-05, "loss": 0.1853, "step": 12404, "teacher_loss": 0.16290079057216644 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.7009180784225464, "learning_rate": 2.511234405290005e-05, "loss": 0.267, "step": 12405, "teacher_loss": 0.21875181794166565 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.37582865357398987, "learning_rate": 2.5110666417498217e-05, "loss": 0.2261, "step": 12406, "teacher_loss": 0.20947568118572235 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4072246551513672, "learning_rate": 2.510898855028912e-05, "loss": 0.2537, "step": 12407, "teacher_loss": 0.23660865426063538 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.4788782298564911, "learning_rate": 2.510731045131122e-05, "loss": 0.2341, "step": 12408, "teacher_loss": 0.20688827335834503 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3572787344455719, "learning_rate": 2.5105632120602994e-05, "loss": 0.2772, "step": 12409, "teacher_loss": 0.26835209131240845 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.9476114511489868, "learning_rate": 2.5103953558202925e-05, "loss": 0.3384, "step": 12410, "teacher_loss": 0.27070653438568115 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.27621549367904663, "learning_rate": 2.5102274764149493e-05, "loss": 0.2388, "step": 12411, "teacher_loss": 0.234610915184021 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.5277109146118164, "learning_rate": 2.5100595738481187e-05, "loss": 0.3384, "step": 12412, "teacher_loss": 0.31734293699264526 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3266430199146271, "learning_rate": 2.5098916481236505e-05, "loss": 0.3007, "step": 12413, "teacher_loss": 0.2977985143661499 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.43012717366218567, "learning_rate": 2.5097236992453945e-05, "loss": 0.3224, "step": 12414, "teacher_loss": 0.3104206323623657 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.8111578226089478, "learning_rate": 2.5095557272172015e-05, "loss": 0.3192, "step": 12415, "teacher_loss": 0.2645253539085388 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.35283225774765015, "learning_rate": 2.5093877320429227e-05, "loss": 0.2315, "step": 12416, "teacher_loss": 0.21797379851341248 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.6910042762756348, "learning_rate": 2.5092197137264093e-05, "loss": 0.2358, "step": 12417, "teacher_loss": 0.1852441281080246 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.9138078689575195, "learning_rate": 2.5090516722715138e-05, "loss": 0.3345, "step": 12418, "teacher_loss": 0.2701514661312103 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.3401826322078705, "learning_rate": 2.5088836076820885e-05, "loss": 0.231, "step": 12419, "teacher_loss": 0.2188795804977417 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 0.38918742537498474, "learning_rate": 2.508715519961987e-05, "loss": 0.2489, "step": 12420, "teacher_loss": 0.23334649205207825 }, { "compression_loss": 0.0, "epoch": 2.24, "label_loss": 1.1607577800750732, "learning_rate": 2.508547409115063e-05, "loss": 0.56, "step": 12421, "teacher_loss": 0.4932183623313904 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3616155683994293, "learning_rate": 2.5083792751451707e-05, "loss": 0.3087, "step": 12422, "teacher_loss": 0.3028377890586853 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 1.1060240268707275, "learning_rate": 2.5082111180561646e-05, "loss": 0.3738, "step": 12423, "teacher_loss": 0.29246437549591064 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.18614168465137482, "learning_rate": 2.5080429378519004e-05, "loss": 0.2952, "step": 12424, "teacher_loss": 0.3073011338710785 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.20125222206115723, "learning_rate": 2.507874734536234e-05, "loss": 0.1697, "step": 12425, "teacher_loss": 0.16613906621932983 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 1.0296711921691895, "learning_rate": 2.5077065081130217e-05, "loss": 0.3491, "step": 12426, "teacher_loss": 0.27345359325408936 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3762807846069336, "learning_rate": 2.5075382585861205e-05, "loss": 0.3469, "step": 12427, "teacher_loss": 0.3435822129249573 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.2760469913482666, "learning_rate": 2.5073699859593874e-05, "loss": 0.1915, "step": 12428, "teacher_loss": 0.18213629722595215 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.4713650047779083, "learning_rate": 2.5072016902366818e-05, "loss": 0.2741, "step": 12429, "teacher_loss": 0.25217410922050476 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3024109899997711, "learning_rate": 2.5070333714218604e-05, "loss": 0.2178, "step": 12430, "teacher_loss": 0.20842163264751434 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.21881170570850372, "learning_rate": 2.506865029518783e-05, "loss": 0.202, "step": 12431, "teacher_loss": 0.20007693767547607 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.5328092575073242, "learning_rate": 2.506696664531309e-05, "loss": 0.3248, "step": 12432, "teacher_loss": 0.30166205763816833 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.11480280011892319, "learning_rate": 2.5065282764632992e-05, "loss": 0.2431, "step": 12433, "teacher_loss": 0.2573142647743225 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3078177869319916, "learning_rate": 2.5063598653186143e-05, "loss": 0.3077, "step": 12434, "teacher_loss": 0.3077346682548523 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.594077467918396, "learning_rate": 2.506191431101114e-05, "loss": 0.2496, "step": 12435, "teacher_loss": 0.21134255826473236 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.32119521498680115, "learning_rate": 2.506022973814661e-05, "loss": 0.2102, "step": 12436, "teacher_loss": 0.19791030883789062 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3036797046661377, "learning_rate": 2.5058544934631178e-05, "loss": 0.3126, "step": 12437, "teacher_loss": 0.31364506483078003 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.468548059463501, "learning_rate": 2.5056859900503462e-05, "loss": 0.4161, "step": 12438, "teacher_loss": 0.41031479835510254 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.26208731532096863, "learning_rate": 2.5055174635802106e-05, "loss": 0.2121, "step": 12439, "teacher_loss": 0.2065260410308838 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.7338371872901917, "learning_rate": 2.5053489140565737e-05, "loss": 0.2328, "step": 12440, "teacher_loss": 0.17709453403949738 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.718498945236206, "learning_rate": 2.5051803414833008e-05, "loss": 0.507, "step": 12441, "teacher_loss": 0.48351842164993286 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.5107282400131226, "learning_rate": 2.5050117458642566e-05, "loss": 0.2644, "step": 12442, "teacher_loss": 0.2370738983154297 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.5591605305671692, "learning_rate": 2.5048431272033058e-05, "loss": 0.2987, "step": 12443, "teacher_loss": 0.2697696089744568 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.34486252069473267, "learning_rate": 2.5046744855043147e-05, "loss": 0.2359, "step": 12444, "teacher_loss": 0.22376489639282227 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.24425595998764038, "learning_rate": 2.5045058207711503e-05, "loss": 0.2221, "step": 12445, "teacher_loss": 0.2196749746799469 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.1774607002735138, "learning_rate": 2.504337133007679e-05, "loss": 0.1448, "step": 12446, "teacher_loss": 0.14113838970661163 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.579138994216919, "learning_rate": 2.504168422217768e-05, "loss": 0.3361, "step": 12447, "teacher_loss": 0.30908310413360596 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.8592791557312012, "learning_rate": 2.503999688405286e-05, "loss": 0.6142, "step": 12448, "teacher_loss": 0.5869715213775635 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.5177626609802246, "learning_rate": 2.5038309315741013e-05, "loss": 0.2527, "step": 12449, "teacher_loss": 0.2232607901096344 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.7014118432998657, "learning_rate": 2.503662151728083e-05, "loss": 0.315, "step": 12450, "teacher_loss": 0.2720857858657837 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.7837603092193604, "learning_rate": 2.5034933488711004e-05, "loss": 0.3439, "step": 12451, "teacher_loss": 0.29497236013412476 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.6769936084747314, "learning_rate": 2.5033245230070238e-05, "loss": 0.3023, "step": 12452, "teacher_loss": 0.2606814503669739 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.1808786541223526, "learning_rate": 2.5031556741397248e-05, "loss": 0.1769, "step": 12453, "teacher_loss": 0.17642217874526978 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.4352392256259918, "learning_rate": 2.5029868022730734e-05, "loss": 0.3776, "step": 12454, "teacher_loss": 0.3712334930896759 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.23163536190986633, "learning_rate": 2.5028179074109418e-05, "loss": 0.1717, "step": 12455, "teacher_loss": 0.16507402062416077 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.3340536952018738, "learning_rate": 2.502648989557202e-05, "loss": 0.2201, "step": 12456, "teacher_loss": 0.20744773745536804 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.30356213450431824, "learning_rate": 2.502480048715727e-05, "loss": 0.1938, "step": 12457, "teacher_loss": 0.18157672882080078 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.5310879945755005, "learning_rate": 2.50231108489039e-05, "loss": 0.3114, "step": 12458, "teacher_loss": 0.286965012550354 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.27941781282424927, "learning_rate": 2.5021420980850653e-05, "loss": 0.3636, "step": 12459, "teacher_loss": 0.3730059862136841 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.4968075454235077, "learning_rate": 2.5019730883036265e-05, "loss": 0.2928, "step": 12460, "teacher_loss": 0.2701363265514374 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.8067477345466614, "learning_rate": 2.501804055549949e-05, "loss": 0.3855, "step": 12461, "teacher_loss": 0.3387216329574585 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.2549434304237366, "learning_rate": 2.5016349998279083e-05, "loss": 0.1938, "step": 12462, "teacher_loss": 0.18702520430088043 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.19229668378829956, "learning_rate": 2.5014659211413797e-05, "loss": 0.2082, "step": 12463, "teacher_loss": 0.21000494062900543 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.2706279158592224, "learning_rate": 2.5012968194942402e-05, "loss": 0.237, "step": 12464, "teacher_loss": 0.2332686334848404 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.7078277468681335, "learning_rate": 2.501127694890367e-05, "loss": 0.2465, "step": 12465, "teacher_loss": 0.1952914595603943 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 1.0988714694976807, "learning_rate": 2.5009585473336367e-05, "loss": 0.7812, "step": 12466, "teacher_loss": 0.7459425926208496 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.8042713403701782, "learning_rate": 2.5007893768279286e-05, "loss": 0.3015, "step": 12467, "teacher_loss": 0.2455812245607376 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.17142805457115173, "learning_rate": 2.50062018337712e-05, "loss": 0.2256, "step": 12468, "teacher_loss": 0.2316223531961441 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.884581983089447, "learning_rate": 2.500450966985091e-05, "loss": 0.3603, "step": 12469, "teacher_loss": 0.30199873447418213 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.2815982401371002, "learning_rate": 2.5002817276557206e-05, "loss": 0.1867, "step": 12470, "teacher_loss": 0.1761147677898407 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.25426995754241943, "learning_rate": 2.500112465392889e-05, "loss": 0.2518, "step": 12471, "teacher_loss": 0.2514955401420593 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.926116943359375, "learning_rate": 2.4999431802004772e-05, "loss": 0.2665, "step": 12472, "teacher_loss": 0.19315484166145325 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.45118045806884766, "learning_rate": 2.4997738720823666e-05, "loss": 0.2295, "step": 12473, "teacher_loss": 0.2048342376947403 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.48730432987213135, "learning_rate": 2.4996045410424386e-05, "loss": 0.3418, "step": 12474, "teacher_loss": 0.3256661891937256 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.6854239702224731, "learning_rate": 2.4994351870845752e-05, "loss": 0.3122, "step": 12475, "teacher_loss": 0.2707221508026123 }, { "compression_loss": 0.0, "epoch": 2.25, "label_loss": 0.6844598054885864, "learning_rate": 2.4992658102126594e-05, "loss": 0.3126, "step": 12476, "teacher_loss": 0.27124080061912537 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6983169317245483, "learning_rate": 2.499096410430574e-05, "loss": 0.4343, "step": 12477, "teacher_loss": 0.40491223335266113 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.31748753786087036, "learning_rate": 2.4989269877422047e-05, "loss": 0.2389, "step": 12478, "teacher_loss": 0.230166956782341 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.1992478370666504, "learning_rate": 2.4987575421514338e-05, "loss": 0.2306, "step": 12479, "teacher_loss": 0.234049454331398 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.26578769087791443, "learning_rate": 2.4985880736621467e-05, "loss": 0.1775, "step": 12480, "teacher_loss": 0.16771894693374634 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.2331969141960144, "learning_rate": 2.498418582278229e-05, "loss": 0.3109, "step": 12481, "teacher_loss": 0.3195742070674896 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.3971533179283142, "learning_rate": 2.4982490680035668e-05, "loss": 0.3003, "step": 12482, "teacher_loss": 0.28949248790740967 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.31105104088783264, "learning_rate": 2.498079530842046e-05, "loss": 0.2199, "step": 12483, "teacher_loss": 0.2097305804491043 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6346704959869385, "learning_rate": 2.497909970797555e-05, "loss": 0.2819, "step": 12484, "teacher_loss": 0.24269193410873413 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5110999345779419, "learning_rate": 2.4977403878739793e-05, "loss": 0.2657, "step": 12485, "teacher_loss": 0.23842313885688782 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5784667730331421, "learning_rate": 2.4975707820752078e-05, "loss": 0.2474, "step": 12486, "teacher_loss": 0.2106650024652481 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6825512051582336, "learning_rate": 2.4974011534051297e-05, "loss": 0.2365, "step": 12487, "teacher_loss": 0.1869291514158249 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.4182283282279968, "learning_rate": 2.4972315018676334e-05, "loss": 0.4759, "step": 12488, "teacher_loss": 0.4823169708251953 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5324483513832092, "learning_rate": 2.4970618274666087e-05, "loss": 0.2425, "step": 12489, "teacher_loss": 0.21033841371536255 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.43507567048072815, "learning_rate": 2.4968921302059452e-05, "loss": 0.3342, "step": 12490, "teacher_loss": 0.3229830265045166 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5093084573745728, "learning_rate": 2.4967224100895342e-05, "loss": 0.2482, "step": 12491, "teacher_loss": 0.21914741396903992 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.612106204032898, "learning_rate": 2.4965526671212666e-05, "loss": 0.3166, "step": 12492, "teacher_loss": 0.2837115526199341 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.48089852929115295, "learning_rate": 2.4963829013050344e-05, "loss": 0.2472, "step": 12493, "teacher_loss": 0.22123689949512482 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.31869077682495117, "learning_rate": 2.4962131126447292e-05, "loss": 0.1997, "step": 12494, "teacher_loss": 0.18648570775985718 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.37320131063461304, "learning_rate": 2.4960433011442445e-05, "loss": 0.2413, "step": 12495, "teacher_loss": 0.22659853100776672 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5218843221664429, "learning_rate": 2.495873466807473e-05, "loss": 0.2672, "step": 12496, "teacher_loss": 0.238898366689682 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5973473191261292, "learning_rate": 2.4957036096383093e-05, "loss": 0.4843, "step": 12497, "teacher_loss": 0.4716907739639282 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.8592730760574341, "learning_rate": 2.4955337296406464e-05, "loss": 0.3459, "step": 12498, "teacher_loss": 0.2888760566711426 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5340474843978882, "learning_rate": 2.4953638268183796e-05, "loss": 0.3955, "step": 12499, "teacher_loss": 0.3801245093345642 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.4711238741874695, "learning_rate": 2.495193901175405e-05, "loss": 0.4534, "step": 12500, "teacher_loss": 0.45140478014945984 }, { "epoch": 2.26, "eval_exact_match": 79.66887417218543, "eval_f1": 86.99380135903661, "step": 12500 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5490753650665283, "learning_rate": 2.495023952715618e-05, "loss": 0.2264, "step": 12501, "teacher_loss": 0.19051897525787354 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.2956361472606659, "learning_rate": 2.4948539814429148e-05, "loss": 0.3202, "step": 12502, "teacher_loss": 0.32292336225509644 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.4812681972980499, "learning_rate": 2.494683987361193e-05, "loss": 0.3406, "step": 12503, "teacher_loss": 0.3249479830265045 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.24697396159172058, "learning_rate": 2.494513970474349e-05, "loss": 0.2478, "step": 12504, "teacher_loss": 0.24792295694351196 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.3574371039867401, "learning_rate": 2.4943439307862818e-05, "loss": 0.2795, "step": 12505, "teacher_loss": 0.27086251974105835 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.25199514627456665, "learning_rate": 2.4941738683008892e-05, "loss": 0.207, "step": 12506, "teacher_loss": 0.20205508172512054 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.3918592631816864, "learning_rate": 2.4940037830220705e-05, "loss": 0.2103, "step": 12507, "teacher_loss": 0.19016845524311066 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.25740641355514526, "learning_rate": 2.4938336749537253e-05, "loss": 0.2962, "step": 12508, "teacher_loss": 0.3005411922931671 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.686660647392273, "learning_rate": 2.493663544099754e-05, "loss": 0.3121, "step": 12509, "teacher_loss": 0.2705281674861908 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.2648322582244873, "learning_rate": 2.493493390464056e-05, "loss": 0.1813, "step": 12510, "teacher_loss": 0.17202773690223694 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.28975337743759155, "learning_rate": 2.493323214050534e-05, "loss": 0.2357, "step": 12511, "teacher_loss": 0.22966080904006958 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.39938560128211975, "learning_rate": 2.4931530148630883e-05, "loss": 0.2839, "step": 12512, "teacher_loss": 0.27103251218795776 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.15705978870391846, "learning_rate": 2.4929827929056213e-05, "loss": 0.1816, "step": 12513, "teacher_loss": 0.1842774599790573 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.7605505585670471, "learning_rate": 2.4928125481820368e-05, "loss": 0.2459, "step": 12514, "teacher_loss": 0.1887163519859314 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.3978753685951233, "learning_rate": 2.492642280696237e-05, "loss": 0.2234, "step": 12515, "teacher_loss": 0.2039695680141449 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.20168451964855194, "learning_rate": 2.4924719904521254e-05, "loss": 0.1695, "step": 12516, "teacher_loss": 0.16588471829891205 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.9932999014854431, "learning_rate": 2.4923016774536067e-05, "loss": 0.3706, "step": 12517, "teacher_loss": 0.30135998129844666 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.592831552028656, "learning_rate": 2.4921313417045857e-05, "loss": 0.3943, "step": 12518, "teacher_loss": 0.37229329347610474 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6639014482498169, "learning_rate": 2.4919609832089676e-05, "loss": 0.4947, "step": 12519, "teacher_loss": 0.47589507699012756 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.1901385635137558, "learning_rate": 2.491790601970658e-05, "loss": 0.1983, "step": 12520, "teacher_loss": 0.19923299551010132 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.25974661111831665, "learning_rate": 2.491620197993564e-05, "loss": 0.2114, "step": 12521, "teacher_loss": 0.20604172348976135 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.45143961906433105, "learning_rate": 2.4914497712815917e-05, "loss": 0.4274, "step": 12522, "teacher_loss": 0.42470675706863403 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6493076682090759, "learning_rate": 2.4912793218386484e-05, "loss": 0.3064, "step": 12523, "teacher_loss": 0.26826074719429016 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.6655941605567932, "learning_rate": 2.4911088496686422e-05, "loss": 0.2173, "step": 12524, "teacher_loss": 0.1674552857875824 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5885056257247925, "learning_rate": 2.4909383547754817e-05, "loss": 0.3462, "step": 12525, "teacher_loss": 0.3192649483680725 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.4662756621837616, "learning_rate": 2.4907678371630757e-05, "loss": 0.2976, "step": 12526, "teacher_loss": 0.2788658142089844 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.2689884305000305, "learning_rate": 2.4905972968353343e-05, "loss": 0.2023, "step": 12527, "teacher_loss": 0.19487161934375763 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.3776562809944153, "learning_rate": 2.490426733796166e-05, "loss": 0.2818, "step": 12528, "teacher_loss": 0.2711542546749115 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.38208937644958496, "learning_rate": 2.4902561480494826e-05, "loss": 0.2328, "step": 12529, "teacher_loss": 0.21626605093479156 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.38997215032577515, "learning_rate": 2.4900855395991942e-05, "loss": 0.2399, "step": 12530, "teacher_loss": 0.22323068976402283 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.5471556186676025, "learning_rate": 2.489914908449213e-05, "loss": 0.3361, "step": 12531, "teacher_loss": 0.3126351833343506 }, { "compression_loss": 0.0, "epoch": 2.26, "label_loss": 0.25133901834487915, "learning_rate": 2.4897442546034512e-05, "loss": 0.2212, "step": 12532, "teacher_loss": 0.21784579753875732 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.44157394766807556, "learning_rate": 2.489573578065821e-05, "loss": 0.2291, "step": 12533, "teacher_loss": 0.20552313327789307 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4130289852619171, "learning_rate": 2.4894028788402352e-05, "loss": 0.2631, "step": 12534, "teacher_loss": 0.24649138748645782 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4055495262145996, "learning_rate": 2.489232156930608e-05, "loss": 0.2167, "step": 12535, "teacher_loss": 0.1957492232322693 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5434260368347168, "learning_rate": 2.489061412340853e-05, "loss": 0.3274, "step": 12536, "teacher_loss": 0.30337586998939514 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.32822442054748535, "learning_rate": 2.488890645074886e-05, "loss": 0.2183, "step": 12537, "teacher_loss": 0.206126406788826 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.3327506184577942, "learning_rate": 2.4887198551366206e-05, "loss": 0.272, "step": 12538, "teacher_loss": 0.2652987837791443 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.3866737484931946, "learning_rate": 2.4885490425299736e-05, "loss": 0.2614, "step": 12539, "teacher_loss": 0.24746572971343994 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5399439930915833, "learning_rate": 2.4883782072588604e-05, "loss": 0.2845, "step": 12540, "teacher_loss": 0.2560719847679138 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5061490535736084, "learning_rate": 2.4882073493271988e-05, "loss": 0.296, "step": 12541, "teacher_loss": 0.2726753354072571 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.10065954178571701, "learning_rate": 2.488036468738905e-05, "loss": 0.1788, "step": 12542, "teacher_loss": 0.18744251132011414 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.26066139340400696, "learning_rate": 2.4878655654978973e-05, "loss": 0.3447, "step": 12543, "teacher_loss": 0.3539879620075226 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5510700941085815, "learning_rate": 2.4876946396080946e-05, "loss": 0.3587, "step": 12544, "teacher_loss": 0.3372902572154999 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2775374948978424, "learning_rate": 2.4875236910734145e-05, "loss": 0.1607, "step": 12545, "teacher_loss": 0.14766675233840942 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5086871981620789, "learning_rate": 2.487352719897777e-05, "loss": 0.2464, "step": 12546, "teacher_loss": 0.21727171540260315 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5366396307945251, "learning_rate": 2.487181726085102e-05, "loss": 0.2226, "step": 12547, "teacher_loss": 0.1876683235168457 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.6070277690887451, "learning_rate": 2.4870107096393095e-05, "loss": 0.2706, "step": 12548, "teacher_loss": 0.233234703540802 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.21357940137386322, "learning_rate": 2.486839670564321e-05, "loss": 0.1809, "step": 12549, "teacher_loss": 0.17726486921310425 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.8291566371917725, "learning_rate": 2.4866686088640574e-05, "loss": 0.3084, "step": 12550, "teacher_loss": 0.2505527138710022 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.7683846950531006, "learning_rate": 2.4864975245424403e-05, "loss": 0.3819, "step": 12551, "teacher_loss": 0.3389424681663513 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4099001884460449, "learning_rate": 2.4863264176033936e-05, "loss": 0.1966, "step": 12552, "teacher_loss": 0.1729380488395691 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.45399191975593567, "learning_rate": 2.4861552880508385e-05, "loss": 0.2722, "step": 12553, "teacher_loss": 0.25204190611839294 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.42610734701156616, "learning_rate": 2.4859841358886993e-05, "loss": 0.299, "step": 12554, "teacher_loss": 0.2848728895187378 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.46575161814689636, "learning_rate": 2.4858129611209005e-05, "loss": 0.5935, "step": 12555, "teacher_loss": 0.607714056968689 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.8861938118934631, "learning_rate": 2.485641763751366e-05, "loss": 0.3314, "step": 12556, "teacher_loss": 0.26979681849479675 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.3615124821662903, "learning_rate": 2.485470543784021e-05, "loss": 0.2185, "step": 12557, "teacher_loss": 0.20262214541435242 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.6139078140258789, "learning_rate": 2.4852993012227908e-05, "loss": 0.3569, "step": 12558, "teacher_loss": 0.32838189601898193 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.3575991988182068, "learning_rate": 2.4851280360716017e-05, "loss": 0.2659, "step": 12559, "teacher_loss": 0.255714476108551 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.17090153694152832, "learning_rate": 2.4849567483343803e-05, "loss": 0.1647, "step": 12560, "teacher_loss": 0.16402272880077362 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2501254081726074, "learning_rate": 2.4847854380150535e-05, "loss": 0.2092, "step": 12561, "teacher_loss": 0.20463499426841736 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.22109168767929077, "learning_rate": 2.48461410511755e-05, "loss": 0.2612, "step": 12562, "teacher_loss": 0.26566118001937866 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2977604568004608, "learning_rate": 2.4844427496457966e-05, "loss": 0.2298, "step": 12563, "teacher_loss": 0.22224295139312744 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5917311906814575, "learning_rate": 2.484271371603722e-05, "loss": 0.3686, "step": 12564, "teacher_loss": 0.34376245737075806 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.29340487718582153, "learning_rate": 2.4840999709952563e-05, "loss": 0.178, "step": 12565, "teacher_loss": 0.16520051658153534 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5470061302185059, "learning_rate": 2.4839285478243286e-05, "loss": 0.2867, "step": 12566, "teacher_loss": 0.2577952742576599 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.3474600315093994, "learning_rate": 2.4837571020948695e-05, "loss": 0.2905, "step": 12567, "teacher_loss": 0.28420406579971313 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.28272077441215515, "learning_rate": 2.4835856338108095e-05, "loss": 0.1794, "step": 12568, "teacher_loss": 0.16790196299552917 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4079461693763733, "learning_rate": 2.48341414297608e-05, "loss": 0.2594, "step": 12569, "teacher_loss": 0.24289198219776154 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.394878625869751, "learning_rate": 2.483242629594612e-05, "loss": 0.1792, "step": 12570, "teacher_loss": 0.15524597465991974 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.38712814450263977, "learning_rate": 2.4830710936703392e-05, "loss": 0.2225, "step": 12571, "teacher_loss": 0.2041681706905365 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4921199679374695, "learning_rate": 2.4828995352071927e-05, "loss": 0.273, "step": 12572, "teacher_loss": 0.24862496554851532 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2518167495727539, "learning_rate": 2.4827279542091072e-05, "loss": 0.2261, "step": 12573, "teacher_loss": 0.2232484519481659 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.29522982239723206, "learning_rate": 2.4825563506800162e-05, "loss": 0.2395, "step": 12574, "teacher_loss": 0.23333227634429932 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.6245726346969604, "learning_rate": 2.482384724623854e-05, "loss": 0.2636, "step": 12575, "teacher_loss": 0.22354191541671753 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.31734150648117065, "learning_rate": 2.482213076044555e-05, "loss": 0.2549, "step": 12576, "teacher_loss": 0.24794290959835052 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.37666282057762146, "learning_rate": 2.4820414049460556e-05, "loss": 0.2871, "step": 12577, "teacher_loss": 0.2771133780479431 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.4308215081691742, "learning_rate": 2.4818697113322907e-05, "loss": 0.2576, "step": 12578, "teacher_loss": 0.23832008242607117 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.548661470413208, "learning_rate": 2.4816979952071974e-05, "loss": 0.2596, "step": 12579, "teacher_loss": 0.2275036871433258 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.23549598455429077, "learning_rate": 2.4815262565747118e-05, "loss": 0.2242, "step": 12580, "teacher_loss": 0.22291819751262665 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2603442668914795, "learning_rate": 2.481354495438772e-05, "loss": 0.2423, "step": 12581, "teacher_loss": 0.24032625555992126 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.2904854714870453, "learning_rate": 2.4811827118033163e-05, "loss": 0.2392, "step": 12582, "teacher_loss": 0.23345373570919037 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.24639549851417542, "learning_rate": 2.4810109056722825e-05, "loss": 0.2424, "step": 12583, "teacher_loss": 0.24196115136146545 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.7632948756217957, "learning_rate": 2.48083907704961e-05, "loss": 0.399, "step": 12584, "teacher_loss": 0.35852521657943726 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5758053064346313, "learning_rate": 2.480667225939238e-05, "loss": 0.2988, "step": 12585, "teacher_loss": 0.2680344581604004 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.39537566900253296, "learning_rate": 2.480495352345107e-05, "loss": 0.2271, "step": 12586, "teacher_loss": 0.20835396647453308 }, { "compression_loss": 0.0, "epoch": 2.27, "label_loss": 0.5928500294685364, "learning_rate": 2.4803234562711566e-05, "loss": 0.3838, "step": 12587, "teacher_loss": 0.3605412244796753 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6006373167037964, "learning_rate": 2.4801515377213293e-05, "loss": 0.2344, "step": 12588, "teacher_loss": 0.19367870688438416 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.35082510113716125, "learning_rate": 2.4799795966995654e-05, "loss": 0.1777, "step": 12589, "teacher_loss": 0.158429816365242 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6805994510650635, "learning_rate": 2.479807633209808e-05, "loss": 0.3202, "step": 12590, "teacher_loss": 0.2801085114479065 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6047331094741821, "learning_rate": 2.4796356472559984e-05, "loss": 0.3591, "step": 12591, "teacher_loss": 0.33181139826774597 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.34327465295791626, "learning_rate": 2.4794636388420812e-05, "loss": 0.2174, "step": 12592, "teacher_loss": 0.20337122678756714 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.39451533555984497, "learning_rate": 2.4792916079719988e-05, "loss": 0.4363, "step": 12593, "teacher_loss": 0.44098925590515137 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.20249181985855103, "learning_rate": 2.4791195546496964e-05, "loss": 0.175, "step": 12594, "teacher_loss": 0.17193642258644104 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.5126531720161438, "learning_rate": 2.4789474788791178e-05, "loss": 0.428, "step": 12595, "teacher_loss": 0.41857418417930603 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6117051839828491, "learning_rate": 2.4787753806642088e-05, "loss": 0.3943, "step": 12596, "teacher_loss": 0.37014323472976685 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.352652370929718, "learning_rate": 2.4786032600089145e-05, "loss": 0.2082, "step": 12597, "teacher_loss": 0.1921531856060028 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.31507253646850586, "learning_rate": 2.4784311169171818e-05, "loss": 0.2205, "step": 12598, "teacher_loss": 0.21004696190357208 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.4715031385421753, "learning_rate": 2.478258951392957e-05, "loss": 0.2077, "step": 12599, "teacher_loss": 0.17837247252464294 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.49409180879592896, "learning_rate": 2.4780867634401875e-05, "loss": 0.2671, "step": 12600, "teacher_loss": 0.24185073375701904 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.2607385516166687, "learning_rate": 2.477914553062821e-05, "loss": 0.2311, "step": 12601, "teacher_loss": 0.22775794565677643 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.36936667561531067, "learning_rate": 2.4777423202648052e-05, "loss": 0.1558, "step": 12602, "teacher_loss": 0.13212105631828308 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.5796270370483398, "learning_rate": 2.4775700650500897e-05, "loss": 0.5298, "step": 12603, "teacher_loss": 0.5242799520492554 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.3918241858482361, "learning_rate": 2.4773977874226236e-05, "loss": 0.208, "step": 12604, "teacher_loss": 0.18760943412780762 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.20024292171001434, "learning_rate": 2.4772254873863564e-05, "loss": 0.1556, "step": 12605, "teacher_loss": 0.1506561040878296 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.40143290162086487, "learning_rate": 2.4770531649452393e-05, "loss": 0.2617, "step": 12606, "teacher_loss": 0.2462194263935089 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.11884262412786484, "learning_rate": 2.4768808201032222e-05, "loss": 0.1853, "step": 12607, "teacher_loss": 0.19266246259212494 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.42557787895202637, "learning_rate": 2.4767084528642564e-05, "loss": 0.2762, "step": 12608, "teacher_loss": 0.25965040922164917 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6889470815658569, "learning_rate": 2.4765360632322942e-05, "loss": 0.5168, "step": 12609, "teacher_loss": 0.4976810812950134 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.47595643997192383, "learning_rate": 2.476363651211288e-05, "loss": 0.2868, "step": 12610, "teacher_loss": 0.2657453715801239 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.3744402527809143, "learning_rate": 2.4761912168051903e-05, "loss": 0.5431, "step": 12611, "teacher_loss": 0.5617977976799011 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.25561782717704773, "learning_rate": 2.4760187600179555e-05, "loss": 0.2317, "step": 12612, "teacher_loss": 0.2290431261062622 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.49452996253967285, "learning_rate": 2.475846280853536e-05, "loss": 0.2657, "step": 12613, "teacher_loss": 0.2402990460395813 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.13165296614170074, "learning_rate": 2.475673779315887e-05, "loss": 0.19, "step": 12614, "teacher_loss": 0.19649936258792877 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.4186062216758728, "learning_rate": 2.475501255408964e-05, "loss": 0.2468, "step": 12615, "teacher_loss": 0.22768718004226685 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.36524632573127747, "learning_rate": 2.4753287091367218e-05, "loss": 0.2488, "step": 12616, "teacher_loss": 0.23591215908527374 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.5232087969779968, "learning_rate": 2.475156140503116e-05, "loss": 0.7313, "step": 12617, "teacher_loss": 0.754426121711731 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.4180338680744171, "learning_rate": 2.4749835495121048e-05, "loss": 0.2435, "step": 12618, "teacher_loss": 0.22413134574890137 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.20741084218025208, "learning_rate": 2.4748109361676427e-05, "loss": 0.2252, "step": 12619, "teacher_loss": 0.22714349627494812 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.6563153862953186, "learning_rate": 2.4746383004736894e-05, "loss": 0.4134, "step": 12620, "teacher_loss": 0.38638395071029663 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.4777422547340393, "learning_rate": 2.474465642434201e-05, "loss": 0.2106, "step": 12621, "teacher_loss": 0.1809486448764801 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.37529456615448, "learning_rate": 2.4742929620531373e-05, "loss": 0.2244, "step": 12622, "teacher_loss": 0.2076016664505005 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.21721863746643066, "learning_rate": 2.4741202593344575e-05, "loss": 0.1945, "step": 12623, "teacher_loss": 0.19192591309547424 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.4583682417869568, "learning_rate": 2.4739475342821204e-05, "loss": 0.2591, "step": 12624, "teacher_loss": 0.23691397905349731 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.7615426182746887, "learning_rate": 2.4737747869000863e-05, "loss": 0.3782, "step": 12625, "teacher_loss": 0.335564523935318 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 1.04924476146698, "learning_rate": 2.473602017192316e-05, "loss": 0.3659, "step": 12626, "teacher_loss": 0.28997552394866943 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.19185186922550201, "learning_rate": 2.4734292251627704e-05, "loss": 0.2216, "step": 12627, "teacher_loss": 0.22491714358329773 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.18634033203125, "learning_rate": 2.4732564108154116e-05, "loss": 0.263, "step": 12628, "teacher_loss": 0.27149099111557007 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.9119465351104736, "learning_rate": 2.4730835741542007e-05, "loss": 0.2255, "step": 12629, "teacher_loss": 0.14918920397758484 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.23655898869037628, "learning_rate": 2.472910715183101e-05, "loss": 0.206, "step": 12630, "teacher_loss": 0.2026577889919281 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.7155637741088867, "learning_rate": 2.472737833906076e-05, "loss": 0.5321, "step": 12631, "teacher_loss": 0.51168292760849 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.832538366317749, "learning_rate": 2.4725649303270887e-05, "loss": 0.3579, "step": 12632, "teacher_loss": 0.30513080954551697 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.3827417492866516, "learning_rate": 2.4723920044501028e-05, "loss": 0.3668, "step": 12633, "teacher_loss": 0.3649759888648987 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.3006754517555237, "learning_rate": 2.4722190562790843e-05, "loss": 0.2357, "step": 12634, "teacher_loss": 0.22851170599460602 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.5746712684631348, "learning_rate": 2.4720460858179974e-05, "loss": 0.2796, "step": 12635, "teacher_loss": 0.24684450030326843 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.22356471419334412, "learning_rate": 2.4718730930708083e-05, "loss": 0.2389, "step": 12636, "teacher_loss": 0.24058832228183746 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.3699165880680084, "learning_rate": 2.471700078041483e-05, "loss": 0.2287, "step": 12637, "teacher_loss": 0.21297301352024078 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.5901683568954468, "learning_rate": 2.471527040733988e-05, "loss": 0.4873, "step": 12638, "teacher_loss": 0.47582483291625977 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.44878697395324707, "learning_rate": 2.4713539811522907e-05, "loss": 0.3, "step": 12639, "teacher_loss": 0.28348231315612793 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.7039507031440735, "learning_rate": 2.471180899300359e-05, "loss": 0.2676, "step": 12640, "teacher_loss": 0.2190612554550171 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.9100903868675232, "learning_rate": 2.4710077951821615e-05, "loss": 0.4385, "step": 12641, "teacher_loss": 0.38615643978118896 }, { "compression_loss": 0.0, "epoch": 2.28, "label_loss": 0.1566615104675293, "learning_rate": 2.4708346688016658e-05, "loss": 0.1491, "step": 12642, "teacher_loss": 0.148284912109375 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4914247989654541, "learning_rate": 2.4706615201628423e-05, "loss": 0.2217, "step": 12643, "teacher_loss": 0.19177308678627014 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.33896830677986145, "learning_rate": 2.47048834926966e-05, "loss": 0.2658, "step": 12644, "teacher_loss": 0.2576187252998352 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.2465217411518097, "learning_rate": 2.47031515612609e-05, "loss": 0.1478, "step": 12645, "teacher_loss": 0.1367756426334381 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.8291858434677124, "learning_rate": 2.470141940736102e-05, "loss": 0.4457, "step": 12646, "teacher_loss": 0.40305519104003906 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.632556676864624, "learning_rate": 2.4699687031036684e-05, "loss": 0.3488, "step": 12647, "teacher_loss": 0.31732529401779175 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4063131511211395, "learning_rate": 2.4697954432327602e-05, "loss": 0.2055, "step": 12648, "teacher_loss": 0.18320028483867645 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.44793960452079773, "learning_rate": 2.4696221611273503e-05, "loss": 0.3355, "step": 12649, "teacher_loss": 0.3230014741420746 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.31597214937210083, "learning_rate": 2.469448856791411e-05, "loss": 0.209, "step": 12650, "teacher_loss": 0.1970748007297516 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.26016974449157715, "learning_rate": 2.4692755302289168e-05, "loss": 0.2629, "step": 12651, "teacher_loss": 0.2631891369819641 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.65497225522995, "learning_rate": 2.46910218144384e-05, "loss": 0.268, "step": 12652, "teacher_loss": 0.22499778866767883 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.40610772371292114, "learning_rate": 2.4689288104401562e-05, "loss": 0.3379, "step": 12653, "teacher_loss": 0.33034467697143555 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.12130075693130493, "learning_rate": 2.4687554172218395e-05, "loss": 0.1984, "step": 12654, "teacher_loss": 0.20696593821048737 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.2080228626728058, "learning_rate": 2.4685820017928653e-05, "loss": 0.2071, "step": 12655, "teacher_loss": 0.20703697204589844 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5996205806732178, "learning_rate": 2.46840856415721e-05, "loss": 0.3033, "step": 12656, "teacher_loss": 0.27038830518722534 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.32981282472610474, "learning_rate": 2.4682351043188498e-05, "loss": 0.1987, "step": 12657, "teacher_loss": 0.1841558814048767 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.7262628078460693, "learning_rate": 2.4680616222817617e-05, "loss": 0.4945, "step": 12658, "teacher_loss": 0.46879836916923523 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.558749258518219, "learning_rate": 2.4678881180499228e-05, "loss": 0.4088, "step": 12659, "teacher_loss": 0.39215636253356934 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.45009884238243103, "learning_rate": 2.467714591627311e-05, "loss": 0.2467, "step": 12660, "teacher_loss": 0.22408342361450195 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.17860107123851776, "learning_rate": 2.4675410430179053e-05, "loss": 0.2351, "step": 12661, "teacher_loss": 0.24140053987503052 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.33198362588882446, "learning_rate": 2.467367472225685e-05, "loss": 0.2607, "step": 12662, "teacher_loss": 0.2528004050254822 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4819888770580292, "learning_rate": 2.4671938792546276e-05, "loss": 0.6162, "step": 12663, "teacher_loss": 0.6310882568359375 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3727151155471802, "learning_rate": 2.467020264108715e-05, "loss": 0.2742, "step": 12664, "teacher_loss": 0.2632971405982971 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 1.2337837219238281, "learning_rate": 2.466846626791927e-05, "loss": 0.5593, "step": 12665, "teacher_loss": 0.48441237211227417 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.23402002453804016, "learning_rate": 2.4666729673082444e-05, "loss": 0.2933, "step": 12666, "teacher_loss": 0.299884170293808 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.13813212513923645, "learning_rate": 2.466499285661649e-05, "loss": 0.1937, "step": 12667, "teacher_loss": 0.1998957097530365 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5632753372192383, "learning_rate": 2.466325581856123e-05, "loss": 0.3686, "step": 12668, "teacher_loss": 0.3469313383102417 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.49237850308418274, "learning_rate": 2.466151855895648e-05, "loss": 0.295, "step": 12669, "teacher_loss": 0.27306854724884033 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.6426405310630798, "learning_rate": 2.465978107784208e-05, "loss": 0.2057, "step": 12670, "teacher_loss": 0.15719977021217346 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4500117301940918, "learning_rate": 2.465804337525786e-05, "loss": 0.2699, "step": 12671, "teacher_loss": 0.24993078410625458 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.6109174489974976, "learning_rate": 2.4656305451243664e-05, "loss": 0.2414, "step": 12672, "teacher_loss": 0.20038671791553497 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5285136699676514, "learning_rate": 2.4654567305839334e-05, "loss": 0.242, "step": 12673, "teacher_loss": 0.21015599370002747 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4114764332771301, "learning_rate": 2.465282893908472e-05, "loss": 0.3416, "step": 12674, "teacher_loss": 0.33384984731674194 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5111281871795654, "learning_rate": 2.4651090351019684e-05, "loss": 0.5183, "step": 12675, "teacher_loss": 0.5191189050674438 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.9804028272628784, "learning_rate": 2.4649351541684076e-05, "loss": 0.5613, "step": 12676, "teacher_loss": 0.5147008895874023 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.8732770085334778, "learning_rate": 2.464761251111777e-05, "loss": 0.6973, "step": 12677, "teacher_loss": 0.677788496017456 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5299378633499146, "learning_rate": 2.4645873259360635e-05, "loss": 0.2369, "step": 12678, "teacher_loss": 0.20437392592430115 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3381912112236023, "learning_rate": 2.464413378645254e-05, "loss": 0.337, "step": 12679, "teacher_loss": 0.3368957042694092 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4966122508049011, "learning_rate": 2.464239409243338e-05, "loss": 0.2339, "step": 12680, "teacher_loss": 0.20476022362709045 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.2683950662612915, "learning_rate": 2.4640654177343028e-05, "loss": 0.2275, "step": 12681, "teacher_loss": 0.2229967713356018 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3283715844154358, "learning_rate": 2.4638914041221384e-05, "loss": 0.3041, "step": 12682, "teacher_loss": 0.3014216125011444 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.5279498100280762, "learning_rate": 2.4637173684108333e-05, "loss": 0.3553, "step": 12683, "teacher_loss": 0.3360982835292816 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.20253853499889374, "learning_rate": 2.4635433106043787e-05, "loss": 0.2606, "step": 12684, "teacher_loss": 0.266999751329422 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.6865276098251343, "learning_rate": 2.4633692307067654e-05, "loss": 0.3736, "step": 12685, "teacher_loss": 0.3388213515281677 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.500142514705658, "learning_rate": 2.4631951287219833e-05, "loss": 0.2231, "step": 12686, "teacher_loss": 0.19230876863002777 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.4036378860473633, "learning_rate": 2.4630210046540246e-05, "loss": 0.26, "step": 12687, "teacher_loss": 0.24406495690345764 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.8144952058792114, "learning_rate": 2.462846858506882e-05, "loss": 0.3633, "step": 12688, "teacher_loss": 0.313166081905365 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.41987690329551697, "learning_rate": 2.4626726902845477e-05, "loss": 0.2733, "step": 12689, "teacher_loss": 0.25701674818992615 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3125081956386566, "learning_rate": 2.462498499991014e-05, "loss": 0.1773, "step": 12690, "teacher_loss": 0.16227251291275024 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.16725917160511017, "learning_rate": 2.4623242876302764e-05, "loss": 0.1751, "step": 12691, "teacher_loss": 0.17591989040374756 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3306730389595032, "learning_rate": 2.462150053206328e-05, "loss": 0.1988, "step": 12692, "teacher_loss": 0.1841772198677063 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.6826362609863281, "learning_rate": 2.4619757967231632e-05, "loss": 0.3099, "step": 12693, "teacher_loss": 0.26851630210876465 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.3717074394226074, "learning_rate": 2.461801518184778e-05, "loss": 0.2559, "step": 12694, "teacher_loss": 0.24304817616939545 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.7762868404388428, "learning_rate": 2.461627217595167e-05, "loss": 0.3276, "step": 12695, "teacher_loss": 0.277798056602478 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.501883864402771, "learning_rate": 2.4614528949583273e-05, "loss": 0.2252, "step": 12696, "teacher_loss": 0.19446077942848206 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.7247898578643799, "learning_rate": 2.4612785502782554e-05, "loss": 0.2129, "step": 12697, "teacher_loss": 0.1560727059841156 }, { "compression_loss": 0.0, "epoch": 2.29, "label_loss": 0.6145369410514832, "learning_rate": 2.4611041835589482e-05, "loss": 0.3189, "step": 12698, "teacher_loss": 0.28607177734375 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.6533224582672119, "learning_rate": 2.4609297948044035e-05, "loss": 0.2296, "step": 12699, "teacher_loss": 0.18252351880073547 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5860744714736938, "learning_rate": 2.46075538401862e-05, "loss": 0.2772, "step": 12700, "teacher_loss": 0.2428891509771347 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.32241320610046387, "learning_rate": 2.4605809512055956e-05, "loss": 0.1683, "step": 12701, "teacher_loss": 0.15118065476417542 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.4126654267311096, "learning_rate": 2.4604064963693303e-05, "loss": 0.3635, "step": 12702, "teacher_loss": 0.35798850655555725 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.49540072679519653, "learning_rate": 2.4602320195138232e-05, "loss": 0.3843, "step": 12703, "teacher_loss": 0.3719090223312378 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3412644863128662, "learning_rate": 2.4600575206430753e-05, "loss": 0.3166, "step": 12704, "teacher_loss": 0.31381258368492126 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.6605757474899292, "learning_rate": 2.4598829997610867e-05, "loss": 0.2652, "step": 12705, "teacher_loss": 0.22126471996307373 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.41099756956100464, "learning_rate": 2.4597084568718583e-05, "loss": 0.2606, "step": 12706, "teacher_loss": 0.24391400814056396 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.4479926824569702, "learning_rate": 2.459533891979393e-05, "loss": 0.3547, "step": 12707, "teacher_loss": 0.3442840576171875 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.711146354675293, "learning_rate": 2.4593593050876924e-05, "loss": 0.3534, "step": 12708, "teacher_loss": 0.3136245012283325 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.36635780334472656, "learning_rate": 2.4591846962007583e-05, "loss": 0.2239, "step": 12709, "teacher_loss": 0.2080240696668625 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5478852987289429, "learning_rate": 2.4590100653225958e-05, "loss": 0.2378, "step": 12710, "teacher_loss": 0.203330397605896 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.8523598909378052, "learning_rate": 2.4588354124572076e-05, "loss": 0.5593, "step": 12711, "teacher_loss": 0.5267688632011414 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2741791307926178, "learning_rate": 2.458660737608598e-05, "loss": 0.2045, "step": 12712, "teacher_loss": 0.19675593078136444 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.14311210811138153, "learning_rate": 2.458486040780772e-05, "loss": 0.1528, "step": 12713, "teacher_loss": 0.1538282036781311 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.6791624426841736, "learning_rate": 2.4583113219777346e-05, "loss": 0.3526, "step": 12714, "teacher_loss": 0.316366970539093 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3931400179862976, "learning_rate": 2.4581365812034917e-05, "loss": 0.204, "step": 12715, "teacher_loss": 0.18297532200813293 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.28954386711120605, "learning_rate": 2.45796181846205e-05, "loss": 0.2521, "step": 12716, "teacher_loss": 0.24791626632213593 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2832193970680237, "learning_rate": 2.4577870337574158e-05, "loss": 0.2633, "step": 12717, "teacher_loss": 0.2611054480075836 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.41847631335258484, "learning_rate": 2.457612227093596e-05, "loss": 0.2211, "step": 12718, "teacher_loss": 0.1992010772228241 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.48299989104270935, "learning_rate": 2.4574373984745996e-05, "loss": 0.2396, "step": 12719, "teacher_loss": 0.21255764365196228 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5592890977859497, "learning_rate": 2.457262547904434e-05, "loss": 0.3456, "step": 12720, "teacher_loss": 0.3218947649002075 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3585044741630554, "learning_rate": 2.4570876753871083e-05, "loss": 0.3714, "step": 12721, "teacher_loss": 0.3728066086769104 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.4857742190361023, "learning_rate": 2.4569127809266315e-05, "loss": 0.2593, "step": 12722, "teacher_loss": 0.23412850499153137 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3344912827014923, "learning_rate": 2.4567378645270134e-05, "loss": 0.4273, "step": 12723, "teacher_loss": 0.43757182359695435 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.7104486227035522, "learning_rate": 2.456562926192265e-05, "loss": 0.2779, "step": 12724, "teacher_loss": 0.22980816662311554 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.26461973786354065, "learning_rate": 2.4563879659263964e-05, "loss": 0.2036, "step": 12725, "teacher_loss": 0.19678860902786255 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2016523778438568, "learning_rate": 2.4562129837334192e-05, "loss": 0.2153, "step": 12726, "teacher_loss": 0.2168487012386322 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3421795070171356, "learning_rate": 2.4560379796173452e-05, "loss": 0.3832, "step": 12727, "teacher_loss": 0.38770729303359985 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5051214694976807, "learning_rate": 2.4558629535821863e-05, "loss": 0.2918, "step": 12728, "teacher_loss": 0.26808029413223267 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2910989820957184, "learning_rate": 2.4556879056319557e-05, "loss": 0.2564, "step": 12729, "teacher_loss": 0.252596914768219 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2521718442440033, "learning_rate": 2.455512835770667e-05, "loss": 0.1889, "step": 12730, "teacher_loss": 0.18191729485988617 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.9507643580436707, "learning_rate": 2.455337744002334e-05, "loss": 0.4776, "step": 12731, "teacher_loss": 0.4250204563140869 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 1.057058334350586, "learning_rate": 2.4551626303309703e-05, "loss": 0.3406, "step": 12732, "teacher_loss": 0.2610388398170471 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.8102385997772217, "learning_rate": 2.4549874947605915e-05, "loss": 0.2968, "step": 12733, "teacher_loss": 0.23970946669578552 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.30617085099220276, "learning_rate": 2.4548123372952126e-05, "loss": 0.2129, "step": 12734, "teacher_loss": 0.20250718295574188 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5361084342002869, "learning_rate": 2.4546371579388496e-05, "loss": 0.3743, "step": 12735, "teacher_loss": 0.35636162757873535 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.49368131160736084, "learning_rate": 2.4544619566955185e-05, "loss": 0.3168, "step": 12736, "teacher_loss": 0.29718470573425293 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3797915279865265, "learning_rate": 2.4542867335692362e-05, "loss": 0.1948, "step": 12737, "teacher_loss": 0.17422893643379211 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.7465950846672058, "learning_rate": 2.4541114885640207e-05, "loss": 0.572, "step": 12738, "teacher_loss": 0.5526261329650879 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2563946843147278, "learning_rate": 2.4539362216838893e-05, "loss": 0.2088, "step": 12739, "teacher_loss": 0.2034914195537567 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5228832364082336, "learning_rate": 2.4537609329328597e-05, "loss": 0.2076, "step": 12740, "teacher_loss": 0.17252935469150543 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.667177677154541, "learning_rate": 2.4535856223149525e-05, "loss": 0.366, "step": 12741, "teacher_loss": 0.33253538608551025 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5715077519416809, "learning_rate": 2.453410289834185e-05, "loss": 0.2295, "step": 12742, "teacher_loss": 0.19145861268043518 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.41520828008651733, "learning_rate": 2.453234935494578e-05, "loss": 0.2576, "step": 12743, "teacher_loss": 0.24007537961006165 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5495928525924683, "learning_rate": 2.4530595593001527e-05, "loss": 0.3345, "step": 12744, "teacher_loss": 0.3105456233024597 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.7105432152748108, "learning_rate": 2.4528841612549284e-05, "loss": 0.4395, "step": 12745, "teacher_loss": 0.40933847427368164 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2253831923007965, "learning_rate": 2.452708741362928e-05, "loss": 0.346, "step": 12746, "teacher_loss": 0.3594166040420532 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.696350634098053, "learning_rate": 2.4525332996281716e-05, "loss": 0.257, "step": 12747, "teacher_loss": 0.20816929638385773 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.2134370505809784, "learning_rate": 2.4523578360546828e-05, "loss": 0.3286, "step": 12748, "teacher_loss": 0.34137117862701416 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5214452743530273, "learning_rate": 2.4521823506464844e-05, "loss": 0.3039, "step": 12749, "teacher_loss": 0.2797281742095947 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.5118767023086548, "learning_rate": 2.4520068434075995e-05, "loss": 0.3028, "step": 12750, "teacher_loss": 0.27952322363853455 }, { "epoch": 2.3, "eval_exact_match": 79.70671712393566, "eval_f1": 86.9584835966808, "step": 12750 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.8415837287902832, "learning_rate": 2.4518313143420514e-05, "loss": 0.3539, "step": 12751, "teacher_loss": 0.29970085620880127 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.3951093256473541, "learning_rate": 2.4516557634538657e-05, "loss": 0.2491, "step": 12752, "teacher_loss": 0.2329024374485016 }, { "compression_loss": 0.0, "epoch": 2.3, "label_loss": 0.4233248829841614, "learning_rate": 2.451480190747066e-05, "loss": 0.179, "step": 12753, "teacher_loss": 0.1518113911151886 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.626502275466919, "learning_rate": 2.4513045962256788e-05, "loss": 0.2675, "step": 12754, "teacher_loss": 0.22757482528686523 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.46928292512893677, "learning_rate": 2.4511289798937285e-05, "loss": 0.3684, "step": 12755, "teacher_loss": 0.35724061727523804 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.48779958486557007, "learning_rate": 2.450953341755243e-05, "loss": 0.3318, "step": 12756, "teacher_loss": 0.3144574761390686 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5141881704330444, "learning_rate": 2.4507776818142483e-05, "loss": 0.2522, "step": 12757, "teacher_loss": 0.2230379581451416 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4260299801826477, "learning_rate": 2.4506020000747717e-05, "loss": 0.2063, "step": 12758, "teacher_loss": 0.181864395737648 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.41852647066116333, "learning_rate": 2.4504262965408415e-05, "loss": 0.2698, "step": 12759, "teacher_loss": 0.2532217502593994 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.2920951843261719, "learning_rate": 2.4502505712164863e-05, "loss": 0.2895, "step": 12760, "teacher_loss": 0.28918135166168213 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.3665492534637451, "learning_rate": 2.450074824105734e-05, "loss": 0.1975, "step": 12761, "teacher_loss": 0.17876750230789185 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5575348734855652, "learning_rate": 2.4498990552126144e-05, "loss": 0.2254, "step": 12762, "teacher_loss": 0.18852347135543823 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6259362697601318, "learning_rate": 2.4497232645411575e-05, "loss": 0.5717, "step": 12763, "teacher_loss": 0.5657031536102295 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.35016322135925293, "learning_rate": 2.4495474520953938e-05, "loss": 0.2699, "step": 12764, "teacher_loss": 0.26096174120903015 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6094595193862915, "learning_rate": 2.4493716178793537e-05, "loss": 0.5837, "step": 12765, "teacher_loss": 0.5808587074279785 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5083569288253784, "learning_rate": 2.449195761897069e-05, "loss": 0.2551, "step": 12766, "teacher_loss": 0.22692260146141052 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4897061586380005, "learning_rate": 2.4490198841525708e-05, "loss": 0.2157, "step": 12767, "teacher_loss": 0.18526628613471985 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.18948040902614594, "learning_rate": 2.4488439846498924e-05, "loss": 0.2087, "step": 12768, "teacher_loss": 0.21088364720344543 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.47084784507751465, "learning_rate": 2.4486680633930658e-05, "loss": 0.279, "step": 12769, "teacher_loss": 0.25768622756004333 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4271124005317688, "learning_rate": 2.448492120386125e-05, "loss": 0.2873, "step": 12770, "teacher_loss": 0.2717765271663666 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5003107190132141, "learning_rate": 2.448316155633104e-05, "loss": 0.3744, "step": 12771, "teacher_loss": 0.3604600131511688 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.35192644596099854, "learning_rate": 2.4481401691380362e-05, "loss": 0.3499, "step": 12772, "teacher_loss": 0.3497084379196167 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.40583306550979614, "learning_rate": 2.447964160904957e-05, "loss": 0.2442, "step": 12773, "teacher_loss": 0.22625015676021576 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4257432818412781, "learning_rate": 2.4477881309379024e-05, "loss": 0.2118, "step": 12774, "teacher_loss": 0.18802779912948608 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.7121524214744568, "learning_rate": 2.447612079240907e-05, "loss": 0.3702, "step": 12775, "teacher_loss": 0.33223316073417664 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4624972343444824, "learning_rate": 2.447436005818008e-05, "loss": 0.2369, "step": 12776, "teacher_loss": 0.21184581518173218 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.34011220932006836, "learning_rate": 2.4472599106732413e-05, "loss": 0.2076, "step": 12777, "teacher_loss": 0.19285979866981506 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5103950500488281, "learning_rate": 2.4470837938106452e-05, "loss": 0.2585, "step": 12778, "teacher_loss": 0.23056337237358093 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4049499034881592, "learning_rate": 2.4469076552342575e-05, "loss": 0.2973, "step": 12779, "teacher_loss": 0.2852926552295685 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.3633689284324646, "learning_rate": 2.4467314949481162e-05, "loss": 0.2364, "step": 12780, "teacher_loss": 0.2223348617553711 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6356157064437866, "learning_rate": 2.44655531295626e-05, "loss": 0.3848, "step": 12781, "teacher_loss": 0.356976181268692 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.50611811876297, "learning_rate": 2.4463791092627284e-05, "loss": 0.2772, "step": 12782, "teacher_loss": 0.2517325282096863 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5781041383743286, "learning_rate": 2.4462028838715613e-05, "loss": 0.2358, "step": 12783, "teacher_loss": 0.1977129578590393 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.3778180480003357, "learning_rate": 2.446026636786799e-05, "loss": 0.201, "step": 12784, "teacher_loss": 0.18131864070892334 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.39219459891319275, "learning_rate": 2.4458503680124818e-05, "loss": 0.4406, "step": 12785, "teacher_loss": 0.4460013806819916 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.22727757692337036, "learning_rate": 2.4456740775526523e-05, "loss": 0.194, "step": 12786, "teacher_loss": 0.19034844636917114 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4749402403831482, "learning_rate": 2.4454977654113512e-05, "loss": 0.2972, "step": 12787, "teacher_loss": 0.2774598002433777 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6500797271728516, "learning_rate": 2.4453214315926203e-05, "loss": 0.5804, "step": 12788, "teacher_loss": 0.5726256370544434 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.17040009796619415, "learning_rate": 2.445145076100504e-05, "loss": 0.27, "step": 12789, "teacher_loss": 0.28105106949806213 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6615462303161621, "learning_rate": 2.4449686989390447e-05, "loss": 0.2726, "step": 12790, "teacher_loss": 0.22943192720413208 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.7984317541122437, "learning_rate": 2.444792300112286e-05, "loss": 0.7198, "step": 12791, "teacher_loss": 0.7110674381256104 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6699482202529907, "learning_rate": 2.444615879624273e-05, "loss": 0.3182, "step": 12792, "teacher_loss": 0.2791202664375305 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.3365625739097595, "learning_rate": 2.4444394374790493e-05, "loss": 0.2111, "step": 12793, "teacher_loss": 0.19713972508907318 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.6909478902816772, "learning_rate": 2.4442629736806613e-05, "loss": 0.239, "step": 12794, "teacher_loss": 0.18882140517234802 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.2484227865934372, "learning_rate": 2.4440864882331544e-05, "loss": 0.2787, "step": 12795, "teacher_loss": 0.28203296661376953 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.8718396425247192, "learning_rate": 2.4439099811405747e-05, "loss": 0.5807, "step": 12796, "teacher_loss": 0.5483235716819763 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.33335673809051514, "learning_rate": 2.4437334524069693e-05, "loss": 0.2481, "step": 12797, "teacher_loss": 0.23857778310775757 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.34471312165260315, "learning_rate": 2.443556902036385e-05, "loss": 0.2114, "step": 12798, "teacher_loss": 0.19653251767158508 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5021098852157593, "learning_rate": 2.44338033003287e-05, "loss": 0.2408, "step": 12799, "teacher_loss": 0.21180084347724915 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.43334415555000305, "learning_rate": 2.4432037364004725e-05, "loss": 0.4199, "step": 12800, "teacher_loss": 0.4184553623199463 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.34729498624801636, "learning_rate": 2.4430271211432413e-05, "loss": 0.2348, "step": 12801, "teacher_loss": 0.22225135564804077 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.4381135404109955, "learning_rate": 2.442850484265225e-05, "loss": 0.2947, "step": 12802, "teacher_loss": 0.2787514626979828 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5590059757232666, "learning_rate": 2.4426738257704745e-05, "loss": 0.2537, "step": 12803, "teacher_loss": 0.21981194615364075 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.1918146014213562, "learning_rate": 2.4424971456630395e-05, "loss": 0.2044, "step": 12804, "teacher_loss": 0.2057977318763733 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5542712807655334, "learning_rate": 2.4423204439469703e-05, "loss": 0.3578, "step": 12805, "teacher_loss": 0.3360062539577484 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.38596487045288086, "learning_rate": 2.442143720626319e-05, "loss": 0.2367, "step": 12806, "teacher_loss": 0.2201429009437561 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.366477370262146, "learning_rate": 2.441966975705136e-05, "loss": 0.3205, "step": 12807, "teacher_loss": 0.315398633480072 }, { "compression_loss": 0.0, "epoch": 2.31, "label_loss": 0.5178214311599731, "learning_rate": 2.441790209187475e-05, "loss": 0.4788, "step": 12808, "teacher_loss": 0.47442692518234253 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.1455693542957306, "learning_rate": 2.4416134210773884e-05, "loss": 0.2032, "step": 12809, "teacher_loss": 0.2095700055360794 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4795738756656647, "learning_rate": 2.441436611378929e-05, "loss": 0.3943, "step": 12810, "teacher_loss": 0.38480839133262634 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.12746132910251617, "learning_rate": 2.4412597800961506e-05, "loss": 0.1341, "step": 12811, "teacher_loss": 0.13479305803775787 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5107969045639038, "learning_rate": 2.4410829272331073e-05, "loss": 0.2647, "step": 12812, "teacher_loss": 0.23736470937728882 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5077449083328247, "learning_rate": 2.440906052793854e-05, "loss": 0.234, "step": 12813, "teacher_loss": 0.20358508825302124 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.28745418787002563, "learning_rate": 2.4407291567824462e-05, "loss": 0.219, "step": 12814, "teacher_loss": 0.2114378660917282 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.756926417350769, "learning_rate": 2.440552239202939e-05, "loss": 0.3529, "step": 12815, "teacher_loss": 0.30806252360343933 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.2376304715871811, "learning_rate": 2.4403753000593892e-05, "loss": 0.1985, "step": 12816, "teacher_loss": 0.19418640434741974 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.35985973477363586, "learning_rate": 2.440198339355853e-05, "loss": 0.3249, "step": 12817, "teacher_loss": 0.32096678018569946 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5735594034194946, "learning_rate": 2.440021357096388e-05, "loss": 0.2514, "step": 12818, "teacher_loss": 0.21555694937705994 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.40221551060676575, "learning_rate": 2.4398443532850512e-05, "loss": 0.2101, "step": 12819, "teacher_loss": 0.1887645721435547 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5203587412834167, "learning_rate": 2.439667327925901e-05, "loss": 0.2435, "step": 12820, "teacher_loss": 0.21272984147071838 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.3704550266265869, "learning_rate": 2.4394902810229967e-05, "loss": 0.3303, "step": 12821, "teacher_loss": 0.32578545808792114 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.3017027676105499, "learning_rate": 2.4393132125803965e-05, "loss": 0.2913, "step": 12822, "teacher_loss": 0.290158748626709 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.46001148223876953, "learning_rate": 2.4391361226021613e-05, "loss": 0.3411, "step": 12823, "teacher_loss": 0.32784056663513184 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.7779672741889954, "learning_rate": 2.4389590110923498e-05, "loss": 0.2237, "step": 12824, "teacher_loss": 0.16211147606372833 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.8369835615158081, "learning_rate": 2.4387818780550236e-05, "loss": 0.3391, "step": 12825, "teacher_loss": 0.2838076949119568 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5473663210868835, "learning_rate": 2.438604723494244e-05, "loss": 0.2696, "step": 12826, "teacher_loss": 0.23878192901611328 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.2935411334037781, "learning_rate": 2.438427547414071e-05, "loss": 0.1478, "step": 12827, "teacher_loss": 0.13162542879581451 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5983997583389282, "learning_rate": 2.438250349818569e-05, "loss": 0.2661, "step": 12828, "teacher_loss": 0.2292214334011078 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6560078263282776, "learning_rate": 2.438073130711799e-05, "loss": 0.293, "step": 12829, "teacher_loss": 0.2526160478591919 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4568673372268677, "learning_rate": 2.4378958900978246e-05, "loss": 0.49, "step": 12830, "teacher_loss": 0.493631511926651 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.43570154905319214, "learning_rate": 2.4377186279807098e-05, "loss": 0.2768, "step": 12831, "teacher_loss": 0.2591586410999298 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4226142168045044, "learning_rate": 2.437541344364518e-05, "loss": 0.2951, "step": 12832, "teacher_loss": 0.2809029519557953 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6435673236846924, "learning_rate": 2.4373640392533136e-05, "loss": 0.234, "step": 12833, "teacher_loss": 0.18852336704730988 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4074741005897522, "learning_rate": 2.4371867126511627e-05, "loss": 0.2719, "step": 12834, "teacher_loss": 0.2568800449371338 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.28506845235824585, "learning_rate": 2.4370093645621306e-05, "loss": 0.2323, "step": 12835, "teacher_loss": 0.22639970481395721 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6510648727416992, "learning_rate": 2.4368319949902826e-05, "loss": 0.2479, "step": 12836, "teacher_loss": 0.20315083861351013 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.593468964099884, "learning_rate": 2.4366546039396858e-05, "loss": 0.3265, "step": 12837, "teacher_loss": 0.2967977523803711 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.553874671459198, "learning_rate": 2.436477191414407e-05, "loss": 0.3117, "step": 12838, "teacher_loss": 0.2848353385925293 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.8633517026901245, "learning_rate": 2.436299757418514e-05, "loss": 0.3394, "step": 12839, "teacher_loss": 0.2811833620071411 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.384809672832489, "learning_rate": 2.4361223019560748e-05, "loss": 0.2539, "step": 12840, "teacher_loss": 0.23934416472911835 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.3511003255844116, "learning_rate": 2.435944825031158e-05, "loss": 0.2505, "step": 12841, "teacher_loss": 0.23937579989433289 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.664929986000061, "learning_rate": 2.4357673266478325e-05, "loss": 0.3503, "step": 12842, "teacher_loss": 0.31534427404403687 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4717535376548767, "learning_rate": 2.4355898068101676e-05, "loss": 0.3487, "step": 12843, "teacher_loss": 0.33506864309310913 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.47175133228302, "learning_rate": 2.435412265522233e-05, "loss": 0.2879, "step": 12844, "teacher_loss": 0.26751506328582764 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5003417730331421, "learning_rate": 2.4352347027881003e-05, "loss": 0.495, "step": 12845, "teacher_loss": 0.49436646699905396 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.9326711893081665, "learning_rate": 2.4350571186118398e-05, "loss": 0.289, "step": 12846, "teacher_loss": 0.21743376553058624 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6477702856063843, "learning_rate": 2.4348795129975226e-05, "loss": 0.2612, "step": 12847, "teacher_loss": 0.21828335523605347 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.7703279256820679, "learning_rate": 2.4347018859492218e-05, "loss": 0.3405, "step": 12848, "teacher_loss": 0.2926962077617645 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.29670923948287964, "learning_rate": 2.4345242374710086e-05, "loss": 0.1722, "step": 12849, "teacher_loss": 0.1583748459815979 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.32970547676086426, "learning_rate": 2.4343465675669565e-05, "loss": 0.2327, "step": 12850, "teacher_loss": 0.22187373042106628 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.37952157855033875, "learning_rate": 2.434168876241139e-05, "loss": 0.3122, "step": 12851, "teacher_loss": 0.30476221442222595 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6123507022857666, "learning_rate": 2.4339911634976298e-05, "loss": 0.2425, "step": 12852, "teacher_loss": 0.20138241350650787 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.9636555910110474, "learning_rate": 2.433813429340504e-05, "loss": 0.4365, "step": 12853, "teacher_loss": 0.3779287338256836 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.3442821800708771, "learning_rate": 2.4336356737738354e-05, "loss": 0.2589, "step": 12854, "teacher_loss": 0.24940142035484314 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5900931358337402, "learning_rate": 2.4334578968017003e-05, "loss": 0.2551, "step": 12855, "teacher_loss": 0.21786624193191528 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.5348082184791565, "learning_rate": 2.4332800984281743e-05, "loss": 0.2973, "step": 12856, "teacher_loss": 0.27087366580963135 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4526482820510864, "learning_rate": 2.4331022786573336e-05, "loss": 0.2881, "step": 12857, "teacher_loss": 0.2698673605918884 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.9048672914505005, "learning_rate": 2.4329244374932552e-05, "loss": 0.3537, "step": 12858, "teacher_loss": 0.29248857498168945 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.6123314499855042, "learning_rate": 2.4327465749400167e-05, "loss": 0.292, "step": 12859, "teacher_loss": 0.25638720393180847 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.16980476677417755, "learning_rate": 2.4325686910016957e-05, "loss": 0.2306, "step": 12860, "teacher_loss": 0.23734743893146515 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.3036035895347595, "learning_rate": 2.4323907856823705e-05, "loss": 0.2395, "step": 12861, "teacher_loss": 0.2323692888021469 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.7467605471611023, "learning_rate": 2.4322128589861204e-05, "loss": 0.4394, "step": 12862, "teacher_loss": 0.40528684854507446 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.4392651319503784, "learning_rate": 2.432034910917024e-05, "loss": 0.2473, "step": 12863, "teacher_loss": 0.22596198320388794 }, { "compression_loss": 0.0, "epoch": 2.32, "label_loss": 0.2262078821659088, "learning_rate": 2.4318569414791614e-05, "loss": 0.2982, "step": 12864, "teacher_loss": 0.3061489164829254 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5485791563987732, "learning_rate": 2.431678950676613e-05, "loss": 0.4582, "step": 12865, "teacher_loss": 0.44810742139816284 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4318222999572754, "learning_rate": 2.4315009385134597e-05, "loss": 0.3681, "step": 12866, "teacher_loss": 0.3609977960586548 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3778802752494812, "learning_rate": 2.4313229049937826e-05, "loss": 0.2738, "step": 12867, "teacher_loss": 0.26223719120025635 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.32510340213775635, "learning_rate": 2.4311448501216636e-05, "loss": 0.2605, "step": 12868, "teacher_loss": 0.2533513903617859 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6053298115730286, "learning_rate": 2.430966773901185e-05, "loss": 0.2939, "step": 12869, "teacher_loss": 0.2592756748199463 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4363131821155548, "learning_rate": 2.4307886763364293e-05, "loss": 0.271, "step": 12870, "teacher_loss": 0.2526601254940033 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6778519153594971, "learning_rate": 2.4306105574314797e-05, "loss": 0.3893, "step": 12871, "teacher_loss": 0.35726216435432434 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5926095247268677, "learning_rate": 2.4304324171904208e-05, "loss": 0.292, "step": 12872, "teacher_loss": 0.25857964158058167 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6763759851455688, "learning_rate": 2.4302542556173358e-05, "loss": 0.5491, "step": 12873, "teacher_loss": 0.5350015163421631 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.46671485900878906, "learning_rate": 2.4300760727163094e-05, "loss": 0.2788, "step": 12874, "teacher_loss": 0.25795796513557434 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.22786307334899902, "learning_rate": 2.4298978684914275e-05, "loss": 0.1963, "step": 12875, "teacher_loss": 0.19277650117874146 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6583261489868164, "learning_rate": 2.4297196429467755e-05, "loss": 0.3046, "step": 12876, "teacher_loss": 0.2652910649776459 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.47161003947257996, "learning_rate": 2.4295413960864397e-05, "loss": 0.3226, "step": 12877, "teacher_loss": 0.3059903383255005 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 1.1870452165603638, "learning_rate": 2.429363127914506e-05, "loss": 1.0165, "step": 12878, "teacher_loss": 0.9975295066833496 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.43254750967025757, "learning_rate": 2.4291848384350627e-05, "loss": 0.3582, "step": 12879, "teacher_loss": 0.3499205708503723 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4510774612426758, "learning_rate": 2.4290065276521967e-05, "loss": 0.2167, "step": 12880, "teacher_loss": 0.19068217277526855 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.19737738370895386, "learning_rate": 2.4288281955699963e-05, "loss": 0.2152, "step": 12881, "teacher_loss": 0.2171318233013153 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4947117567062378, "learning_rate": 2.42864984219255e-05, "loss": 0.2662, "step": 12882, "teacher_loss": 0.24084588885307312 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4688015580177307, "learning_rate": 2.4284714675239476e-05, "loss": 0.3399, "step": 12883, "teacher_loss": 0.325562447309494 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.7933242321014404, "learning_rate": 2.428293071568278e-05, "loss": 0.3408, "step": 12884, "teacher_loss": 0.2905702292919159 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.7716907858848572, "learning_rate": 2.428114654329631e-05, "loss": 0.286, "step": 12885, "teacher_loss": 0.23205523192882538 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3969205617904663, "learning_rate": 2.4279362158120976e-05, "loss": 0.3129, "step": 12886, "teacher_loss": 0.3036167025566101 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3845710754394531, "learning_rate": 2.427757756019769e-05, "loss": 0.43, "step": 12887, "teacher_loss": 0.43506473302841187 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3012787401676178, "learning_rate": 2.427579274956737e-05, "loss": 0.1644, "step": 12888, "teacher_loss": 0.14919152855873108 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5357356071472168, "learning_rate": 2.427400772627093e-05, "loss": 0.2243, "step": 12889, "teacher_loss": 0.1897270530462265 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.2902115285396576, "learning_rate": 2.4272222490349296e-05, "loss": 0.2089, "step": 12890, "teacher_loss": 0.19987821578979492 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.677614688873291, "learning_rate": 2.4270437041843402e-05, "loss": 0.5213, "step": 12891, "teacher_loss": 0.5039756298065186 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.342756986618042, "learning_rate": 2.426865138079418e-05, "loss": 0.1997, "step": 12892, "teacher_loss": 0.18376266956329346 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3064303398132324, "learning_rate": 2.4266865507242567e-05, "loss": 0.2505, "step": 12893, "teacher_loss": 0.2442825436592102 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6494128704071045, "learning_rate": 2.426507942122952e-05, "loss": 0.3287, "step": 12894, "teacher_loss": 0.29306161403656006 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.7751800417900085, "learning_rate": 2.4263293122795975e-05, "loss": 0.2899, "step": 12895, "teacher_loss": 0.2360188364982605 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.19788327813148499, "learning_rate": 2.426150661198289e-05, "loss": 0.2246, "step": 12896, "teacher_loss": 0.22758856415748596 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5023260712623596, "learning_rate": 2.425971988883123e-05, "loss": 0.3587, "step": 12897, "teacher_loss": 0.3427194654941559 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.34856122732162476, "learning_rate": 2.425793295338195e-05, "loss": 0.2399, "step": 12898, "teacher_loss": 0.2278183251619339 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3226652443408966, "learning_rate": 2.4256145805676028e-05, "loss": 0.2796, "step": 12899, "teacher_loss": 0.27479395270347595 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4358535408973694, "learning_rate": 2.425435844575443e-05, "loss": 0.2926, "step": 12900, "teacher_loss": 0.27668309211730957 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3327135741710663, "learning_rate": 2.4252570873658147e-05, "loss": 0.1885, "step": 12901, "teacher_loss": 0.1724540889263153 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5379756093025208, "learning_rate": 2.425078308942815e-05, "loss": 0.2289, "step": 12902, "teacher_loss": 0.194538876414299 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.16775484383106232, "learning_rate": 2.4248995093105432e-05, "loss": 0.1744, "step": 12903, "teacher_loss": 0.17518025636672974 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.7153120040893555, "learning_rate": 2.4247206884730982e-05, "loss": 0.2966, "step": 12904, "teacher_loss": 0.250084787607193 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.5726916790008545, "learning_rate": 2.4245418464345805e-05, "loss": 0.2916, "step": 12905, "teacher_loss": 0.2603141963481903 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4702540934085846, "learning_rate": 2.4243629831990905e-05, "loss": 0.1945, "step": 12906, "teacher_loss": 0.16389453411102295 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.10126000642776489, "learning_rate": 2.4241840987707283e-05, "loss": 0.2282, "step": 12907, "teacher_loss": 0.24225324392318726 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3827143609523773, "learning_rate": 2.424005193153596e-05, "loss": 0.2177, "step": 12908, "teacher_loss": 0.19934983551502228 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 1.3696250915527344, "learning_rate": 2.4238262663517944e-05, "loss": 0.4843, "step": 12909, "teacher_loss": 0.3858788013458252 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.35199326276779175, "learning_rate": 2.4236473183694267e-05, "loss": 0.1789, "step": 12910, "teacher_loss": 0.15964165329933167 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4294697940349579, "learning_rate": 2.4234683492105953e-05, "loss": 0.1948, "step": 12911, "teacher_loss": 0.16868704557418823 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4898097515106201, "learning_rate": 2.423289358879403e-05, "loss": 0.428, "step": 12912, "teacher_loss": 0.4211471378803253 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6356112957000732, "learning_rate": 2.423110347379954e-05, "loss": 0.277, "step": 12913, "teacher_loss": 0.2371285855770111 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4554620385169983, "learning_rate": 2.4229313147163522e-05, "loss": 0.1964, "step": 12914, "teacher_loss": 0.16757169365882874 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.6927334666252136, "learning_rate": 2.4227522608927027e-05, "loss": 0.5312, "step": 12915, "teacher_loss": 0.5132949352264404 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.2921598255634308, "learning_rate": 2.4225731859131104e-05, "loss": 0.1969, "step": 12916, "teacher_loss": 0.18632224202156067 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.3445168435573578, "learning_rate": 2.422394089781681e-05, "loss": 0.2861, "step": 12917, "teacher_loss": 0.27960968017578125 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.44665610790252686, "learning_rate": 2.4222149725025204e-05, "loss": 0.2412, "step": 12918, "teacher_loss": 0.21832284331321716 }, { "compression_loss": 0.0, "epoch": 2.33, "label_loss": 0.4019852578639984, "learning_rate": 2.4220358340797355e-05, "loss": 0.286, "step": 12919, "teacher_loss": 0.27310192584991455 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.8094888925552368, "learning_rate": 2.4218566745174334e-05, "loss": 0.3434, "step": 12920, "teacher_loss": 0.29155802726745605 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.22321835160255432, "learning_rate": 2.4216774938197217e-05, "loss": 0.2474, "step": 12921, "teacher_loss": 0.25004521012306213 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.10976250469684601, "learning_rate": 2.4214982919907087e-05, "loss": 0.1576, "step": 12922, "teacher_loss": 0.16286514699459076 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5718361139297485, "learning_rate": 2.4213190690345018e-05, "loss": 0.2914, "step": 12923, "teacher_loss": 0.26018911600112915 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.12915055453777313, "learning_rate": 2.4211398249552116e-05, "loss": 0.2025, "step": 12924, "teacher_loss": 0.21062982082366943 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4890318512916565, "learning_rate": 2.4209605597569465e-05, "loss": 0.3664, "step": 12925, "teacher_loss": 0.352780282497406 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5125355124473572, "learning_rate": 2.4207812734438167e-05, "loss": 0.2228, "step": 12926, "teacher_loss": 0.19059734046459198 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.8767570853233337, "learning_rate": 2.420601966019934e-05, "loss": 0.2796, "step": 12927, "teacher_loss": 0.2132851928472519 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3248770833015442, "learning_rate": 2.4204226374894078e-05, "loss": 0.264, "step": 12928, "teacher_loss": 0.2572064995765686 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.48605668544769287, "learning_rate": 2.4202432878563493e-05, "loss": 0.2587, "step": 12929, "teacher_loss": 0.23340681195259094 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.51018887758255, "learning_rate": 2.4200639171248724e-05, "loss": 0.2467, "step": 12930, "teacher_loss": 0.21747201681137085 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 1.2381446361541748, "learning_rate": 2.419884525299088e-05, "loss": 0.5333, "step": 12931, "teacher_loss": 0.45499977469444275 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.16283690929412842, "learning_rate": 2.419705112383109e-05, "loss": 0.3474, "step": 12932, "teacher_loss": 0.36786627769470215 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.6832075119018555, "learning_rate": 2.4195256783810494e-05, "loss": 0.2207, "step": 12933, "teacher_loss": 0.1692793220281601 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.1613585352897644, "learning_rate": 2.4193462232970233e-05, "loss": 0.1564, "step": 12934, "teacher_loss": 0.1558990776538849 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3078269958496094, "learning_rate": 2.4191667471351444e-05, "loss": 0.2014, "step": 12935, "teacher_loss": 0.18962684273719788 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.8351870179176331, "learning_rate": 2.4189872498995275e-05, "loss": 0.3237, "step": 12936, "teacher_loss": 0.26690834760665894 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.1850230097770691, "learning_rate": 2.4188077315942883e-05, "loss": 0.171, "step": 12937, "teacher_loss": 0.16943730413913727 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.6224699020385742, "learning_rate": 2.4186281922235433e-05, "loss": 0.277, "step": 12938, "teacher_loss": 0.23863530158996582 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5444072484970093, "learning_rate": 2.418448631791407e-05, "loss": 0.3496, "step": 12939, "teacher_loss": 0.32795074582099915 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3170098662376404, "learning_rate": 2.4182690503019976e-05, "loss": 0.3244, "step": 12940, "teacher_loss": 0.3252352774143219 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.302931010723114, "learning_rate": 2.418089447759432e-05, "loss": 0.2019, "step": 12941, "teacher_loss": 0.19065412878990173 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5815369486808777, "learning_rate": 2.417909824167828e-05, "loss": 0.2874, "step": 12942, "teacher_loss": 0.25477108359336853 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.32386094331741333, "learning_rate": 2.417730179531304e-05, "loss": 0.1849, "step": 12943, "teacher_loss": 0.16948944330215454 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.6512867212295532, "learning_rate": 2.417550513853978e-05, "loss": 0.2304, "step": 12944, "teacher_loss": 0.1836310774087906 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.2545059323310852, "learning_rate": 2.41737082713997e-05, "loss": 0.2309, "step": 12945, "teacher_loss": 0.2283049076795578 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4940689206123352, "learning_rate": 2.4171911193933995e-05, "loss": 0.2268, "step": 12946, "teacher_loss": 0.1970774382352829 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3047316074371338, "learning_rate": 2.4170113906183863e-05, "loss": 0.2181, "step": 12947, "teacher_loss": 0.20843788981437683 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.950525164604187, "learning_rate": 2.4168316408190508e-05, "loss": 0.2977, "step": 12948, "teacher_loss": 0.2251891791820526 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4151240587234497, "learning_rate": 2.4166518699995152e-05, "loss": 0.2681, "step": 12949, "teacher_loss": 0.25171977281570435 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.263724148273468, "learning_rate": 2.4164720781639002e-05, "loss": 0.2371, "step": 12950, "teacher_loss": 0.23416763544082642 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.6097198128700256, "learning_rate": 2.4162922653163283e-05, "loss": 0.3844, "step": 12951, "teacher_loss": 0.35935860872268677 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.19677267968654633, "learning_rate": 2.4161124314609216e-05, "loss": 0.2537, "step": 12952, "teacher_loss": 0.2599792778491974 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.43097972869873047, "learning_rate": 2.415932576601804e-05, "loss": 0.2859, "step": 12953, "teacher_loss": 0.2697813808917999 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4007374346256256, "learning_rate": 2.4157527007430987e-05, "loss": 0.2022, "step": 12954, "teacher_loss": 0.18011607229709625 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4225662350654602, "learning_rate": 2.4155728038889288e-05, "loss": 0.297, "step": 12955, "teacher_loss": 0.2830048203468323 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.6245284676551819, "learning_rate": 2.41539288604342e-05, "loss": 0.3412, "step": 12956, "teacher_loss": 0.30976295471191406 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.7469122409820557, "learning_rate": 2.4152129472106967e-05, "loss": 0.2704, "step": 12957, "teacher_loss": 0.21744760870933533 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5938356518745422, "learning_rate": 2.4150329873948846e-05, "loss": 0.2721, "step": 12958, "teacher_loss": 0.23637652397155762 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5098090767860413, "learning_rate": 2.414853006600109e-05, "loss": 0.2569, "step": 12959, "teacher_loss": 0.22878050804138184 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.2532508969306946, "learning_rate": 2.4146730048304974e-05, "loss": 0.1694, "step": 12960, "teacher_loss": 0.16007313132286072 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3584757447242737, "learning_rate": 2.414492982090176e-05, "loss": 0.2856, "step": 12961, "teacher_loss": 0.27745863795280457 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.3009755611419678, "learning_rate": 2.414312938383272e-05, "loss": 0.2786, "step": 12962, "teacher_loss": 0.2760612666606903 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4707462787628174, "learning_rate": 2.4141328737139142e-05, "loss": 0.2504, "step": 12963, "teacher_loss": 0.22595767676830292 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5244890451431274, "learning_rate": 2.41395278808623e-05, "loss": 0.2976, "step": 12964, "teacher_loss": 0.27240821719169617 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.09805616736412048, "learning_rate": 2.4137726815043483e-05, "loss": 0.1416, "step": 12965, "teacher_loss": 0.14643973112106323 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5068673491477966, "learning_rate": 2.413592553972399e-05, "loss": 0.3147, "step": 12966, "teacher_loss": 0.29337289929389954 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.44178706407546997, "learning_rate": 2.4134124054945115e-05, "loss": 0.2537, "step": 12967, "teacher_loss": 0.23279644548892975 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.4431309700012207, "learning_rate": 2.413232236074816e-05, "loss": 0.2117, "step": 12968, "teacher_loss": 0.18601390719413757 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.5502498149871826, "learning_rate": 2.4130520457174432e-05, "loss": 0.2932, "step": 12969, "teacher_loss": 0.2646617293357849 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.593531608581543, "learning_rate": 2.4128718344265246e-05, "loss": 0.3587, "step": 12970, "teacher_loss": 0.3325750231742859 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.8571649193763733, "learning_rate": 2.4126916022061916e-05, "loss": 1.056, "step": 12971, "teacher_loss": 1.0780761241912842 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.7511194944381714, "learning_rate": 2.412511349060577e-05, "loss": 0.3463, "step": 12972, "teacher_loss": 0.301334410905838 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.1949847936630249, "learning_rate": 2.4123310749938124e-05, "loss": 0.23, "step": 12973, "teacher_loss": 0.2338375300168991 }, { "compression_loss": 0.0, "epoch": 2.34, "label_loss": 0.42560848593711853, "learning_rate": 2.412150780010032e-05, "loss": 0.3273, "step": 12974, "teacher_loss": 0.31637537479400635 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.15780578553676605, "learning_rate": 2.411970464113369e-05, "loss": 0.2449, "step": 12975, "teacher_loss": 0.2545315623283386 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4683195948600769, "learning_rate": 2.4117901273079578e-05, "loss": 0.294, "step": 12976, "teacher_loss": 0.2745956480503082 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4335499703884125, "learning_rate": 2.4116097695979315e-05, "loss": 0.2497, "step": 12977, "teacher_loss": 0.2292264699935913 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5725017786026001, "learning_rate": 2.4114293909874276e-05, "loss": 0.2751, "step": 12978, "teacher_loss": 0.2420164793729782 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5932450890541077, "learning_rate": 2.4112489914805798e-05, "loss": 0.242, "step": 12979, "teacher_loss": 0.202985018491745 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.46854367852211, "learning_rate": 2.4110685710815245e-05, "loss": 0.3681, "step": 12980, "teacher_loss": 0.35695719718933105 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5692731738090515, "learning_rate": 2.4108881297943985e-05, "loss": 0.2404, "step": 12981, "teacher_loss": 0.2038128525018692 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.29996636509895325, "learning_rate": 2.4107076676233388e-05, "loss": 0.231, "step": 12982, "teacher_loss": 0.22337926924228668 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5239046216011047, "learning_rate": 2.410527184572483e-05, "loss": 0.3938, "step": 12983, "teacher_loss": 0.37928909063339233 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.14179030060768127, "learning_rate": 2.410346680645968e-05, "loss": 0.1952, "step": 12984, "teacher_loss": 0.20113706588745117 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.609269380569458, "learning_rate": 2.4101661558479336e-05, "loss": 0.3447, "step": 12985, "teacher_loss": 0.3152737617492676 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.43035614490509033, "learning_rate": 2.4099856101825177e-05, "loss": 0.2076, "step": 12986, "teacher_loss": 0.1828109323978424 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5959734320640564, "learning_rate": 2.4098050436538608e-05, "loss": 0.2486, "step": 12987, "teacher_loss": 0.21003341674804688 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6695563197135925, "learning_rate": 2.4096244562661014e-05, "loss": 0.3115, "step": 12988, "teacher_loss": 0.27170369029045105 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6153783798217773, "learning_rate": 2.40944384802338e-05, "loss": 0.2904, "step": 12989, "teacher_loss": 0.2542846202850342 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4398386478424072, "learning_rate": 2.4092632189298384e-05, "loss": 0.3017, "step": 12990, "teacher_loss": 0.28638529777526855 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.7592819929122925, "learning_rate": 2.409082568989617e-05, "loss": 0.6927, "step": 12991, "teacher_loss": 0.6853551864624023 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.36400753259658813, "learning_rate": 2.408901898206858e-05, "loss": 0.2877, "step": 12992, "teacher_loss": 0.2791703939437866 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.37116605043411255, "learning_rate": 2.4087212065857037e-05, "loss": 0.1809, "step": 12993, "teacher_loss": 0.15975654125213623 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.3625389337539673, "learning_rate": 2.4085404941302963e-05, "loss": 0.2835, "step": 12994, "teacher_loss": 0.27467402815818787 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6715219020843506, "learning_rate": 2.4083597608447797e-05, "loss": 0.3734, "step": 12995, "teacher_loss": 0.34024500846862793 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.24331369996070862, "learning_rate": 2.408179006733297e-05, "loss": 0.2583, "step": 12996, "teacher_loss": 0.259914368391037 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.930972158908844, "learning_rate": 2.4079982317999923e-05, "loss": 0.4675, "step": 12997, "teacher_loss": 0.41603749990463257 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4074627161026001, "learning_rate": 2.4078174360490105e-05, "loss": 0.3719, "step": 12998, "teacher_loss": 0.36791500449180603 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 1.1688551902770996, "learning_rate": 2.4076366194844967e-05, "loss": 0.2962, "step": 12999, "teacher_loss": 0.19921022653579712 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.2772231698036194, "learning_rate": 2.4074557821105967e-05, "loss": 0.3056, "step": 13000, "teacher_loss": 0.3087250292301178 }, { "epoch": 2.35, "eval_exact_match": 79.50804162724693, "eval_f1": 87.11857570895603, "step": 13000 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.48724859952926636, "learning_rate": 2.4072749239314565e-05, "loss": 0.2717, "step": 13001, "teacher_loss": 0.24779880046844482 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.42677101492881775, "learning_rate": 2.407094044951222e-05, "loss": 0.2201, "step": 13002, "teacher_loss": 0.19712623953819275 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.2668790817260742, "learning_rate": 2.4069131451740405e-05, "loss": 0.178, "step": 13003, "teacher_loss": 0.16812410950660706 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5046723484992981, "learning_rate": 2.4067322246040604e-05, "loss": 0.2798, "step": 13004, "teacher_loss": 0.25482630729675293 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4670093059539795, "learning_rate": 2.4065512832454285e-05, "loss": 0.2522, "step": 13005, "teacher_loss": 0.2283104658126831 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4037174582481384, "learning_rate": 2.4063703211022934e-05, "loss": 0.3289, "step": 13006, "teacher_loss": 0.32054901123046875 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.8299764394760132, "learning_rate": 2.4061893381788044e-05, "loss": 0.3451, "step": 13007, "teacher_loss": 0.2912542223930359 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6067514419555664, "learning_rate": 2.406008334479111e-05, "loss": 0.3132, "step": 13008, "teacher_loss": 0.28060683608055115 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.29773783683776855, "learning_rate": 2.4058273100073625e-05, "loss": 0.4186, "step": 13009, "teacher_loss": 0.4319990277290344 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6178734302520752, "learning_rate": 2.4056462647677098e-05, "loss": 0.3844, "step": 13010, "teacher_loss": 0.3584526479244232 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.22649678587913513, "learning_rate": 2.4054651987643037e-05, "loss": 0.2315, "step": 13011, "teacher_loss": 0.232097327709198 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4931163787841797, "learning_rate": 2.405284112001295e-05, "loss": 0.2842, "step": 13012, "teacher_loss": 0.2609565556049347 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6831939816474915, "learning_rate": 2.405103004482836e-05, "loss": 0.259, "step": 13013, "teacher_loss": 0.21185873448848724 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.23586717247962952, "learning_rate": 2.4049218762130782e-05, "loss": 0.3572, "step": 13014, "teacher_loss": 0.3706822693347931 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.15566489100456238, "learning_rate": 2.404740727196176e-05, "loss": 0.2357, "step": 13015, "teacher_loss": 0.24456503987312317 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6956691145896912, "learning_rate": 2.4045595574362805e-05, "loss": 0.3429, "step": 13016, "teacher_loss": 0.3036884665489197 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5355004072189331, "learning_rate": 2.404378366937547e-05, "loss": 0.2581, "step": 13017, "teacher_loss": 0.22725075483322144 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.5629329681396484, "learning_rate": 2.4041971557041282e-05, "loss": 0.4853, "step": 13018, "teacher_loss": 0.4766855239868164 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4025377035140991, "learning_rate": 2.4040159237401802e-05, "loss": 0.2525, "step": 13019, "teacher_loss": 0.23581308126449585 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.4799646735191345, "learning_rate": 2.4038346710498574e-05, "loss": 0.2959, "step": 13020, "teacher_loss": 0.2755019962787628 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.8231446146965027, "learning_rate": 2.4036533976373153e-05, "loss": 0.6856, "step": 13021, "teacher_loss": 0.6702879667282104 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.26017385721206665, "learning_rate": 2.40347210350671e-05, "loss": 0.2506, "step": 13022, "teacher_loss": 0.2495332956314087 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.35366058349609375, "learning_rate": 2.4032907886621984e-05, "loss": 0.3247, "step": 13023, "teacher_loss": 0.32142865657806396 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 1.2711718082427979, "learning_rate": 2.403109453107937e-05, "loss": 0.3749, "step": 13024, "teacher_loss": 0.27532148361206055 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.8148959875106812, "learning_rate": 2.4029280968480832e-05, "loss": 0.7794, "step": 13025, "teacher_loss": 0.7754602432250977 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.7226345539093018, "learning_rate": 2.4027467198867957e-05, "loss": 0.3409, "step": 13026, "teacher_loss": 0.2984299063682556 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.35176289081573486, "learning_rate": 2.4025653222282324e-05, "loss": 0.3125, "step": 13027, "teacher_loss": 0.30808767676353455 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.26792585849761963, "learning_rate": 2.4023839038765525e-05, "loss": 0.1785, "step": 13028, "teacher_loss": 0.16851915419101715 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.6042264699935913, "learning_rate": 2.4022024648359147e-05, "loss": 0.2457, "step": 13029, "teacher_loss": 0.2058224380016327 }, { "compression_loss": 0.0, "epoch": 2.35, "label_loss": 0.24340839684009552, "learning_rate": 2.4020210051104796e-05, "loss": 0.1287, "step": 13030, "teacher_loss": 0.11591649055480957 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3650391101837158, "learning_rate": 2.4018395247044074e-05, "loss": 0.3565, "step": 13031, "teacher_loss": 0.3555457592010498 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5148910880088806, "learning_rate": 2.4016580236218585e-05, "loss": 0.2747, "step": 13032, "teacher_loss": 0.2479724884033203 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.4794608950614929, "learning_rate": 2.4014765018669948e-05, "loss": 0.2279, "step": 13033, "teacher_loss": 0.19998782873153687 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.49105241894721985, "learning_rate": 2.4012949594439773e-05, "loss": 0.3682, "step": 13034, "teacher_loss": 0.3545871078968048 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3831844925880432, "learning_rate": 2.4011133963569683e-05, "loss": 0.2565, "step": 13035, "teacher_loss": 0.24241241812705994 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 1.0132230520248413, "learning_rate": 2.400931812610131e-05, "loss": 0.3664, "step": 13036, "teacher_loss": 0.2944791913032532 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5572211146354675, "learning_rate": 2.400750208207629e-05, "loss": 0.2423, "step": 13037, "teacher_loss": 0.20729447901248932 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.6663508415222168, "learning_rate": 2.4005685831536248e-05, "loss": 0.2686, "step": 13038, "teacher_loss": 0.22439594566822052 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.783195972442627, "learning_rate": 2.4003869374522832e-05, "loss": 0.4137, "step": 13039, "teacher_loss": 0.37265104055404663 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5105181932449341, "learning_rate": 2.4002052711077685e-05, "loss": 0.3055, "step": 13040, "teacher_loss": 0.2827637791633606 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.31425777077674866, "learning_rate": 2.4000235841242457e-05, "loss": 0.2017, "step": 13041, "teacher_loss": 0.18913927674293518 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.7799074649810791, "learning_rate": 2.3998418765058813e-05, "loss": 0.2922, "step": 13042, "teacher_loss": 0.23801207542419434 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.43896448612213135, "learning_rate": 2.3996601482568395e-05, "loss": 0.204, "step": 13043, "teacher_loss": 0.17783747613430023 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.6664102673530579, "learning_rate": 2.399478399381288e-05, "loss": 0.2434, "step": 13044, "teacher_loss": 0.1964050531387329 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.39133018255233765, "learning_rate": 2.3992966298833945e-05, "loss": 0.2961, "step": 13045, "teacher_loss": 0.2855234146118164 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.33472564816474915, "learning_rate": 2.3991148397673247e-05, "loss": 0.1984, "step": 13046, "teacher_loss": 0.1832292079925537 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.4228655993938446, "learning_rate": 2.3989330290372476e-05, "loss": 0.3104, "step": 13047, "teacher_loss": 0.29794585704803467 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.7621529698371887, "learning_rate": 2.398751197697331e-05, "loss": 0.2786, "step": 13048, "teacher_loss": 0.2248266637325287 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 1.0917127132415771, "learning_rate": 2.3985693457517444e-05, "loss": 0.3016, "step": 13049, "teacher_loss": 0.21382871270179749 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.7560780644416809, "learning_rate": 2.3983874732046566e-05, "loss": 0.3908, "step": 13050, "teacher_loss": 0.3501774072647095 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.45956382155418396, "learning_rate": 2.3982055800602374e-05, "loss": 0.319, "step": 13051, "teacher_loss": 0.3033749461174011 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.46796321868896484, "learning_rate": 2.3980236663226574e-05, "loss": 0.2386, "step": 13052, "teacher_loss": 0.21316882967948914 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.6254762411117554, "learning_rate": 2.3978417319960872e-05, "loss": 0.3448, "step": 13053, "teacher_loss": 0.31356537342071533 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.2411290407180786, "learning_rate": 2.3976597770846977e-05, "loss": 0.2698, "step": 13054, "teacher_loss": 0.2730352282524109 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.7285536527633667, "learning_rate": 2.3974778015926607e-05, "loss": 0.2679, "step": 13055, "teacher_loss": 0.21672102808952332 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.12649649381637573, "learning_rate": 2.397295805524149e-05, "loss": 0.1768, "step": 13056, "teacher_loss": 0.1823521852493286 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.1894121766090393, "learning_rate": 2.397113788883334e-05, "loss": 0.2567, "step": 13057, "teacher_loss": 0.26419055461883545 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.396454393863678, "learning_rate": 2.3969317516743902e-05, "loss": 0.3096, "step": 13058, "teacher_loss": 0.29998183250427246 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3379182815551758, "learning_rate": 2.3967496939014904e-05, "loss": 0.329, "step": 13059, "teacher_loss": 0.3280338644981384 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.9625312089920044, "learning_rate": 2.3965676155688085e-05, "loss": 0.6018, "step": 13060, "teacher_loss": 0.5617706179618835 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.47420382499694824, "learning_rate": 2.3963855166805195e-05, "loss": 0.3892, "step": 13061, "teacher_loss": 0.37970829010009766 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.35196182131767273, "learning_rate": 2.3962033972407978e-05, "loss": 0.3564, "step": 13062, "teacher_loss": 0.35692551732063293 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.615990161895752, "learning_rate": 2.3960212572538193e-05, "loss": 0.2929, "step": 13063, "teacher_loss": 0.2569584250450134 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.2505914270877838, "learning_rate": 2.39583909672376e-05, "loss": 0.1605, "step": 13064, "teacher_loss": 0.1505323350429535 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3963540196418762, "learning_rate": 2.395656915654795e-05, "loss": 0.2649, "step": 13065, "teacher_loss": 0.2503194212913513 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.38173213601112366, "learning_rate": 2.3954747140511034e-05, "loss": 0.4081, "step": 13066, "teacher_loss": 0.41103553771972656 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 1.1104040145874023, "learning_rate": 2.395292491916861e-05, "loss": 0.3491, "step": 13067, "teacher_loss": 0.2645367383956909 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.8333873152732849, "learning_rate": 2.3951102492562456e-05, "loss": 0.4994, "step": 13068, "teacher_loss": 0.46227729320526123 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5347037315368652, "learning_rate": 2.3949279860734366e-05, "loss": 0.3395, "step": 13069, "teacher_loss": 0.31778132915496826 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.33279284834861755, "learning_rate": 2.3947457023726118e-05, "loss": 0.2469, "step": 13070, "teacher_loss": 0.23739996552467346 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3597686290740967, "learning_rate": 2.3945633981579506e-05, "loss": 0.2124, "step": 13071, "teacher_loss": 0.19603756070137024 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5787531733512878, "learning_rate": 2.3943810734336325e-05, "loss": 0.3539, "step": 13072, "teacher_loss": 0.3289181888103485 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.23367469012737274, "learning_rate": 2.394198728203838e-05, "loss": 0.2434, "step": 13073, "teacher_loss": 0.24449768662452698 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.46680039167404175, "learning_rate": 2.3940163624727477e-05, "loss": 0.2442, "step": 13074, "teacher_loss": 0.21946552395820618 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.1710911989212036, "learning_rate": 2.3938339762445426e-05, "loss": 0.1713, "step": 13075, "teacher_loss": 0.1713024079799652 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5758000016212463, "learning_rate": 2.3936515695234046e-05, "loss": 0.3851, "step": 13076, "teacher_loss": 0.3638860285282135 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.357105553150177, "learning_rate": 2.3934691423135145e-05, "loss": 0.2604, "step": 13077, "teacher_loss": 0.24965639412403107 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.29043570160865784, "learning_rate": 2.3932866946190567e-05, "loss": 0.2982, "step": 13078, "teacher_loss": 0.2990264296531677 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.5501710176467896, "learning_rate": 2.393104226444212e-05, "loss": 0.3048, "step": 13079, "teacher_loss": 0.27749407291412354 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.9138692617416382, "learning_rate": 2.3929217377931665e-05, "loss": 0.5338, "step": 13080, "teacher_loss": 0.49158889055252075 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3634622097015381, "learning_rate": 2.392739228670102e-05, "loss": 0.1321, "step": 13081, "teacher_loss": 0.1063578873872757 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3361184298992157, "learning_rate": 2.3925566990792033e-05, "loss": 0.1603, "step": 13082, "teacher_loss": 0.14072290062904358 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.7473549842834473, "learning_rate": 2.392374149024656e-05, "loss": 0.3559, "step": 13083, "teacher_loss": 0.3123679757118225 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.3420478105545044, "learning_rate": 2.3921915785106446e-05, "loss": 0.2304, "step": 13084, "teacher_loss": 0.2179594188928604 }, { "compression_loss": 0.0, "epoch": 2.36, "label_loss": 0.6225875020027161, "learning_rate": 2.3920089875413553e-05, "loss": 0.3771, "step": 13085, "teacher_loss": 0.34978365898132324 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.779904305934906, "learning_rate": 2.3918263761209746e-05, "loss": 0.2891, "step": 13086, "teacher_loss": 0.23457324504852295 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.3165227770805359, "learning_rate": 2.3916437442536885e-05, "loss": 0.1997, "step": 13087, "teacher_loss": 0.18668217957019806 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.4047742486000061, "learning_rate": 2.3914610919436843e-05, "loss": 0.2468, "step": 13088, "teacher_loss": 0.22930093109607697 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2210788130760193, "learning_rate": 2.3912784191951505e-05, "loss": 0.2253, "step": 13089, "teacher_loss": 0.2258215993642807 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.71305251121521, "learning_rate": 2.391095726012275e-05, "loss": 0.4028, "step": 13090, "teacher_loss": 0.3682757019996643 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.585705041885376, "learning_rate": 2.390913012399246e-05, "loss": 0.3035, "step": 13091, "teacher_loss": 0.27212128043174744 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.31673091650009155, "learning_rate": 2.3907302783602522e-05, "loss": 0.2039, "step": 13092, "teacher_loss": 0.19141682982444763 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5323687195777893, "learning_rate": 2.3905475238994844e-05, "loss": 0.2625, "step": 13093, "teacher_loss": 0.23250234127044678 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2283971905708313, "learning_rate": 2.3903647490211317e-05, "loss": 0.1882, "step": 13094, "teacher_loss": 0.1836811751127243 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.46738266944885254, "learning_rate": 2.3901819537293843e-05, "loss": 0.2395, "step": 13095, "teacher_loss": 0.21416707336902618 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2503347098827362, "learning_rate": 2.389999138028434e-05, "loss": 0.2066, "step": 13096, "teacher_loss": 0.20170539617538452 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.3184281289577484, "learning_rate": 2.389816301922472e-05, "loss": 0.1897, "step": 13097, "teacher_loss": 0.17538359761238098 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.3551815152168274, "learning_rate": 2.3896334454156898e-05, "loss": 0.298, "step": 13098, "teacher_loss": 0.2916863262653351 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.7322406768798828, "learning_rate": 2.3894505685122796e-05, "loss": 0.3185, "step": 13099, "teacher_loss": 0.2725127339363098 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2698879837989807, "learning_rate": 2.389267671216435e-05, "loss": 0.22, "step": 13100, "teacher_loss": 0.2144922912120819 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.6521711349487305, "learning_rate": 2.3890847535323485e-05, "loss": 0.4658, "step": 13101, "teacher_loss": 0.4450523257255554 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.25021934509277344, "learning_rate": 2.3889018154642145e-05, "loss": 0.2571, "step": 13102, "teacher_loss": 0.2579100728034973 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.4916960895061493, "learning_rate": 2.3887188570162266e-05, "loss": 0.2333, "step": 13103, "teacher_loss": 0.2045513242483139 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.43899160623550415, "learning_rate": 2.3885358781925802e-05, "loss": 0.196, "step": 13104, "teacher_loss": 0.16897623240947723 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5957446098327637, "learning_rate": 2.3883528789974703e-05, "loss": 0.4954, "step": 13105, "teacher_loss": 0.4842890501022339 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.32777661085128784, "learning_rate": 2.388169859435092e-05, "loss": 0.2274, "step": 13106, "teacher_loss": 0.21625575423240662 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.38102781772613525, "learning_rate": 2.387986819509642e-05, "loss": 0.1865, "step": 13107, "teacher_loss": 0.16483914852142334 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.24892719089984894, "learning_rate": 2.387803759225316e-05, "loss": 0.2052, "step": 13108, "teacher_loss": 0.20032528042793274 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.3413429260253906, "learning_rate": 2.3876206785863114e-05, "loss": 0.2351, "step": 13109, "teacher_loss": 0.2233349233865738 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.6450066566467285, "learning_rate": 2.3874375775968263e-05, "loss": 0.2654, "step": 13110, "teacher_loss": 0.22318929433822632 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.24692469835281372, "learning_rate": 2.3872544562610586e-05, "loss": 0.2872, "step": 13111, "teacher_loss": 0.29168492555618286 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2794077694416046, "learning_rate": 2.3870713145832057e-05, "loss": 0.1885, "step": 13112, "teacher_loss": 0.1783529818058014 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 1.1267932653427124, "learning_rate": 2.3868881525674674e-05, "loss": 0.4188, "step": 13113, "teacher_loss": 0.34011781215667725 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.937947154045105, "learning_rate": 2.3867049702180428e-05, "loss": 0.434, "step": 13114, "teacher_loss": 0.37800148129463196 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5105807781219482, "learning_rate": 2.3865217675391315e-05, "loss": 0.2486, "step": 13115, "teacher_loss": 0.21944299340248108 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.614952027797699, "learning_rate": 2.3863385445349342e-05, "loss": 0.2856, "step": 13116, "teacher_loss": 0.24900464713573456 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.446087121963501, "learning_rate": 2.386155301209651e-05, "loss": 0.3331, "step": 13117, "teacher_loss": 0.3205166459083557 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.6839063167572021, "learning_rate": 2.3859720375674843e-05, "loss": 0.3027, "step": 13118, "teacher_loss": 0.26030343770980835 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.25020039081573486, "learning_rate": 2.385788753612635e-05, "loss": 0.1681, "step": 13119, "teacher_loss": 0.1590082347393036 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.7375144362449646, "learning_rate": 2.3856054493493048e-05, "loss": 0.266, "step": 13120, "teacher_loss": 0.2136373370885849 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5715293884277344, "learning_rate": 2.385422124781697e-05, "loss": 0.3339, "step": 13121, "teacher_loss": 0.30750131607055664 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.7466154098510742, "learning_rate": 2.385238779914015e-05, "loss": 0.2962, "step": 13122, "teacher_loss": 0.24614648520946503 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.22063696384429932, "learning_rate": 2.3850554147504614e-05, "loss": 0.1653, "step": 13123, "teacher_loss": 0.15916435420513153 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.4357588589191437, "learning_rate": 2.384872029295241e-05, "loss": 0.2787, "step": 13124, "teacher_loss": 0.2612892687320709 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5342859625816345, "learning_rate": 2.3846886235525573e-05, "loss": 0.4384, "step": 13125, "teacher_loss": 0.4277440309524536 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.29129987955093384, "learning_rate": 2.3845051975266162e-05, "loss": 0.1558, "step": 13126, "teacher_loss": 0.14073993265628815 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.16417798399925232, "learning_rate": 2.384321751221623e-05, "loss": 0.222, "step": 13127, "teacher_loss": 0.2284564971923828 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5167200565338135, "learning_rate": 2.3841382846417835e-05, "loss": 0.2678, "step": 13128, "teacher_loss": 0.2401035726070404 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5882191061973572, "learning_rate": 2.3839547977913036e-05, "loss": 0.2696, "step": 13129, "teacher_loss": 0.23424233496189117 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 1.027330994606018, "learning_rate": 2.3837712906743905e-05, "loss": 0.4813, "step": 13130, "teacher_loss": 0.4206332862377167 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.26825690269470215, "learning_rate": 2.3835877632952516e-05, "loss": 0.2326, "step": 13131, "teacher_loss": 0.2286532074213028 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.30091434717178345, "learning_rate": 2.383404215658094e-05, "loss": 0.2452, "step": 13132, "teacher_loss": 0.23905707895755768 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.48598629236221313, "learning_rate": 2.383220647767127e-05, "loss": 0.3119, "step": 13133, "teacher_loss": 0.29258108139038086 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 1.1943050622940063, "learning_rate": 2.3830370596265576e-05, "loss": 0.384, "step": 13134, "teacher_loss": 0.2940070629119873 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.4006240963935852, "learning_rate": 2.3828534512405968e-05, "loss": 0.2519, "step": 13135, "teacher_loss": 0.2353377640247345 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5557321310043335, "learning_rate": 2.382669822613453e-05, "loss": 0.5776, "step": 13136, "teacher_loss": 0.5800773501396179 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.2372012585401535, "learning_rate": 2.3824861737493362e-05, "loss": 0.2197, "step": 13137, "teacher_loss": 0.21771565079689026 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.5158334374427795, "learning_rate": 2.3823025046524576e-05, "loss": 0.2303, "step": 13138, "teacher_loss": 0.19856229424476624 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.4155901074409485, "learning_rate": 2.382118815327028e-05, "loss": 0.19, "step": 13139, "teacher_loss": 0.1649080216884613 }, { "compression_loss": 0.0, "epoch": 2.37, "label_loss": 0.6908024549484253, "learning_rate": 2.381935105777258e-05, "loss": 0.3165, "step": 13140, "teacher_loss": 0.27495890855789185 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.42068690061569214, "learning_rate": 2.381751376007361e-05, "loss": 0.2223, "step": 13141, "teacher_loss": 0.2002897709608078 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.6808729767799377, "learning_rate": 2.381567626021548e-05, "loss": 0.3067, "step": 13142, "teacher_loss": 0.2651064693927765 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.30923861265182495, "learning_rate": 2.381383855824033e-05, "loss": 0.3385, "step": 13143, "teacher_loss": 0.3417533040046692 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5256384015083313, "learning_rate": 2.3812000654190286e-05, "loss": 0.2833, "step": 13144, "teacher_loss": 0.25634124875068665 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4820489287376404, "learning_rate": 2.381016254810748e-05, "loss": 0.3384, "step": 13145, "teacher_loss": 0.3224857449531555 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4537546932697296, "learning_rate": 2.3808324240034066e-05, "loss": 0.2344, "step": 13146, "teacher_loss": 0.21003296971321106 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.30748042464256287, "learning_rate": 2.380648573001219e-05, "loss": 0.2476, "step": 13147, "teacher_loss": 0.2409113347530365 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4503805637359619, "learning_rate": 2.3804647018083992e-05, "loss": 0.376, "step": 13148, "teacher_loss": 0.36771833896636963 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.6322369575500488, "learning_rate": 2.380280810429164e-05, "loss": 0.3802, "step": 13149, "teacher_loss": 0.3521774709224701 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.21972379088401794, "learning_rate": 2.3800968988677287e-05, "loss": 0.1823, "step": 13150, "teacher_loss": 0.17814847826957703 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.8011816740036011, "learning_rate": 2.3799129671283104e-05, "loss": 0.2327, "step": 13151, "teacher_loss": 0.1695370078086853 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 1.0780887603759766, "learning_rate": 2.3797290152151257e-05, "loss": 0.4058, "step": 13152, "teacher_loss": 0.33110371232032776 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.3133049011230469, "learning_rate": 2.3795450431323925e-05, "loss": 0.2541, "step": 13153, "teacher_loss": 0.24751552939414978 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.7453523278236389, "learning_rate": 2.3793610508843286e-05, "loss": 0.4024, "step": 13154, "teacher_loss": 0.3643397092819214 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.1672964245080948, "learning_rate": 2.3791770384751516e-05, "loss": 0.2, "step": 13155, "teacher_loss": 0.20362040400505066 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.22130584716796875, "learning_rate": 2.3789930059090814e-05, "loss": 0.1694, "step": 13156, "teacher_loss": 0.16361220180988312 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4113225042819977, "learning_rate": 2.3788089531903372e-05, "loss": 0.2569, "step": 13157, "teacher_loss": 0.23979498445987701 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.3953251242637634, "learning_rate": 2.3786248803231383e-05, "loss": 0.2404, "step": 13158, "teacher_loss": 0.22322587668895721 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5500034689903259, "learning_rate": 2.378440787311705e-05, "loss": 0.2831, "step": 13159, "teacher_loss": 0.2533941864967346 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.23266354203224182, "learning_rate": 2.3782566741602585e-05, "loss": 0.2217, "step": 13160, "teacher_loss": 0.220467209815979 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.3935345411300659, "learning_rate": 2.378072540873019e-05, "loss": 0.2525, "step": 13161, "teacher_loss": 0.23687484860420227 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.3831459879875183, "learning_rate": 2.3778883874542098e-05, "loss": 0.2121, "step": 13162, "teacher_loss": 0.1931067705154419 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.24893715977668762, "learning_rate": 2.377704213908051e-05, "loss": 0.2141, "step": 13163, "teacher_loss": 0.2101958841085434 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4469989538192749, "learning_rate": 2.3775200202387663e-05, "loss": 0.2188, "step": 13164, "teacher_loss": 0.1934095174074173 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.46684351563453674, "learning_rate": 2.3773358064505784e-05, "loss": 0.2413, "step": 13165, "teacher_loss": 0.21619799733161926 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.28017908334732056, "learning_rate": 2.377151572547711e-05, "loss": 0.1762, "step": 13166, "teacher_loss": 0.16465477645397186 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5015286803245544, "learning_rate": 2.3769673185343877e-05, "loss": 0.3183, "step": 13167, "teacher_loss": 0.2979453504085541 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.23387598991394043, "learning_rate": 2.3767830444148335e-05, "loss": 0.1898, "step": 13168, "teacher_loss": 0.1848611682653427 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5822409391403198, "learning_rate": 2.3765987501932724e-05, "loss": 0.3214, "step": 13169, "teacher_loss": 0.2923853397369385 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5652356147766113, "learning_rate": 2.3764144358739302e-05, "loss": 0.3594, "step": 13170, "teacher_loss": 0.3365795314311981 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.7127039432525635, "learning_rate": 2.3762301014610326e-05, "loss": 0.3148, "step": 13171, "teacher_loss": 0.27053916454315186 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5804683566093445, "learning_rate": 2.376045746958806e-05, "loss": 0.304, "step": 13172, "teacher_loss": 0.27332770824432373 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.22764703631401062, "learning_rate": 2.375861372371477e-05, "loss": 0.1507, "step": 13173, "teacher_loss": 0.1422058343887329 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.4561974108219147, "learning_rate": 2.3756769777032726e-05, "loss": 0.1991, "step": 13174, "teacher_loss": 0.17057496309280396 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.30623212456703186, "learning_rate": 2.3754925629584204e-05, "loss": 0.169, "step": 13175, "teacher_loss": 0.15378184616565704 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.2700462341308594, "learning_rate": 2.3753081281411483e-05, "loss": 0.2424, "step": 13176, "teacher_loss": 0.23936600983142853 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.23005768656730652, "learning_rate": 2.3751236732556857e-05, "loss": 0.3319, "step": 13177, "teacher_loss": 0.3431857228279114 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5549268126487732, "learning_rate": 2.374939198306261e-05, "loss": 0.2093, "step": 13178, "teacher_loss": 0.17088985443115234 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.23624272644519806, "learning_rate": 2.3747547032971036e-05, "loss": 0.2157, "step": 13179, "teacher_loss": 0.21345439553260803 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.27924948930740356, "learning_rate": 2.374570188232443e-05, "loss": 0.2226, "step": 13180, "teacher_loss": 0.21627815067768097 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.43899205327033997, "learning_rate": 2.374385653116511e-05, "loss": 0.2046, "step": 13181, "teacher_loss": 0.1785382330417633 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.48236730694770813, "learning_rate": 2.3742010979535366e-05, "loss": 0.3228, "step": 13182, "teacher_loss": 0.3050217628479004 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5388857126235962, "learning_rate": 2.3740165227477523e-05, "loss": 0.431, "step": 13183, "teacher_loss": 0.41895946860313416 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.6362184882164001, "learning_rate": 2.3738319275033903e-05, "loss": 0.3217, "step": 13184, "teacher_loss": 0.2868010997772217 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.2992188036441803, "learning_rate": 2.3736473122246812e-05, "loss": 0.2017, "step": 13185, "teacher_loss": 0.19082872569561005 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.6334612965583801, "learning_rate": 2.3734626769158587e-05, "loss": 0.2914, "step": 13186, "teacher_loss": 0.2533993422985077 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5217605829238892, "learning_rate": 2.3732780215811563e-05, "loss": 0.3696, "step": 13187, "teacher_loss": 0.3526615500450134 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.3592368960380554, "learning_rate": 2.3730933462248065e-05, "loss": 0.2565, "step": 13188, "teacher_loss": 0.24505166709423065 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.32157981395721436, "learning_rate": 2.372908650851044e-05, "loss": 0.1859, "step": 13189, "teacher_loss": 0.1707925796508789 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.6584714651107788, "learning_rate": 2.372723935464104e-05, "loss": 0.379, "step": 13190, "teacher_loss": 0.3479844331741333 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.35353463888168335, "learning_rate": 2.37253920006822e-05, "loss": 0.3739, "step": 13191, "teacher_loss": 0.3761264681816101 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5478827357292175, "learning_rate": 2.3723544446676283e-05, "loss": 0.2573, "step": 13192, "teacher_loss": 0.22501471638679504 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.14495782554149628, "learning_rate": 2.3721696692665644e-05, "loss": 0.1674, "step": 13193, "teacher_loss": 0.16993612051010132 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.18254454433918, "learning_rate": 2.3719848738692653e-05, "loss": 0.2128, "step": 13194, "teacher_loss": 0.21612313389778137 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.8071692585945129, "learning_rate": 2.371800058479967e-05, "loss": 0.2532, "step": 13195, "teacher_loss": 0.19159573316574097 }, { "compression_loss": 0.0, "epoch": 2.38, "label_loss": 0.5467922687530518, "learning_rate": 2.3716152231029077e-05, "loss": 0.2462, "step": 13196, "teacher_loss": 0.21282029151916504 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3530251085758209, "learning_rate": 2.3714303677423242e-05, "loss": 0.2971, "step": 13197, "teacher_loss": 0.2909325063228607 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.23572660982608795, "learning_rate": 2.371245492402455e-05, "loss": 0.1372, "step": 13198, "teacher_loss": 0.12627235054969788 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.533949613571167, "learning_rate": 2.3710605970875388e-05, "loss": 0.3334, "step": 13199, "teacher_loss": 0.31114646792411804 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.7894918322563171, "learning_rate": 2.3708756818018147e-05, "loss": 0.3494, "step": 13200, "teacher_loss": 0.30044567584991455 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.47003427147865295, "learning_rate": 2.370690746549522e-05, "loss": 0.2602, "step": 13201, "teacher_loss": 0.23685386776924133 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5281930565834045, "learning_rate": 2.3705057913349017e-05, "loss": 0.3049, "step": 13202, "teacher_loss": 0.2801341414451599 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.41883549094200134, "learning_rate": 2.3703208161621932e-05, "loss": 0.1874, "step": 13203, "teacher_loss": 0.16173753142356873 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.28319305181503296, "learning_rate": 2.3701358210356375e-05, "loss": 0.2921, "step": 13204, "teacher_loss": 0.2930489182472229 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3948586583137512, "learning_rate": 2.3699508059594762e-05, "loss": 0.276, "step": 13205, "teacher_loss": 0.26273927092552185 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 1.120369791984558, "learning_rate": 2.3697657709379513e-05, "loss": 0.3739, "step": 13206, "teacher_loss": 0.2910025119781494 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 1.2317451238632202, "learning_rate": 2.369580715975305e-05, "loss": 0.4627, "step": 13207, "teacher_loss": 0.37722089886665344 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.18874123692512512, "learning_rate": 2.36939564107578e-05, "loss": 0.1608, "step": 13208, "teacher_loss": 0.1576671600341797 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5665850639343262, "learning_rate": 2.3692105462436198e-05, "loss": 0.3509, "step": 13209, "teacher_loss": 0.3269417881965637 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.46877068281173706, "learning_rate": 2.3690254314830674e-05, "loss": 0.2421, "step": 13210, "teacher_loss": 0.2169049084186554 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.18176183104515076, "learning_rate": 2.3688402967983672e-05, "loss": 0.203, "step": 13211, "teacher_loss": 0.2054072767496109 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5467663407325745, "learning_rate": 2.3686551421937642e-05, "loss": 0.2183, "step": 13212, "teacher_loss": 0.1817675083875656 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.4091041088104248, "learning_rate": 2.3684699676735033e-05, "loss": 0.2503, "step": 13213, "teacher_loss": 0.23265963792800903 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.2440621256828308, "learning_rate": 2.3682847732418295e-05, "loss": 0.2594, "step": 13214, "teacher_loss": 0.26114779710769653 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.29172462224960327, "learning_rate": 2.3680995589029895e-05, "loss": 0.1875, "step": 13215, "teacher_loss": 0.1759473830461502 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5511808395385742, "learning_rate": 2.367914324661229e-05, "loss": 0.2796, "step": 13216, "teacher_loss": 0.24937835335731506 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.2184123694896698, "learning_rate": 2.367729070520795e-05, "loss": 0.2604, "step": 13217, "teacher_loss": 0.2650441527366638 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.7929906845092773, "learning_rate": 2.3675437964859355e-05, "loss": 0.4554, "step": 13218, "teacher_loss": 0.4179278016090393 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 1.0236337184906006, "learning_rate": 2.3673585025608976e-05, "loss": 0.4708, "step": 13219, "teacher_loss": 0.4093579649925232 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.4887635409832001, "learning_rate": 2.3671731887499296e-05, "loss": 0.2945, "step": 13220, "teacher_loss": 0.27289286255836487 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.46457576751708984, "learning_rate": 2.3669878550572803e-05, "loss": 0.322, "step": 13221, "teacher_loss": 0.3061993718147278 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.2688802480697632, "learning_rate": 2.3668025014871987e-05, "loss": 0.2852, "step": 13222, "teacher_loss": 0.28706392645835876 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.4268842935562134, "learning_rate": 2.366617128043935e-05, "loss": 0.1877, "step": 13223, "teacher_loss": 0.16117416322231293 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3449594974517822, "learning_rate": 2.3664317347317382e-05, "loss": 0.3286, "step": 13224, "teacher_loss": 0.32683229446411133 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5010718107223511, "learning_rate": 2.3662463215548602e-05, "loss": 0.2413, "step": 13225, "teacher_loss": 0.21245194971561432 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3536940813064575, "learning_rate": 2.3660608885175505e-05, "loss": 0.1708, "step": 13226, "teacher_loss": 0.15052807331085205 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.6104978919029236, "learning_rate": 2.3658754356240613e-05, "loss": 0.3089, "step": 13227, "teacher_loss": 0.27539628744125366 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.682197093963623, "learning_rate": 2.365689962878645e-05, "loss": 0.2431, "step": 13228, "teacher_loss": 0.19426822662353516 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.8661444187164307, "learning_rate": 2.3655044702855526e-05, "loss": 0.2865, "step": 13229, "teacher_loss": 0.22214969992637634 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.6998417973518372, "learning_rate": 2.365318957849038e-05, "loss": 0.3742, "step": 13230, "teacher_loss": 0.33803316950798035 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.6644032001495361, "learning_rate": 2.365133425573354e-05, "loss": 0.6048, "step": 13231, "teacher_loss": 0.5982170104980469 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5087882280349731, "learning_rate": 2.3649478734627543e-05, "loss": 0.3571, "step": 13232, "teacher_loss": 0.34019148349761963 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.12305235117673874, "learning_rate": 2.364762301521493e-05, "loss": 0.1624, "step": 13233, "teacher_loss": 0.1667397916316986 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5764172077178955, "learning_rate": 2.3645767097538252e-05, "loss": 0.3396, "step": 13234, "teacher_loss": 0.31329360604286194 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.11184930801391602, "learning_rate": 2.3643910981640052e-05, "loss": 0.2183, "step": 13235, "teacher_loss": 0.23007959127426147 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5248562097549438, "learning_rate": 2.364205466756289e-05, "loss": 0.233, "step": 13236, "teacher_loss": 0.20059174299240112 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5020205974578857, "learning_rate": 2.3640198155349324e-05, "loss": 0.3308, "step": 13237, "teacher_loss": 0.3118058443069458 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5215932130813599, "learning_rate": 2.363834144504192e-05, "loss": 0.2583, "step": 13238, "teacher_loss": 0.22909843921661377 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.7117658853530884, "learning_rate": 2.3636484536683246e-05, "loss": 0.3941, "step": 13239, "teacher_loss": 0.35885149240493774 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.7666230797767639, "learning_rate": 2.3634627430315874e-05, "loss": 0.5013, "step": 13240, "teacher_loss": 0.4717782735824585 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.8392297029495239, "learning_rate": 2.363277012598238e-05, "loss": 0.3495, "step": 13241, "teacher_loss": 0.295083224773407 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3412531018257141, "learning_rate": 2.363091262372536e-05, "loss": 0.2044, "step": 13242, "teacher_loss": 0.18923211097717285 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.7438136339187622, "learning_rate": 2.362905492358738e-05, "loss": 0.3879, "step": 13243, "teacher_loss": 0.3483033776283264 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.21026575565338135, "learning_rate": 2.3627197025611046e-05, "loss": 0.2205, "step": 13244, "teacher_loss": 0.22163403034210205 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3444966673851013, "learning_rate": 2.3625338929838952e-05, "loss": 0.2076, "step": 13245, "teacher_loss": 0.19241565465927124 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3984840214252472, "learning_rate": 2.362348063631369e-05, "loss": 0.2926, "step": 13246, "teacher_loss": 0.28083133697509766 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5067142844200134, "learning_rate": 2.362162214507788e-05, "loss": 0.3486, "step": 13247, "teacher_loss": 0.33100634813308716 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.3130027949810028, "learning_rate": 2.3619763456174116e-05, "loss": 0.246, "step": 13248, "teacher_loss": 0.2385254055261612 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.47818103432655334, "learning_rate": 2.361790456964502e-05, "loss": 0.368, "step": 13249, "teacher_loss": 0.35578304529190063 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5812355279922485, "learning_rate": 2.361604548553321e-05, "loss": 0.3013, "step": 13250, "teacher_loss": 0.2701554596424103 }, { "epoch": 2.39, "eval_exact_match": 79.60264900662251, "eval_f1": 87.21485608055386, "step": 13250 }, { "compression_loss": 0.0, "epoch": 2.39, "label_loss": 0.5362725257873535, "learning_rate": 2.361418620388131e-05, "loss": 0.2659, "step": 13251, "teacher_loss": 0.23582231998443604 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6154779195785522, "learning_rate": 2.361232672473195e-05, "loss": 0.2692, "step": 13252, "teacher_loss": 0.23071923851966858 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.30691590905189514, "learning_rate": 2.3610467048127758e-05, "loss": 0.225, "step": 13253, "teacher_loss": 0.21593570709228516 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.39117273688316345, "learning_rate": 2.3608607174111375e-05, "loss": 0.3015, "step": 13254, "teacher_loss": 0.2915579378604889 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.5042406320571899, "learning_rate": 2.3606747102725433e-05, "loss": 0.3296, "step": 13255, "teacher_loss": 0.3101460933685303 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.3229520320892334, "learning_rate": 2.3604886834012587e-05, "loss": 0.2132, "step": 13256, "teacher_loss": 0.2010575532913208 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6501572132110596, "learning_rate": 2.3603026368015482e-05, "loss": 0.323, "step": 13257, "teacher_loss": 0.2866288125514984 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6359395980834961, "learning_rate": 2.3601165704776784e-05, "loss": 0.2745, "step": 13258, "teacher_loss": 0.2343650907278061 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.44736701250076294, "learning_rate": 2.359930484433914e-05, "loss": 0.2478, "step": 13259, "teacher_loss": 0.2256748080253601 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.8413408398628235, "learning_rate": 2.359744378674521e-05, "loss": 0.3767, "step": 13260, "teacher_loss": 0.3251223564147949 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.1133200079202652, "learning_rate": 2.359558253203768e-05, "loss": 0.1862, "step": 13261, "teacher_loss": 0.19428731501102448 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.3415602445602417, "learning_rate": 2.359372108025921e-05, "loss": 0.1611, "step": 13262, "teacher_loss": 0.1410546600818634 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6447021961212158, "learning_rate": 2.359185943145248e-05, "loss": 0.2584, "step": 13263, "teacher_loss": 0.21545115113258362 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.26609092950820923, "learning_rate": 2.3589997585660174e-05, "loss": 0.2162, "step": 13264, "teacher_loss": 0.21067406237125397 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.37561583518981934, "learning_rate": 2.358813554292498e-05, "loss": 0.2056, "step": 13265, "teacher_loss": 0.18672534823417664 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.29276537895202637, "learning_rate": 2.3586273303289584e-05, "loss": 0.2576, "step": 13266, "teacher_loss": 0.2536574602127075 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6726639866828918, "learning_rate": 2.3584410866796687e-05, "loss": 0.3798, "step": 13267, "teacher_loss": 0.3472104072570801 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.3950142562389374, "learning_rate": 2.358254823348898e-05, "loss": 0.2666, "step": 13268, "teacher_loss": 0.25232625007629395 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.44658881425857544, "learning_rate": 2.3580685403409177e-05, "loss": 0.261, "step": 13269, "teacher_loss": 0.2403843104839325 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.7590472102165222, "learning_rate": 2.357882237659999e-05, "loss": 0.2677, "step": 13270, "teacher_loss": 0.2130640745162964 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6913022398948669, "learning_rate": 2.3576959153104115e-05, "loss": 0.2862, "step": 13271, "teacher_loss": 0.24124255776405334 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.25179898738861084, "learning_rate": 2.357509573296429e-05, "loss": 0.2209, "step": 13272, "teacher_loss": 0.21751710772514343 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.16739974915981293, "learning_rate": 2.3573232116223224e-05, "loss": 0.1898, "step": 13273, "teacher_loss": 0.19231358170509338 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.40578174591064453, "learning_rate": 2.3571368302923656e-05, "loss": 0.2363, "step": 13274, "teacher_loss": 0.21745160222053528 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.1465100646018982, "learning_rate": 2.3569504293108304e-05, "loss": 0.2661, "step": 13275, "teacher_loss": 0.27939239144325256 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.46484997868537903, "learning_rate": 2.3567640086819915e-05, "loss": 0.3472, "step": 13276, "teacher_loss": 0.33410775661468506 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.2843043804168701, "learning_rate": 2.356577568410123e-05, "loss": 0.2406, "step": 13277, "teacher_loss": 0.23579701781272888 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.3907982110977173, "learning_rate": 2.3563911084994986e-05, "loss": 0.3182, "step": 13278, "teacher_loss": 0.3101189136505127 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.416643351316452, "learning_rate": 2.3562046289543934e-05, "loss": 0.2802, "step": 13279, "teacher_loss": 0.264987587928772 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 1.1521563529968262, "learning_rate": 2.3560181297790834e-05, "loss": 0.3197, "step": 13280, "teacher_loss": 0.22725586593151093 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.30872660875320435, "learning_rate": 2.3558316109778436e-05, "loss": 0.2603, "step": 13281, "teacher_loss": 0.2549632787704468 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.4649592339992523, "learning_rate": 2.3556450725549514e-05, "loss": 0.202, "step": 13282, "teacher_loss": 0.1727590560913086 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6302200555801392, "learning_rate": 2.3554585145146833e-05, "loss": 0.3153, "step": 13283, "teacher_loss": 0.28030288219451904 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.5839126110076904, "learning_rate": 2.3552719368613158e-05, "loss": 0.2834, "step": 13284, "teacher_loss": 0.25003737211227417 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.37018340826034546, "learning_rate": 2.355085339599127e-05, "loss": 0.2312, "step": 13285, "teacher_loss": 0.21577343344688416 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.3876428008079529, "learning_rate": 2.3548987227323957e-05, "loss": 0.1963, "step": 13286, "teacher_loss": 0.1750679612159729 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.25159209966659546, "learning_rate": 2.3547120862653995e-05, "loss": 0.2217, "step": 13287, "teacher_loss": 0.218379408121109 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.41399773955345154, "learning_rate": 2.3545254302024177e-05, "loss": 0.234, "step": 13288, "teacher_loss": 0.21398620307445526 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.2447696328163147, "learning_rate": 2.3543387545477295e-05, "loss": 0.1677, "step": 13289, "teacher_loss": 0.15912030637264252 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.24025991559028625, "learning_rate": 2.354152059305615e-05, "loss": 0.1992, "step": 13290, "teacher_loss": 0.19461937248706818 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.07693510502576828, "learning_rate": 2.3539653444803552e-05, "loss": 0.1713, "step": 13291, "teacher_loss": 0.1818252056837082 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.4016713500022888, "learning_rate": 2.35377861007623e-05, "loss": 0.2479, "step": 13292, "teacher_loss": 0.23080652952194214 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.38348233699798584, "learning_rate": 2.353591856097521e-05, "loss": 0.2463, "step": 13293, "teacher_loss": 0.23110228776931763 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.2720690071582794, "learning_rate": 2.3534050825485102e-05, "loss": 0.1789, "step": 13294, "teacher_loss": 0.16853559017181396 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.7292126417160034, "learning_rate": 2.3532182894334793e-05, "loss": 0.3442, "step": 13295, "teacher_loss": 0.30139869451522827 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.4321862757205963, "learning_rate": 2.353031476756711e-05, "loss": 0.2281, "step": 13296, "teacher_loss": 0.2054426074028015 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6623647212982178, "learning_rate": 2.352844644522489e-05, "loss": 0.347, "step": 13297, "teacher_loss": 0.3119175434112549 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.30049002170562744, "learning_rate": 2.3526577927350956e-05, "loss": 0.1817, "step": 13298, "teacher_loss": 0.16851076483726501 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.48915815353393555, "learning_rate": 2.352470921398816e-05, "loss": 0.2795, "step": 13299, "teacher_loss": 0.2562389373779297 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.7183676362037659, "learning_rate": 2.3522840305179334e-05, "loss": 0.2722, "step": 13300, "teacher_loss": 0.2225809544324875 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.43166446685791016, "learning_rate": 2.3520971200967337e-05, "loss": 0.3333, "step": 13301, "teacher_loss": 0.3224182724952698 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.35628020763397217, "learning_rate": 2.3519101901395016e-05, "loss": 0.2392, "step": 13302, "teacher_loss": 0.22622382640838623 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.22884929180145264, "learning_rate": 2.3517232406505233e-05, "loss": 0.3245, "step": 13303, "teacher_loss": 0.33512285351753235 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6160867214202881, "learning_rate": 2.3515362716340844e-05, "loss": 0.2689, "step": 13304, "teacher_loss": 0.2303493618965149 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.6933183670043945, "learning_rate": 2.3513492830944718e-05, "loss": 0.2939, "step": 13305, "teacher_loss": 0.24950064718723297 }, { "compression_loss": 0.0, "epoch": 2.4, "label_loss": 0.4467252492904663, "learning_rate": 2.3511622750359726e-05, "loss": 0.2875, "step": 13306, "teacher_loss": 0.2698519229888916 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.2195822298526764, "learning_rate": 2.3509752474628744e-05, "loss": 0.2231, "step": 13307, "teacher_loss": 0.22346210479736328 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.45221322774887085, "learning_rate": 2.3507882003794653e-05, "loss": 0.3031, "step": 13308, "teacher_loss": 0.2864859104156494 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5276502370834351, "learning_rate": 2.3506011337900333e-05, "loss": 0.3065, "step": 13309, "teacher_loss": 0.2819198668003082 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4648154675960541, "learning_rate": 2.3504140476988678e-05, "loss": 0.249, "step": 13310, "teacher_loss": 0.2250230759382248 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5794755220413208, "learning_rate": 2.3502269421102578e-05, "loss": 0.2803, "step": 13311, "teacher_loss": 0.247026726603508 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4667815566062927, "learning_rate": 2.3500398170284926e-05, "loss": 0.2498, "step": 13312, "teacher_loss": 0.2256973683834076 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4610523581504822, "learning_rate": 2.3498526724578637e-05, "loss": 0.3301, "step": 13313, "teacher_loss": 0.31560200452804565 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.7091777920722961, "learning_rate": 2.349665508402661e-05, "loss": 0.3493, "step": 13314, "teacher_loss": 0.3093149662017822 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.12012657523155212, "learning_rate": 2.3494783248671747e-05, "loss": 0.1756, "step": 13315, "teacher_loss": 0.18178194761276245 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5337127447128296, "learning_rate": 2.349291121855698e-05, "loss": 0.3213, "step": 13316, "teacher_loss": 0.29771876335144043 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5708508491516113, "learning_rate": 2.349103899372522e-05, "loss": 0.2701, "step": 13317, "teacher_loss": 0.2366747409105301 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.8888444900512695, "learning_rate": 2.3489166574219396e-05, "loss": 0.5195, "step": 13318, "teacher_loss": 0.47845685482025146 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4197937548160553, "learning_rate": 2.3487293960082435e-05, "loss": 0.3263, "step": 13319, "teacher_loss": 0.3159257173538208 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.2690635025501251, "learning_rate": 2.3485421151357264e-05, "loss": 0.1844, "step": 13320, "teacher_loss": 0.17497417330741882 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5177335739135742, "learning_rate": 2.3483548148086832e-05, "loss": 0.4386, "step": 13321, "teacher_loss": 0.42978376150131226 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.6527668237686157, "learning_rate": 2.348167495031407e-05, "loss": 0.3442, "step": 13322, "teacher_loss": 0.3098776340484619 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.408937931060791, "learning_rate": 2.3479801558081936e-05, "loss": 0.3662, "step": 13323, "teacher_loss": 0.3614322543144226 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.19425126910209656, "learning_rate": 2.3477927971433374e-05, "loss": 0.23, "step": 13324, "teacher_loss": 0.23392419517040253 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.36549636721611023, "learning_rate": 2.3476054190411344e-05, "loss": 0.2518, "step": 13325, "teacher_loss": 0.2392052263021469 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.646171510219574, "learning_rate": 2.3474180215058804e-05, "loss": 0.2778, "step": 13326, "teacher_loss": 0.23688417673110962 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5570650100708008, "learning_rate": 2.3472306045418715e-05, "loss": 0.4014, "step": 13327, "teacher_loss": 0.3841387629508972 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.23006096482276917, "learning_rate": 2.3470431681534052e-05, "loss": 0.2965, "step": 13328, "teacher_loss": 0.3038666248321533 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.3316557705402374, "learning_rate": 2.3468557123447784e-05, "loss": 0.324, "step": 13329, "teacher_loss": 0.32318708300590515 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.2770814895629883, "learning_rate": 2.3466682371202896e-05, "loss": 0.2277, "step": 13330, "teacher_loss": 0.22219160199165344 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5649198293685913, "learning_rate": 2.346480742484236e-05, "loss": 0.6044, "step": 13331, "teacher_loss": 0.6087542772293091 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.20918157696723938, "learning_rate": 2.3462932284409174e-05, "loss": 0.1894, "step": 13332, "teacher_loss": 0.18716877698898315 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.47164082527160645, "learning_rate": 2.346105694994632e-05, "loss": 0.219, "step": 13333, "teacher_loss": 0.19089549779891968 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.20764794945716858, "learning_rate": 2.3459181421496797e-05, "loss": 0.2172, "step": 13334, "teacher_loss": 0.218232661485672 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.26438969373703003, "learning_rate": 2.345730569910361e-05, "loss": 0.2291, "step": 13335, "teacher_loss": 0.2252292037010193 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.7862716317176819, "learning_rate": 2.3455429782809756e-05, "loss": 0.8569, "step": 13336, "teacher_loss": 0.8647250533103943 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.9031293392181396, "learning_rate": 2.345355367265825e-05, "loss": 0.34, "step": 13337, "teacher_loss": 0.2773993909358978 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.8896418809890747, "learning_rate": 2.3451677368692098e-05, "loss": 0.9277, "step": 13338, "teacher_loss": 0.9319241642951965 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4690718650817871, "learning_rate": 2.344980087095433e-05, "loss": 0.3052, "step": 13339, "teacher_loss": 0.2869587540626526 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5100768804550171, "learning_rate": 2.344792417948796e-05, "loss": 0.2902, "step": 13340, "teacher_loss": 0.2657606601715088 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.40496814250946045, "learning_rate": 2.3446047294336018e-05, "loss": 0.3124, "step": 13341, "teacher_loss": 0.3021667003631592 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.4826464056968689, "learning_rate": 2.3444170215541533e-05, "loss": 0.2932, "step": 13342, "teacher_loss": 0.27211669087409973 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5637249946594238, "learning_rate": 2.3442292943147543e-05, "loss": 0.338, "step": 13343, "teacher_loss": 0.31291282176971436 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.24867957830429077, "learning_rate": 2.3440415477197083e-05, "loss": 0.1922, "step": 13344, "teacher_loss": 0.18588200211524963 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.6272789835929871, "learning_rate": 2.3438537817733204e-05, "loss": 0.3343, "step": 13345, "teacher_loss": 0.301755428314209 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.3251725435256958, "learning_rate": 2.3436659964798953e-05, "loss": 0.3287, "step": 13346, "teacher_loss": 0.3290470242500305 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.6259438395500183, "learning_rate": 2.3434781918437384e-05, "loss": 0.2872, "step": 13347, "teacher_loss": 0.24957610666751862 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.3182956576347351, "learning_rate": 2.3432903678691555e-05, "loss": 0.2385, "step": 13348, "teacher_loss": 0.22966709733009338 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.5025835037231445, "learning_rate": 2.3431025245604526e-05, "loss": 0.2478, "step": 13349, "teacher_loss": 0.21952247619628906 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.30710211396217346, "learning_rate": 2.342914661921937e-05, "loss": 0.2076, "step": 13350, "teacher_loss": 0.19656185805797577 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.6085900068283081, "learning_rate": 2.342726779957915e-05, "loss": 0.2964, "step": 13351, "teacher_loss": 0.26168394088745117 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.3433447480201721, "learning_rate": 2.3425388786726944e-05, "loss": 0.1928, "step": 13352, "teacher_loss": 0.17605873942375183 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.48761940002441406, "learning_rate": 2.3423509580705838e-05, "loss": 0.2631, "step": 13353, "teacher_loss": 0.23814114928245544 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.6748328804969788, "learning_rate": 2.342163018155891e-05, "loss": 0.2722, "step": 13354, "teacher_loss": 0.22741976380348206 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.3338778614997864, "learning_rate": 2.341975058932925e-05, "loss": 0.2723, "step": 13355, "teacher_loss": 0.2654338777065277 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.88533616065979, "learning_rate": 2.3417870804059953e-05, "loss": 0.2671, "step": 13356, "teacher_loss": 0.19835810363292694 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.9712544679641724, "learning_rate": 2.3415990825794118e-05, "loss": 0.2783, "step": 13357, "teacher_loss": 0.2012614756822586 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.46000081300735474, "learning_rate": 2.341411065457484e-05, "loss": 0.2631, "step": 13358, "teacher_loss": 0.24117402732372284 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.7524264454841614, "learning_rate": 2.341223029044524e-05, "loss": 0.4171, "step": 13359, "teacher_loss": 0.3798047602176666 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.23651687800884247, "learning_rate": 2.341034973344842e-05, "loss": 0.239, "step": 13360, "teacher_loss": 0.23926573991775513 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.26024967432022095, "learning_rate": 2.3408468983627493e-05, "loss": 0.2254, "step": 13361, "teacher_loss": 0.22150051593780518 }, { "compression_loss": 0.0, "epoch": 2.41, "label_loss": 0.31224629282951355, "learning_rate": 2.3406588041025584e-05, "loss": 0.2441, "step": 13362, "teacher_loss": 0.23649385571479797 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.24041607975959778, "learning_rate": 2.340470690568581e-05, "loss": 0.2517, "step": 13363, "teacher_loss": 0.2529626488685608 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.699574887752533, "learning_rate": 2.340282557765131e-05, "loss": 0.3712, "step": 13364, "teacher_loss": 0.33475714921951294 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.28519248962402344, "learning_rate": 2.3400944056965217e-05, "loss": 0.1575, "step": 13365, "teacher_loss": 0.14330917596817017 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 1.6159348487854004, "learning_rate": 2.339906234367066e-05, "loss": 1.1932, "step": 13366, "teacher_loss": 1.1461822986602783 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.743181049823761, "learning_rate": 2.339718043781078e-05, "loss": 0.291, "step": 13367, "teacher_loss": 0.2407325804233551 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6673940420150757, "learning_rate": 2.3395298339428735e-05, "loss": 0.3579, "step": 13368, "teacher_loss": 0.3235345780849457 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.5583533048629761, "learning_rate": 2.3393416048567666e-05, "loss": 0.3667, "step": 13369, "teacher_loss": 0.34545665979385376 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.37425071001052856, "learning_rate": 2.3391533565270736e-05, "loss": 0.2678, "step": 13370, "teacher_loss": 0.25599926710128784 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3710380494594574, "learning_rate": 2.3389650889581097e-05, "loss": 0.2606, "step": 13371, "teacher_loss": 0.2483477145433426 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.49056482315063477, "learning_rate": 2.3387768021541914e-05, "loss": 0.2498, "step": 13372, "teacher_loss": 0.22305841743946075 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.25216203927993774, "learning_rate": 2.338588496119636e-05, "loss": 0.2029, "step": 13373, "teacher_loss": 0.1974402517080307 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3505750894546509, "learning_rate": 2.338400170858761e-05, "loss": 0.2536, "step": 13374, "teacher_loss": 0.24279339611530304 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.5070775747299194, "learning_rate": 2.3382118263758836e-05, "loss": 0.2596, "step": 13375, "teacher_loss": 0.23207318782806396 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.47342830896377563, "learning_rate": 2.338023462675322e-05, "loss": 0.2571, "step": 13376, "teacher_loss": 0.2330133020877838 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3738238215446472, "learning_rate": 2.3378350797613948e-05, "loss": 0.2336, "step": 13377, "teacher_loss": 0.21803408861160278 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.7364017963409424, "learning_rate": 2.3376466776384212e-05, "loss": 0.2842, "step": 13378, "teacher_loss": 0.23400413990020752 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.324739545583725, "learning_rate": 2.3374582563107207e-05, "loss": 0.2768, "step": 13379, "teacher_loss": 0.2714645266532898 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.4032590091228485, "learning_rate": 2.3372698157826136e-05, "loss": 0.2588, "step": 13380, "teacher_loss": 0.242717906832695 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.31986749172210693, "learning_rate": 2.3370813560584194e-05, "loss": 0.2594, "step": 13381, "teacher_loss": 0.252629816532135 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3156262934207916, "learning_rate": 2.3368928771424597e-05, "loss": 0.3198, "step": 13382, "teacher_loss": 0.32029280066490173 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.07356128096580505, "learning_rate": 2.3367043790390552e-05, "loss": 0.1474, "step": 13383, "teacher_loss": 0.15559235215187073 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.45045191049575806, "learning_rate": 2.336515861752528e-05, "loss": 0.3141, "step": 13384, "teacher_loss": 0.2990008592605591 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6026839017868042, "learning_rate": 2.3363273252872003e-05, "loss": 0.3588, "step": 13385, "teacher_loss": 0.33167368173599243 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.5101771354675293, "learning_rate": 2.336138769647394e-05, "loss": 0.2728, "step": 13386, "teacher_loss": 0.24644535779953003 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.8842732906341553, "learning_rate": 2.3359501948374332e-05, "loss": 0.3238, "step": 13387, "teacher_loss": 0.2615572214126587 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 1.1212352514266968, "learning_rate": 2.3357616008616404e-05, "loss": 0.4686, "step": 13388, "teacher_loss": 0.3960660696029663 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.38443708419799805, "learning_rate": 2.3355729877243394e-05, "loss": 0.4303, "step": 13389, "teacher_loss": 0.4354340732097626 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.41377824544906616, "learning_rate": 2.3353843554298555e-05, "loss": 0.2278, "step": 13390, "teacher_loss": 0.2071402668952942 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.31958824396133423, "learning_rate": 2.335195703982513e-05, "loss": 0.2569, "step": 13391, "teacher_loss": 0.24990540742874146 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.8397213220596313, "learning_rate": 2.3350070333866367e-05, "loss": 0.3171, "step": 13392, "teacher_loss": 0.2589814066886902 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.598009467124939, "learning_rate": 2.334818343646553e-05, "loss": 0.7791, "step": 13393, "teacher_loss": 0.7992147207260132 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6784560680389404, "learning_rate": 2.3346296347665872e-05, "loss": 0.2982, "step": 13394, "teacher_loss": 0.25590789318084717 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.48372238874435425, "learning_rate": 2.3344409067510665e-05, "loss": 0.2908, "step": 13395, "teacher_loss": 0.269344687461853 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3220786452293396, "learning_rate": 2.3342521596043176e-05, "loss": 0.3307, "step": 13396, "teacher_loss": 0.3316769599914551 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.30690819025039673, "learning_rate": 2.3340633933306677e-05, "loss": 0.1833, "step": 13397, "teacher_loss": 0.16959123313426971 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6358180046081543, "learning_rate": 2.333874607934445e-05, "loss": 0.2628, "step": 13398, "teacher_loss": 0.22134973108768463 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.1271497905254364, "learning_rate": 2.3336858034199774e-05, "loss": 0.1772, "step": 13399, "teacher_loss": 0.18276116251945496 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.5849890112876892, "learning_rate": 2.333496979791594e-05, "loss": 0.2801, "step": 13400, "teacher_loss": 0.24623671174049377 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.43503814935684204, "learning_rate": 2.3333081370536236e-05, "loss": 0.2974, "step": 13401, "teacher_loss": 0.28212088346481323 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.454201877117157, "learning_rate": 2.333119275210397e-05, "loss": 0.3415, "step": 13402, "teacher_loss": 0.3290107250213623 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.28829506039619446, "learning_rate": 2.332930394266242e-05, "loss": 0.2138, "step": 13403, "teacher_loss": 0.2055424600839615 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.40818512439727783, "learning_rate": 2.332741494225491e-05, "loss": 0.3488, "step": 13404, "teacher_loss": 0.3421688675880432 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.4157203435897827, "learning_rate": 2.3325525750924737e-05, "loss": 0.3272, "step": 13405, "teacher_loss": 0.31736963987350464 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.39925360679626465, "learning_rate": 2.3323636368715227e-05, "loss": 0.2416, "step": 13406, "teacher_loss": 0.22404126822948456 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.32423126697540283, "learning_rate": 2.332174679566968e-05, "loss": 0.2564, "step": 13407, "teacher_loss": 0.24884669482707977 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.4168992042541504, "learning_rate": 2.331985703183144e-05, "loss": 0.1689, "step": 13408, "teacher_loss": 0.14130720496177673 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.3969237208366394, "learning_rate": 2.3317967077243817e-05, "loss": 0.2681, "step": 13409, "teacher_loss": 0.25382718443870544 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6571687459945679, "learning_rate": 2.3316076931950148e-05, "loss": 0.5307, "step": 13410, "teacher_loss": 0.5166052579879761 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.4727189242839813, "learning_rate": 2.3314186595993767e-05, "loss": 0.3607, "step": 13411, "teacher_loss": 0.3482898473739624 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.2935327887535095, "learning_rate": 2.3312296069418015e-05, "loss": 0.1832, "step": 13412, "teacher_loss": 0.17095747590065002 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.6671868562698364, "learning_rate": 2.3310405352266237e-05, "loss": 0.3581, "step": 13413, "teacher_loss": 0.32378774881362915 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.18998435139656067, "learning_rate": 2.3308514444581784e-05, "loss": 0.191, "step": 13414, "teacher_loss": 0.1911236196756363 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.41326427459716797, "learning_rate": 2.3306623346407996e-05, "loss": 0.2871, "step": 13415, "teacher_loss": 0.2730720341205597 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.2343122661113739, "learning_rate": 2.330473205778825e-05, "loss": 0.3045, "step": 13416, "teacher_loss": 0.312247633934021 }, { "compression_loss": 0.0, "epoch": 2.42, "label_loss": 0.5434048771858215, "learning_rate": 2.3302840578765886e-05, "loss": 0.2699, "step": 13417, "teacher_loss": 0.23954391479492188 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.3696182370185852, "learning_rate": 2.3300948909384283e-05, "loss": 0.2059, "step": 13418, "teacher_loss": 0.18775790929794312 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.29339170455932617, "learning_rate": 2.3299057049686815e-05, "loss": 0.2122, "step": 13419, "teacher_loss": 0.20322604477405548 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.27169322967529297, "learning_rate": 2.3297164999716846e-05, "loss": 0.2363, "step": 13420, "teacher_loss": 0.23241224884986877 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.42531514167785645, "learning_rate": 2.3295272759517758e-05, "loss": 0.3378, "step": 13421, "teacher_loss": 0.3281002640724182 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.27721619606018066, "learning_rate": 2.329338032913294e-05, "loss": 0.2178, "step": 13422, "teacher_loss": 0.2112330049276352 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.30392444133758545, "learning_rate": 2.3291487708605775e-05, "loss": 0.2082, "step": 13423, "teacher_loss": 0.19759348034858704 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.2887619137763977, "learning_rate": 2.3289594897979656e-05, "loss": 0.2316, "step": 13424, "teacher_loss": 0.22525274753570557 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.557485818862915, "learning_rate": 2.3287701897297983e-05, "loss": 0.399, "step": 13425, "teacher_loss": 0.3814348876476288 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5290688276290894, "learning_rate": 2.3285808706604146e-05, "loss": 0.2347, "step": 13426, "teacher_loss": 0.2020326852798462 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5916503667831421, "learning_rate": 2.328391532594156e-05, "loss": 0.2481, "step": 13427, "teacher_loss": 0.20994271337985992 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.3555203080177307, "learning_rate": 2.3282021755353637e-05, "loss": 0.1747, "step": 13428, "teacher_loss": 0.1546405851840973 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.2876202166080475, "learning_rate": 2.328012799488378e-05, "loss": 0.2505, "step": 13429, "teacher_loss": 0.24635246396064758 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5434819459915161, "learning_rate": 2.3278234044575414e-05, "loss": 0.3619, "step": 13430, "teacher_loss": 0.3416920304298401 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.336340069770813, "learning_rate": 2.3276339904471965e-05, "loss": 0.2256, "step": 13431, "teacher_loss": 0.21328291296958923 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.20888549089431763, "learning_rate": 2.3274445574616853e-05, "loss": 0.1931, "step": 13432, "teacher_loss": 0.1913781315088272 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4624791145324707, "learning_rate": 2.327255105505351e-05, "loss": 0.3213, "step": 13433, "teacher_loss": 0.30563169717788696 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.37911367416381836, "learning_rate": 2.327065634582538e-05, "loss": 0.2878, "step": 13434, "teacher_loss": 0.27770939469337463 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4340216815471649, "learning_rate": 2.3268761446975888e-05, "loss": 0.3846, "step": 13435, "teacher_loss": 0.37915581464767456 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5789175033569336, "learning_rate": 2.326686635854849e-05, "loss": 0.2755, "step": 13436, "teacher_loss": 0.2418239712715149 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.6702301502227783, "learning_rate": 2.3264971080586636e-05, "loss": 0.3156, "step": 13437, "teacher_loss": 0.27623870968818665 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.13419650495052338, "learning_rate": 2.326307561313377e-05, "loss": 0.1516, "step": 13438, "teacher_loss": 0.15357431769371033 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.476350873708725, "learning_rate": 2.326117995623336e-05, "loss": 0.261, "step": 13439, "teacher_loss": 0.2370595633983612 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4550631046295166, "learning_rate": 2.3259284109928853e-05, "loss": 0.2846, "step": 13440, "teacher_loss": 0.2656833231449127 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.6323797702789307, "learning_rate": 2.325738807426373e-05, "loss": 0.3645, "step": 13441, "teacher_loss": 0.33470118045806885 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.726628303527832, "learning_rate": 2.3255491849281454e-05, "loss": 0.4423, "step": 13442, "teacher_loss": 0.41070467233657837 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5524322390556335, "learning_rate": 2.3253595435025503e-05, "loss": 0.3422, "step": 13443, "teacher_loss": 0.3188698887825012 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.28182151913642883, "learning_rate": 2.3251698831539353e-05, "loss": 0.2372, "step": 13444, "teacher_loss": 0.23220627009868622 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5198459029197693, "learning_rate": 2.3249802038866487e-05, "loss": 0.2353, "step": 13445, "teacher_loss": 0.20369216799736023 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 1.5448665618896484, "learning_rate": 2.3247905057050395e-05, "loss": 0.4856, "step": 13446, "teacher_loss": 0.36793047189712524 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5161502361297607, "learning_rate": 2.324600788613457e-05, "loss": 0.354, "step": 13447, "teacher_loss": 0.33603590726852417 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.462638795375824, "learning_rate": 2.324411052616251e-05, "loss": 0.3547, "step": 13448, "teacher_loss": 0.34273117780685425 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 1.106665015220642, "learning_rate": 2.3242212977177705e-05, "loss": 0.3577, "step": 13449, "teacher_loss": 0.27451103925704956 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.25974568724632263, "learning_rate": 2.3240315239223676e-05, "loss": 0.321, "step": 13450, "teacher_loss": 0.3278290033340454 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4112575054168701, "learning_rate": 2.3238417312343922e-05, "loss": 0.2067, "step": 13451, "teacher_loss": 0.18399155139923096 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.34670233726501465, "learning_rate": 2.3236519196581953e-05, "loss": 0.2298, "step": 13452, "teacher_loss": 0.21681416034698486 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4199913740158081, "learning_rate": 2.3234620891981297e-05, "loss": 0.3213, "step": 13453, "teacher_loss": 0.31032997369766235 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.44018006324768066, "learning_rate": 2.3232722398585474e-05, "loss": 0.2141, "step": 13454, "teacher_loss": 0.18896540999412537 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.28467023372650146, "learning_rate": 2.323082371643801e-05, "loss": 0.2231, "step": 13455, "teacher_loss": 0.21629387140274048 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.38557907938957214, "learning_rate": 2.3228924845582433e-05, "loss": 0.2445, "step": 13456, "teacher_loss": 0.22882162034511566 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.2279781699180603, "learning_rate": 2.322702578606228e-05, "loss": 0.1784, "step": 13457, "teacher_loss": 0.17284482717514038 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.6300699710845947, "learning_rate": 2.3225126537921094e-05, "loss": 0.3979, "step": 13458, "teacher_loss": 0.3721427321434021 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.23973649740219116, "learning_rate": 2.3223227101202418e-05, "loss": 0.2273, "step": 13459, "teacher_loss": 0.2258821725845337 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4223008453845978, "learning_rate": 2.32213274759498e-05, "loss": 0.267, "step": 13460, "teacher_loss": 0.24971584975719452 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.42848461866378784, "learning_rate": 2.3219427662206788e-05, "loss": 0.1849, "step": 13461, "teacher_loss": 0.1578855812549591 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.3234953284263611, "learning_rate": 2.3217527660016942e-05, "loss": 0.2099, "step": 13462, "teacher_loss": 0.19728602468967438 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.3779809772968292, "learning_rate": 2.321562746942383e-05, "loss": 0.1923, "step": 13463, "teacher_loss": 0.17169684171676636 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.42064130306243896, "learning_rate": 2.3213727090471008e-05, "loss": 0.2904, "step": 13464, "teacher_loss": 0.27595970034599304 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.8038947582244873, "learning_rate": 2.3211826523202048e-05, "loss": 0.2557, "step": 13465, "teacher_loss": 0.19479334354400635 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.659406304359436, "learning_rate": 2.3209925767660527e-05, "loss": 0.3999, "step": 13466, "teacher_loss": 0.37101155519485474 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.6156027317047119, "learning_rate": 2.3208024823890026e-05, "loss": 0.2063, "step": 13467, "teacher_loss": 0.16078908741474152 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.48168832063674927, "learning_rate": 2.3206123691934117e-05, "loss": 0.1995, "step": 13468, "teacher_loss": 0.16809991002082825 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.23847299814224243, "learning_rate": 2.320422237183641e-05, "loss": 0.2532, "step": 13469, "teacher_loss": 0.25480973720550537 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.3450428545475006, "learning_rate": 2.3202320863640466e-05, "loss": 0.3011, "step": 13470, "teacher_loss": 0.2962424159049988 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.4589805603027344, "learning_rate": 2.32004191673899e-05, "loss": 0.2563, "step": 13471, "teacher_loss": 0.2337377965450287 }, { "compression_loss": 0.0, "epoch": 2.43, "label_loss": 0.5096904039382935, "learning_rate": 2.3198517283128316e-05, "loss": 0.269, "step": 13472, "teacher_loss": 0.24228110909461975 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4801781177520752, "learning_rate": 2.31966152108993e-05, "loss": 0.2615, "step": 13473, "teacher_loss": 0.23724089562892914 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.17093929648399353, "learning_rate": 2.3194712950746483e-05, "loss": 0.2058, "step": 13474, "teacher_loss": 0.20963357388973236 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.7945429086685181, "learning_rate": 2.319281050271346e-05, "loss": 0.6282, "step": 13475, "teacher_loss": 0.6097357273101807 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.34345659613609314, "learning_rate": 2.319090786684386e-05, "loss": 0.206, "step": 13476, "teacher_loss": 0.1907261312007904 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.22912493348121643, "learning_rate": 2.3189005043181297e-05, "loss": 0.1675, "step": 13477, "teacher_loss": 0.16064202785491943 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5353869795799255, "learning_rate": 2.3187102031769403e-05, "loss": 0.2536, "step": 13478, "teacher_loss": 0.2222995162010193 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4261696934700012, "learning_rate": 2.3185198832651802e-05, "loss": 0.2257, "step": 13479, "teacher_loss": 0.20341119170188904 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.6609184741973877, "learning_rate": 2.3183295445872135e-05, "loss": 0.2756, "step": 13480, "teacher_loss": 0.2327386736869812 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.46647441387176514, "learning_rate": 2.3181391871474036e-05, "loss": 0.3196, "step": 13481, "teacher_loss": 0.30325138568878174 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5660653114318848, "learning_rate": 2.317948810950115e-05, "loss": 0.2939, "step": 13482, "teacher_loss": 0.2636072039604187 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.2720910906791687, "learning_rate": 2.317758415999713e-05, "loss": 0.1981, "step": 13483, "teacher_loss": 0.1898963451385498 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3639522194862366, "learning_rate": 2.317568002300562e-05, "loss": 0.2676, "step": 13484, "teacher_loss": 0.2568461298942566 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3364434838294983, "learning_rate": 2.3173775698570277e-05, "loss": 0.2153, "step": 13485, "teacher_loss": 0.20181438326835632 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.41619861125946045, "learning_rate": 2.3171871186734764e-05, "loss": 0.3277, "step": 13486, "teacher_loss": 0.317893922328949 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8406264781951904, "learning_rate": 2.3169966487542746e-05, "loss": 0.2715, "step": 13487, "teacher_loss": 0.20826585590839386 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.2673688232898712, "learning_rate": 2.31680616010379e-05, "loss": 0.2173, "step": 13488, "teacher_loss": 0.21178071200847626 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.41777849197387695, "learning_rate": 2.3166156527263876e-05, "loss": 0.2386, "step": 13489, "teacher_loss": 0.21866193413734436 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.2996369004249573, "learning_rate": 2.3164251266264374e-05, "loss": 0.255, "step": 13490, "teacher_loss": 0.2500323951244354 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4252222776412964, "learning_rate": 2.3162345818083065e-05, "loss": 0.2196, "step": 13491, "teacher_loss": 0.19680823385715485 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5988937616348267, "learning_rate": 2.316044018276364e-05, "loss": 0.3332, "step": 13492, "teacher_loss": 0.30370596051216125 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3215354382991791, "learning_rate": 2.315853436034979e-05, "loss": 0.2342, "step": 13493, "teacher_loss": 0.22448039054870605 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4576791524887085, "learning_rate": 2.3156628350885205e-05, "loss": 0.3783, "step": 13494, "teacher_loss": 0.3694588541984558 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3960202932357788, "learning_rate": 2.3154722154413585e-05, "loss": 0.2449, "step": 13495, "teacher_loss": 0.22812967002391815 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.565636932849884, "learning_rate": 2.315281577097864e-05, "loss": 0.279, "step": 13496, "teacher_loss": 0.24714219570159912 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4581831097602844, "learning_rate": 2.315090920062407e-05, "loss": 0.2982, "step": 13497, "teacher_loss": 0.2804328203201294 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4776240587234497, "learning_rate": 2.314900244339359e-05, "loss": 0.301, "step": 13498, "teacher_loss": 0.2813946008682251 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.7765868306159973, "learning_rate": 2.314709549933092e-05, "loss": 0.2592, "step": 13499, "teacher_loss": 0.20169463753700256 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5115863680839539, "learning_rate": 2.314518836847977e-05, "loss": 0.4981, "step": 13500, "teacher_loss": 0.49662870168685913 }, { "epoch": 2.44, "eval_exact_match": 79.82024597918638, "eval_f1": 87.37283128266982, "step": 13500 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.7831138372421265, "learning_rate": 2.3143281050883872e-05, "loss": 0.3632, "step": 13501, "teacher_loss": 0.31658798456192017 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8992929458618164, "learning_rate": 2.314137354658696e-05, "loss": 0.2628, "step": 13502, "teacher_loss": 0.19202309846878052 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.31505823135375977, "learning_rate": 2.3139465855632753e-05, "loss": 0.2832, "step": 13503, "teacher_loss": 0.27961137890815735 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8971832990646362, "learning_rate": 2.3137557978065003e-05, "loss": 0.3337, "step": 13504, "teacher_loss": 0.27111852169036865 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.6072332859039307, "learning_rate": 2.3135649913927445e-05, "loss": 0.3208, "step": 13505, "teacher_loss": 0.2889575660228729 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3238345682621002, "learning_rate": 2.3133741663263827e-05, "loss": 0.2501, "step": 13506, "teacher_loss": 0.24186615645885468 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.38973528146743774, "learning_rate": 2.31318332261179e-05, "loss": 0.2776, "step": 13507, "teacher_loss": 0.2650982141494751 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5254389643669128, "learning_rate": 2.3129924602533413e-05, "loss": 0.2597, "step": 13508, "teacher_loss": 0.23022761940956116 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.4465681314468384, "learning_rate": 2.3128015792554137e-05, "loss": 0.2886, "step": 13509, "teacher_loss": 0.2710148096084595 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.49445420503616333, "learning_rate": 2.3126106796223823e-05, "loss": 0.227, "step": 13510, "teacher_loss": 0.19727496802806854 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.6732682585716248, "learning_rate": 2.312419761358624e-05, "loss": 0.6361, "step": 13511, "teacher_loss": 0.6319177150726318 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.62847900390625, "learning_rate": 2.312228824468517e-05, "loss": 0.2597, "step": 13512, "teacher_loss": 0.2186773121356964 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.2787730395793915, "learning_rate": 2.312037868956438e-05, "loss": 0.2234, "step": 13513, "teacher_loss": 0.21728336811065674 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.6420366168022156, "learning_rate": 2.3118468948267653e-05, "loss": 0.2894, "step": 13514, "teacher_loss": 0.2502191960811615 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.15983831882476807, "learning_rate": 2.3116559020838772e-05, "loss": 0.1696, "step": 13515, "teacher_loss": 0.17067524790763855 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.2552987337112427, "learning_rate": 2.311464890732153e-05, "loss": 0.2877, "step": 13516, "teacher_loss": 0.29128482937812805 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.20469996333122253, "learning_rate": 2.3112738607759713e-05, "loss": 0.2357, "step": 13517, "teacher_loss": 0.2391444742679596 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.5051414966583252, "learning_rate": 2.311082812219713e-05, "loss": 0.2403, "step": 13518, "teacher_loss": 0.2108653485774994 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8079785108566284, "learning_rate": 2.310891745067757e-05, "loss": 0.7205, "step": 13519, "teacher_loss": 0.710726261138916 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.6751030683517456, "learning_rate": 2.3107006593244848e-05, "loss": 0.3371, "step": 13520, "teacher_loss": 0.2995164394378662 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.17315822839736938, "learning_rate": 2.3105095549942768e-05, "loss": 0.23, "step": 13521, "teacher_loss": 0.23635897040367126 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.7756048440933228, "learning_rate": 2.310318432081515e-05, "loss": 0.5477, "step": 13522, "teacher_loss": 0.5224227905273438 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8406456112861633, "learning_rate": 2.3101272905905808e-05, "loss": 0.2652, "step": 13523, "teacher_loss": 0.2012418508529663 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 1.1960383653640747, "learning_rate": 2.309936130525857e-05, "loss": 0.4351, "step": 13524, "teacher_loss": 0.3505184054374695 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.8032013177871704, "learning_rate": 2.3097449518917257e-05, "loss": 0.4553, "step": 13525, "teacher_loss": 0.4166238307952881 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.3685430884361267, "learning_rate": 2.3095537546925705e-05, "loss": 0.2547, "step": 13526, "teacher_loss": 0.24200661480426788 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.35262101888656616, "learning_rate": 2.309362538932775e-05, "loss": 0.299, "step": 13527, "teacher_loss": 0.29305779933929443 }, { "compression_loss": 0.0, "epoch": 2.44, "label_loss": 0.7022177577018738, "learning_rate": 2.309171304616723e-05, "loss": 0.2438, "step": 13528, "teacher_loss": 0.19287629425525665 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.3267899751663208, "learning_rate": 2.308980051748799e-05, "loss": 0.225, "step": 13529, "teacher_loss": 0.21364432573318481 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.6883553266525269, "learning_rate": 2.3087887803333878e-05, "loss": 0.302, "step": 13530, "teacher_loss": 0.2590651214122772 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5634912252426147, "learning_rate": 2.3085974903748746e-05, "loss": 0.3152, "step": 13531, "teacher_loss": 0.2875780463218689 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.8322135210037231, "learning_rate": 2.3084061818776458e-05, "loss": 0.3014, "step": 13532, "teacher_loss": 0.2423807978630066 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5890605449676514, "learning_rate": 2.3082148548460862e-05, "loss": 0.2577, "step": 13533, "teacher_loss": 0.22084945440292358 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.27247971296310425, "learning_rate": 2.3080235092845838e-05, "loss": 0.3106, "step": 13534, "teacher_loss": 0.31482094526290894 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.27078306674957275, "learning_rate": 2.307832145197525e-05, "loss": 0.1948, "step": 13535, "teacher_loss": 0.18635180592536926 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.53783118724823, "learning_rate": 2.3076407625892964e-05, "loss": 0.2465, "step": 13536, "teacher_loss": 0.21417482197284698 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.2757284939289093, "learning_rate": 2.307449361464287e-05, "loss": 0.1609, "step": 13537, "teacher_loss": 0.14813555777072906 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5906493067741394, "learning_rate": 2.3072579418268843e-05, "loss": 0.2749, "step": 13538, "teacher_loss": 0.23986464738845825 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 1.0673515796661377, "learning_rate": 2.3070665036814776e-05, "loss": 0.5467, "step": 13539, "teacher_loss": 0.4888056218624115 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.24932169914245605, "learning_rate": 2.3068750470324556e-05, "loss": 0.1885, "step": 13540, "teacher_loss": 0.18175092339515686 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.7235797047615051, "learning_rate": 2.306683571884208e-05, "loss": 0.2743, "step": 13541, "teacher_loss": 0.2243853211402893 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.452308714389801, "learning_rate": 2.3064920782411243e-05, "loss": 0.2124, "step": 13542, "teacher_loss": 0.18578092753887177 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.07145362347364426, "learning_rate": 2.3063005661075957e-05, "loss": 0.1981, "step": 13543, "teacher_loss": 0.21221689879894257 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5338555574417114, "learning_rate": 2.3061090354880125e-05, "loss": 0.2268, "step": 13544, "teacher_loss": 0.19263795018196106 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.11490755528211594, "learning_rate": 2.3059174863867656e-05, "loss": 0.1627, "step": 13545, "teacher_loss": 0.16803517937660217 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.623649001121521, "learning_rate": 2.3057259188082475e-05, "loss": 0.2679, "step": 13546, "teacher_loss": 0.2283647358417511 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5715545415878296, "learning_rate": 2.305534332756849e-05, "loss": 0.3059, "step": 13547, "teacher_loss": 0.27636194229125977 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.4184514582157135, "learning_rate": 2.305342728236964e-05, "loss": 0.2936, "step": 13548, "teacher_loss": 0.27975472807884216 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.4545406699180603, "learning_rate": 2.3051511052529847e-05, "loss": 0.2879, "step": 13549, "teacher_loss": 0.2693905830383301 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.7193959951400757, "learning_rate": 2.3049594638093044e-05, "loss": 0.3071, "step": 13550, "teacher_loss": 0.2613123953342438 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.40149033069610596, "learning_rate": 2.304767803910317e-05, "loss": 0.1873, "step": 13551, "teacher_loss": 0.16345813870429993 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.8751025199890137, "learning_rate": 2.3045761255604168e-05, "loss": 0.4507, "step": 13552, "teacher_loss": 0.40358966588974 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.45521363615989685, "learning_rate": 2.304384428763998e-05, "loss": 0.3242, "step": 13553, "teacher_loss": 0.3096349835395813 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.39776796102523804, "learning_rate": 2.3041927135254564e-05, "loss": 0.3577, "step": 13554, "teacher_loss": 0.3532451391220093 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.2618064284324646, "learning_rate": 2.304000979849186e-05, "loss": 0.1873, "step": 13555, "teacher_loss": 0.17898762226104736 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.8584522008895874, "learning_rate": 2.3038092277395847e-05, "loss": 0.3402, "step": 13556, "teacher_loss": 0.2826330065727234 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5318832397460938, "learning_rate": 2.3036174572010474e-05, "loss": 0.2413, "step": 13557, "teacher_loss": 0.20902976393699646 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.7420346736907959, "learning_rate": 2.303425668237971e-05, "loss": 0.3905, "step": 13558, "teacher_loss": 0.35139989852905273 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.20058172941207886, "learning_rate": 2.303233860854753e-05, "loss": 0.1864, "step": 13559, "teacher_loss": 0.18485969305038452 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.301930695772171, "learning_rate": 2.303042035055791e-05, "loss": 0.2167, "step": 13560, "teacher_loss": 0.20727437734603882 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.4468812942504883, "learning_rate": 2.3028501908454827e-05, "loss": 0.238, "step": 13561, "teacher_loss": 0.21475017070770264 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.9812082648277283, "learning_rate": 2.3026583282282262e-05, "loss": 0.311, "step": 13562, "teacher_loss": 0.2364833950996399 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5296786427497864, "learning_rate": 2.302466447208421e-05, "loss": 0.2354, "step": 13563, "teacher_loss": 0.2026805579662323 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.4274221956729889, "learning_rate": 2.302274547790466e-05, "loss": 0.2771, "step": 13564, "teacher_loss": 0.26036199927330017 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.4614905118942261, "learning_rate": 2.302082629978761e-05, "loss": 0.3495, "step": 13565, "teacher_loss": 0.33708882331848145 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.16040746867656708, "learning_rate": 2.301890693777706e-05, "loss": 0.1789, "step": 13566, "teacher_loss": 0.18090671300888062 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.1445402354001999, "learning_rate": 2.3016987391917016e-05, "loss": 0.1956, "step": 13567, "teacher_loss": 0.20126408338546753 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.574134349822998, "learning_rate": 2.301506766225149e-05, "loss": 0.5372, "step": 13568, "teacher_loss": 0.5330567359924316 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.6888896822929382, "learning_rate": 2.301314774882449e-05, "loss": 0.3609, "step": 13569, "teacher_loss": 0.32447364926338196 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5721328854560852, "learning_rate": 2.301122765168004e-05, "loss": 0.2373, "step": 13570, "teacher_loss": 0.20014673471450806 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.33922451734542847, "learning_rate": 2.3009307370862154e-05, "loss": 0.3426, "step": 13571, "teacher_loss": 0.34297117590904236 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.3162120282649994, "learning_rate": 2.3007386906414864e-05, "loss": 0.2089, "step": 13572, "teacher_loss": 0.1970038115978241 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.49213704466819763, "learning_rate": 2.3005466258382202e-05, "loss": 0.2659, "step": 13573, "teacher_loss": 0.2407526820898056 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.3871278166770935, "learning_rate": 2.3003545426808204e-05, "loss": 0.2548, "step": 13574, "teacher_loss": 0.24011310935020447 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 1.0373250246047974, "learning_rate": 2.3001624411736894e-05, "loss": 0.3503, "step": 13575, "teacher_loss": 0.27401435375213623 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.436976820230484, "learning_rate": 2.299970321321234e-05, "loss": 0.2298, "step": 13576, "teacher_loss": 0.20673570036888123 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.39382803440093994, "learning_rate": 2.2997781831278565e-05, "loss": 0.2198, "step": 13577, "teacher_loss": 0.20043224096298218 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.7106810808181763, "learning_rate": 2.2995860265979636e-05, "loss": 0.2714, "step": 13578, "teacher_loss": 0.2225722074508667 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.39781245589256287, "learning_rate": 2.2993938517359604e-05, "loss": 0.2693, "step": 13579, "teacher_loss": 0.25506889820098877 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.312117338180542, "learning_rate": 2.2992016585462522e-05, "loss": 0.2289, "step": 13580, "teacher_loss": 0.21964344382286072 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.24402910470962524, "learning_rate": 2.299009447033247e-05, "loss": 0.2791, "step": 13581, "teacher_loss": 0.28297752141952515 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.3927075266838074, "learning_rate": 2.2988172172013507e-05, "loss": 0.2684, "step": 13582, "teacher_loss": 0.2546292543411255 }, { "compression_loss": 0.0, "epoch": 2.45, "label_loss": 0.5710468292236328, "learning_rate": 2.29862496905497e-05, "loss": 0.2603, "step": 13583, "teacher_loss": 0.22572194039821625 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.43110892176628113, "learning_rate": 2.2984327025985138e-05, "loss": 0.2206, "step": 13584, "teacher_loss": 0.1971992701292038 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5896751880645752, "learning_rate": 2.298240417836389e-05, "loss": 0.3364, "step": 13585, "teacher_loss": 0.3082408308982849 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5967991352081299, "learning_rate": 2.298048114773005e-05, "loss": 0.3002, "step": 13586, "teacher_loss": 0.26725292205810547 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.24697265028953552, "learning_rate": 2.2978557934127704e-05, "loss": 0.2375, "step": 13587, "teacher_loss": 0.2364168018102646 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.41696450114250183, "learning_rate": 2.297663453760094e-05, "loss": 0.2509, "step": 13588, "teacher_loss": 0.23244819045066833 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 1.0541925430297852, "learning_rate": 2.297471095819387e-05, "loss": 0.4058, "step": 13589, "teacher_loss": 0.33377522230148315 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.24102690815925598, "learning_rate": 2.297278719595058e-05, "loss": 0.1738, "step": 13590, "teacher_loss": 0.1663818061351776 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.6702922582626343, "learning_rate": 2.2970863250915187e-05, "loss": 0.3997, "step": 13591, "teacher_loss": 0.3696141242980957 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.607141375541687, "learning_rate": 2.29689391231318e-05, "loss": 0.2109, "step": 13592, "teacher_loss": 0.16683481633663177 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3341156840324402, "learning_rate": 2.2967014812644525e-05, "loss": 0.2032, "step": 13593, "teacher_loss": 0.18869765102863312 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.6203186511993408, "learning_rate": 2.296509031949749e-05, "loss": 0.2597, "step": 13594, "teacher_loss": 0.21967898309230804 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.33753299713134766, "learning_rate": 2.2963165643734813e-05, "loss": 0.2123, "step": 13595, "teacher_loss": 0.1983848512172699 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.26318567991256714, "learning_rate": 2.2961240785400623e-05, "loss": 0.1827, "step": 13596, "teacher_loss": 0.17380431294441223 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3265713155269623, "learning_rate": 2.295931574453905e-05, "loss": 0.2955, "step": 13597, "teacher_loss": 0.2920163571834564 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.38764330744743347, "learning_rate": 2.2957390521194234e-05, "loss": 0.2875, "step": 13598, "teacher_loss": 0.2763361632823944 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5720712542533875, "learning_rate": 2.295546511541031e-05, "loss": 0.335, "step": 13599, "teacher_loss": 0.3086398243904114 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.8484443426132202, "learning_rate": 2.2953539527231423e-05, "loss": 0.3601, "step": 13600, "teacher_loss": 0.30589479207992554 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3951161205768585, "learning_rate": 2.2951613756701716e-05, "loss": 0.3482, "step": 13601, "teacher_loss": 0.34295085072517395 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.486197829246521, "learning_rate": 2.2949687803865346e-05, "loss": 0.3401, "step": 13602, "teacher_loss": 0.3238406777381897 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.4647293984889984, "learning_rate": 2.2947761668766472e-05, "loss": 0.2806, "step": 13603, "teacher_loss": 0.26011455059051514 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.4722694158554077, "learning_rate": 2.2945835351449252e-05, "loss": 0.3578, "step": 13604, "teacher_loss": 0.34504833817481995 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.18278971314430237, "learning_rate": 2.294390885195785e-05, "loss": 0.1776, "step": 13605, "teacher_loss": 0.1770172268152237 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.2531588673591614, "learning_rate": 2.2941982170336434e-05, "loss": 0.2413, "step": 13606, "teacher_loss": 0.23996543884277344 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.6120121479034424, "learning_rate": 2.2940055306629174e-05, "loss": 0.2723, "step": 13607, "teacher_loss": 0.23450899124145508 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.49934884905815125, "learning_rate": 2.2938128260880256e-05, "loss": 0.242, "step": 13608, "teacher_loss": 0.21343611180782318 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5729543566703796, "learning_rate": 2.293620103313386e-05, "loss": 0.3002, "step": 13609, "teacher_loss": 0.2698464095592499 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.44234800338745117, "learning_rate": 2.2934273623434163e-05, "loss": 0.261, "step": 13610, "teacher_loss": 0.24080948531627655 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.30538347363471985, "learning_rate": 2.2932346031825362e-05, "loss": 0.4011, "step": 13611, "teacher_loss": 0.4117584824562073 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.4321520924568176, "learning_rate": 2.2930418258351648e-05, "loss": 0.263, "step": 13612, "teacher_loss": 0.24420154094696045 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3798518478870392, "learning_rate": 2.292849030305722e-05, "loss": 0.2787, "step": 13613, "teacher_loss": 0.2674141526222229 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.8263616561889648, "learning_rate": 2.2926562165986285e-05, "loss": 0.3107, "step": 13614, "teacher_loss": 0.25343558192253113 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.4937240779399872, "learning_rate": 2.2924633847183042e-05, "loss": 0.2351, "step": 13615, "teacher_loss": 0.2063518464565277 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3143256604671478, "learning_rate": 2.29227053466917e-05, "loss": 0.3111, "step": 13616, "teacher_loss": 0.31070607900619507 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.434296190738678, "learning_rate": 2.292077666455648e-05, "loss": 0.2511, "step": 13617, "teacher_loss": 0.2307668924331665 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5667064189910889, "learning_rate": 2.29188478008216e-05, "loss": 0.2154, "step": 13618, "teacher_loss": 0.17634794116020203 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5959628820419312, "learning_rate": 2.291691875553128e-05, "loss": 0.39, "step": 13619, "teacher_loss": 0.3671639859676361 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.49233677983283997, "learning_rate": 2.291498952872975e-05, "loss": 0.2882, "step": 13620, "teacher_loss": 0.26552340388298035 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.21298331022262573, "learning_rate": 2.2913060120461244e-05, "loss": 0.2252, "step": 13621, "teacher_loss": 0.22653484344482422 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5891911387443542, "learning_rate": 2.2911130530769988e-05, "loss": 0.3977, "step": 13622, "teacher_loss": 0.3764520287513733 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.2632080018520355, "learning_rate": 2.290920075970023e-05, "loss": 0.2905, "step": 13623, "teacher_loss": 0.29351699352264404 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.28599345684051514, "learning_rate": 2.2907270807296214e-05, "loss": 0.146, "step": 13624, "teacher_loss": 0.13047613203525543 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.8331006765365601, "learning_rate": 2.2905340673602184e-05, "loss": 0.3826, "step": 13625, "teacher_loss": 0.3325856029987335 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3796558380126953, "learning_rate": 2.2903410358662392e-05, "loss": 0.4013, "step": 13626, "teacher_loss": 0.40374141931533813 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5125864744186401, "learning_rate": 2.2901479862521094e-05, "loss": 0.3082, "step": 13627, "teacher_loss": 0.285462886095047 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.42441636323928833, "learning_rate": 2.2899549185222562e-05, "loss": 0.2401, "step": 13628, "teacher_loss": 0.21959525346755981 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.5916315913200378, "learning_rate": 2.2897618326811042e-05, "loss": 0.2817, "step": 13629, "teacher_loss": 0.24720904231071472 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.10614937543869019, "learning_rate": 2.2895687287330817e-05, "loss": 0.1914, "step": 13630, "teacher_loss": 0.20090511441230774 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.6010932922363281, "learning_rate": 2.2893756066826153e-05, "loss": 0.251, "step": 13631, "teacher_loss": 0.21209104359149933 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.47895514965057373, "learning_rate": 2.2891824665341333e-05, "loss": 0.2046, "step": 13632, "teacher_loss": 0.17416900396347046 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.7816649675369263, "learning_rate": 2.288989308292063e-05, "loss": 0.3334, "step": 13633, "teacher_loss": 0.2835521996021271 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.6701295971870422, "learning_rate": 2.2887961319608335e-05, "loss": 0.3152, "step": 13634, "teacher_loss": 0.2758147716522217 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 1.0728983879089355, "learning_rate": 2.2886029375448733e-05, "loss": 0.6157, "step": 13635, "teacher_loss": 0.5649242997169495 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.3278043568134308, "learning_rate": 2.2884097250486127e-05, "loss": 0.255, "step": 13636, "teacher_loss": 0.2469637095928192 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.30848586559295654, "learning_rate": 2.2882164944764805e-05, "loss": 0.2144, "step": 13637, "teacher_loss": 0.20397846400737762 }, { "compression_loss": 0.0, "epoch": 2.46, "label_loss": 0.49018996953964233, "learning_rate": 2.2880232458329073e-05, "loss": 0.2669, "step": 13638, "teacher_loss": 0.24208678305149078 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.293617308139801, "learning_rate": 2.287829979122324e-05, "loss": 0.2031, "step": 13639, "teacher_loss": 0.19307485222816467 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.6620434522628784, "learning_rate": 2.2876366943491606e-05, "loss": 0.6101, "step": 13640, "teacher_loss": 0.604293942451477 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3760091960430145, "learning_rate": 2.2874433915178502e-05, "loss": 0.2106, "step": 13641, "teacher_loss": 0.19220757484436035 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3357684314250946, "learning_rate": 2.2872500706328234e-05, "loss": 0.2174, "step": 13642, "teacher_loss": 0.20422470569610596 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.7679245471954346, "learning_rate": 2.287056731698512e-05, "loss": 0.3028, "step": 13643, "teacher_loss": 0.25116145610809326 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3222368657588959, "learning_rate": 2.2868633747193503e-05, "loss": 0.2017, "step": 13644, "teacher_loss": 0.18833978474140167 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.35985779762268066, "learning_rate": 2.28666999969977e-05, "loss": 0.3384, "step": 13645, "teacher_loss": 0.335971474647522 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.2317141890525818, "learning_rate": 2.2864766066442056e-05, "loss": 0.1853, "step": 13646, "teacher_loss": 0.18008866906166077 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.35469698905944824, "learning_rate": 2.2862831955570908e-05, "loss": 0.2761, "step": 13647, "teacher_loss": 0.2673995792865753 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.2339063584804535, "learning_rate": 2.286089766442859e-05, "loss": 0.2357, "step": 13648, "teacher_loss": 0.23589617013931274 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3616529107093811, "learning_rate": 2.285896319305946e-05, "loss": 0.1652, "step": 13649, "teacher_loss": 0.14334672689437866 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.6805123686790466, "learning_rate": 2.2857028541507873e-05, "loss": 0.3, "step": 13650, "teacher_loss": 0.25769805908203125 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5926121473312378, "learning_rate": 2.285509370981817e-05, "loss": 0.4163, "step": 13651, "teacher_loss": 0.3967229127883911 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.26732316613197327, "learning_rate": 2.285315869803472e-05, "loss": 0.2134, "step": 13652, "teacher_loss": 0.2074199914932251 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.23500683903694153, "learning_rate": 2.2851223506201887e-05, "loss": 0.1476, "step": 13653, "teacher_loss": 0.13788092136383057 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.35115116834640503, "learning_rate": 2.2849288134364036e-05, "loss": 0.2364, "step": 13654, "teacher_loss": 0.22362536191940308 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.46718618273735046, "learning_rate": 2.2847352582565547e-05, "loss": 0.2771, "step": 13655, "teacher_loss": 0.2560134828090668 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.316895455121994, "learning_rate": 2.2845416850850786e-05, "loss": 0.185, "step": 13656, "teacher_loss": 0.17029224336147308 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.46598106622695923, "learning_rate": 2.2843480939264143e-05, "loss": 0.2348, "step": 13657, "teacher_loss": 0.20910638570785522 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.6750378012657166, "learning_rate": 2.2841544847849994e-05, "loss": 0.4622, "step": 13658, "teacher_loss": 0.4385721683502197 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.647241473197937, "learning_rate": 2.2839608576652735e-05, "loss": 0.2646, "step": 13659, "teacher_loss": 0.22206813097000122 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.7658380270004272, "learning_rate": 2.2837672125716752e-05, "loss": 0.2504, "step": 13660, "teacher_loss": 0.1931057572364807 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.6259948015213013, "learning_rate": 2.2835735495086446e-05, "loss": 0.296, "step": 13661, "teacher_loss": 0.2593657076358795 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.1222652792930603, "learning_rate": 2.2833798684806222e-05, "loss": 0.1589, "step": 13662, "teacher_loss": 0.16296085715293884 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.25351467728614807, "learning_rate": 2.2831861694920478e-05, "loss": 0.3216, "step": 13663, "teacher_loss": 0.32919585704803467 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3411373496055603, "learning_rate": 2.2829924525473628e-05, "loss": 0.2963, "step": 13664, "teacher_loss": 0.2913018763065338 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.27812865376472473, "learning_rate": 2.2827987176510082e-05, "loss": 0.1447, "step": 13665, "teacher_loss": 0.12988203763961792 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.4434473216533661, "learning_rate": 2.282604964807426e-05, "loss": 0.2986, "step": 13666, "teacher_loss": 0.28251171112060547 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 1.2014683485031128, "learning_rate": 2.2824111940210587e-05, "loss": 0.4971, "step": 13667, "teacher_loss": 0.4188101887702942 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.592219352722168, "learning_rate": 2.2822174052963478e-05, "loss": 0.2979, "step": 13668, "teacher_loss": 0.26520490646362305 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.8525668382644653, "learning_rate": 2.2820235986377376e-05, "loss": 0.3358, "step": 13669, "teacher_loss": 0.2783851623535156 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.6147552728652954, "learning_rate": 2.2818297740496704e-05, "loss": 0.212, "step": 13670, "teacher_loss": 0.1672194004058838 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.9210973978042603, "learning_rate": 2.281635931536591e-05, "loss": 0.4474, "step": 13671, "teacher_loss": 0.39478808641433716 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5139021873474121, "learning_rate": 2.2814420711029432e-05, "loss": 0.266, "step": 13672, "teacher_loss": 0.23847809433937073 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 1.3612399101257324, "learning_rate": 2.281248192753171e-05, "loss": 0.309, "step": 13673, "teacher_loss": 0.1921277791261673 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5665510296821594, "learning_rate": 2.2810542964917205e-05, "loss": 0.2636, "step": 13674, "teacher_loss": 0.22989961504936218 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.27369365096092224, "learning_rate": 2.2808603823230368e-05, "loss": 0.2091, "step": 13675, "teacher_loss": 0.20193517208099365 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.2124057561159134, "learning_rate": 2.2806664502515657e-05, "loss": 0.1843, "step": 13676, "teacher_loss": 0.1811448484659195 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.9935253858566284, "learning_rate": 2.280472500281753e-05, "loss": 0.3078, "step": 13677, "teacher_loss": 0.23161442577838898 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5461657643318176, "learning_rate": 2.2802785324180458e-05, "loss": 0.3221, "step": 13678, "teacher_loss": 0.2972163259983063 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.4159039556980133, "learning_rate": 2.280084546664892e-05, "loss": 0.2603, "step": 13679, "teacher_loss": 0.2430488020181656 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.4179351329803467, "learning_rate": 2.279890543026738e-05, "loss": 0.2169, "step": 13680, "teacher_loss": 0.1945488452911377 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.4101651906967163, "learning_rate": 2.2796965215080317e-05, "loss": 0.2323, "step": 13681, "teacher_loss": 0.21253517270088196 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3306651711463928, "learning_rate": 2.2795024821132225e-05, "loss": 0.1986, "step": 13682, "teacher_loss": 0.1839318573474884 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.2958616316318512, "learning_rate": 2.2793084248467577e-05, "loss": 0.2224, "step": 13683, "teacher_loss": 0.21426919102668762 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.20312482118606567, "learning_rate": 2.2791143497130878e-05, "loss": 0.2683, "step": 13684, "teacher_loss": 0.2755442261695862 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.3776360750198364, "learning_rate": 2.2789202567166615e-05, "loss": 0.328, "step": 13685, "teacher_loss": 0.3225070536136627 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.38910961151123047, "learning_rate": 2.2787261458619292e-05, "loss": 0.2786, "step": 13686, "teacher_loss": 0.2662811875343323 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.39175790548324585, "learning_rate": 2.278532017153341e-05, "loss": 0.2521, "step": 13687, "teacher_loss": 0.23654311895370483 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.8858953714370728, "learning_rate": 2.2783378705953477e-05, "loss": 0.2982, "step": 13688, "teacher_loss": 0.2328588217496872 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5204565525054932, "learning_rate": 2.278143706192401e-05, "loss": 0.3268, "step": 13689, "teacher_loss": 0.30532315373420715 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.831377387046814, "learning_rate": 2.2779495239489513e-05, "loss": 0.3376, "step": 13690, "teacher_loss": 0.2827402353286743 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.5325830578804016, "learning_rate": 2.277755323869452e-05, "loss": 0.2401, "step": 13691, "teacher_loss": 0.20755130052566528 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 1.0558143854141235, "learning_rate": 2.277561105958355e-05, "loss": 0.4765, "step": 13692, "teacher_loss": 0.41213589906692505 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 1.0601139068603516, "learning_rate": 2.277366870220113e-05, "loss": 0.902, "step": 13693, "teacher_loss": 0.8844602108001709 }, { "compression_loss": 0.0, "epoch": 2.47, "label_loss": 0.25625261664390564, "learning_rate": 2.2771726166591796e-05, "loss": 0.2883, "step": 13694, "teacher_loss": 0.2918834388256073 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5181944966316223, "learning_rate": 2.2769783452800073e-05, "loss": 0.2375, "step": 13695, "teacher_loss": 0.20632264018058777 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3610687851905823, "learning_rate": 2.276784056087052e-05, "loss": 0.2379, "step": 13696, "teacher_loss": 0.22426950931549072 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.582071840763092, "learning_rate": 2.2765897490847668e-05, "loss": 0.3164, "step": 13697, "teacher_loss": 0.28687548637390137 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.18225809931755066, "learning_rate": 2.2763954242776067e-05, "loss": 0.1943, "step": 13698, "teacher_loss": 0.19561699032783508 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.16500063240528107, "learning_rate": 2.276201081670028e-05, "loss": 0.1392, "step": 13699, "teacher_loss": 0.13629081845283508 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.24366965889930725, "learning_rate": 2.2760067212664852e-05, "loss": 0.1877, "step": 13700, "teacher_loss": 0.18144258856773376 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.32557782530784607, "learning_rate": 2.2758123430714347e-05, "loss": 0.2748, "step": 13701, "teacher_loss": 0.26913610100746155 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.370863139629364, "learning_rate": 2.275617947089334e-05, "loss": 0.2356, "step": 13702, "teacher_loss": 0.220546692609787 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.35808438062667847, "learning_rate": 2.2754235333246382e-05, "loss": 0.2261, "step": 13703, "teacher_loss": 0.2114272117614746 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.8877078294754028, "learning_rate": 2.2752291017818064e-05, "loss": 0.3735, "step": 13704, "teacher_loss": 0.316368043422699 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3893854022026062, "learning_rate": 2.2750346524652953e-05, "loss": 0.2087, "step": 13705, "teacher_loss": 0.1886221468448639 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5065631866455078, "learning_rate": 2.2748401853795628e-05, "loss": 0.2736, "step": 13706, "teacher_loss": 0.247752383351326 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 1.1485528945922852, "learning_rate": 2.274645700529069e-05, "loss": 1.0053, "step": 13707, "teacher_loss": 0.9893661141395569 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.6708950996398926, "learning_rate": 2.274451197918271e-05, "loss": 0.2983, "step": 13708, "teacher_loss": 0.2568681836128235 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.47390109300613403, "learning_rate": 2.2742566775516287e-05, "loss": 0.3766, "step": 13709, "teacher_loss": 0.36576753854751587 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.24836969375610352, "learning_rate": 2.2740621394336027e-05, "loss": 0.1655, "step": 13710, "teacher_loss": 0.15625017881393433 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5187473297119141, "learning_rate": 2.2738675835686522e-05, "loss": 0.5512, "step": 13711, "teacher_loss": 0.5548242926597595 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3925529420375824, "learning_rate": 2.2736730099612387e-05, "loss": 0.2497, "step": 13712, "teacher_loss": 0.2338358759880066 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.27833569049835205, "learning_rate": 2.2734784186158225e-05, "loss": 0.196, "step": 13713, "teacher_loss": 0.18688370287418365 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.45011603832244873, "learning_rate": 2.273283809536865e-05, "loss": 0.2082, "step": 13714, "teacher_loss": 0.18135929107666016 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3631151616573334, "learning_rate": 2.273089182728828e-05, "loss": 0.2369, "step": 13715, "teacher_loss": 0.22282442450523376 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.37081897258758545, "learning_rate": 2.2728945381961744e-05, "loss": 0.2882, "step": 13716, "teacher_loss": 0.2789686918258667 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.2927539646625519, "learning_rate": 2.272699875943366e-05, "loss": 0.3029, "step": 13717, "teacher_loss": 0.3040761649608612 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.22381678223609924, "learning_rate": 2.2725051959748658e-05, "loss": 0.1623, "step": 13718, "teacher_loss": 0.15544864535331726 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5651026964187622, "learning_rate": 2.272310498295138e-05, "loss": 0.2576, "step": 13719, "teacher_loss": 0.2234533429145813 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.35448452830314636, "learning_rate": 2.2721157829086458e-05, "loss": 0.3098, "step": 13720, "teacher_loss": 0.30482715368270874 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.4623692035675049, "learning_rate": 2.2719210498198537e-05, "loss": 0.2901, "step": 13721, "teacher_loss": 0.27094146609306335 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.4959922730922699, "learning_rate": 2.2717262990332266e-05, "loss": 0.278, "step": 13722, "teacher_loss": 0.2537287473678589 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.22338610887527466, "learning_rate": 2.2715315305532282e-05, "loss": 0.2336, "step": 13723, "teacher_loss": 0.23469537496566772 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.4517146646976471, "learning_rate": 2.2713367443843263e-05, "loss": 0.2339, "step": 13724, "teacher_loss": 0.20966269075870514 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.44919121265411377, "learning_rate": 2.2711419405309845e-05, "loss": 0.2228, "step": 13725, "teacher_loss": 0.1976395845413208 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.32712122797966003, "learning_rate": 2.2709471189976704e-05, "loss": 0.1793, "step": 13726, "teacher_loss": 0.16283798217773438 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5903319716453552, "learning_rate": 2.27075227978885e-05, "loss": 0.3131, "step": 13727, "teacher_loss": 0.28224337100982666 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.4159919321537018, "learning_rate": 2.270557422908991e-05, "loss": 0.205, "step": 13728, "teacher_loss": 0.18154002726078033 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.2592383921146393, "learning_rate": 2.2703625483625603e-05, "loss": 0.2099, "step": 13729, "teacher_loss": 0.20439793169498444 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.574203372001648, "learning_rate": 2.2701676561540263e-05, "loss": 0.4423, "step": 13730, "teacher_loss": 0.4276687502861023 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.6093292236328125, "learning_rate": 2.2699727462878565e-05, "loss": 0.4333, "step": 13731, "teacher_loss": 0.4137030839920044 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.9656829833984375, "learning_rate": 2.269777818768521e-05, "loss": 0.2966, "step": 13732, "teacher_loss": 0.2222839891910553 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.6082431077957153, "learning_rate": 2.269582873600487e-05, "loss": 0.3037, "step": 13733, "teacher_loss": 0.26983505487442017 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.5657515525817871, "learning_rate": 2.2693879107882255e-05, "loss": 0.303, "step": 13734, "teacher_loss": 0.2737973928451538 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.662318229675293, "learning_rate": 2.2691929303362063e-05, "loss": 0.6768, "step": 13735, "teacher_loss": 0.6783580780029297 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.25294923782348633, "learning_rate": 2.2689979322488988e-05, "loss": 0.1719, "step": 13736, "teacher_loss": 0.16286274790763855 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3036515414714813, "learning_rate": 2.268802916530775e-05, "loss": 0.2048, "step": 13737, "teacher_loss": 0.193766251206398 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.23262326419353485, "learning_rate": 2.2686078831863044e-05, "loss": 0.3249, "step": 13738, "teacher_loss": 0.335180401802063 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.48552048206329346, "learning_rate": 2.26841283221996e-05, "loss": 0.3244, "step": 13739, "teacher_loss": 0.30652815103530884 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.48697346448898315, "learning_rate": 2.268217763636213e-05, "loss": 0.2754, "step": 13740, "teacher_loss": 0.2519321143627167 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.30147188901901245, "learning_rate": 2.2680226774395357e-05, "loss": 0.1623, "step": 13741, "teacher_loss": 0.14687126874923706 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.20066815614700317, "learning_rate": 2.2678275736344014e-05, "loss": 0.1912, "step": 13742, "teacher_loss": 0.19010785222053528 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3933402895927429, "learning_rate": 2.267632452225283e-05, "loss": 0.2541, "step": 13743, "teacher_loss": 0.23868155479431152 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.21283546090126038, "learning_rate": 2.267437313216654e-05, "loss": 0.2109, "step": 13744, "teacher_loss": 0.21064089238643646 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.2045329213142395, "learning_rate": 2.267242156612988e-05, "loss": 0.2289, "step": 13745, "teacher_loss": 0.23158694803714752 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.30674394965171814, "learning_rate": 2.2670469824187596e-05, "loss": 0.2202, "step": 13746, "teacher_loss": 0.2105536162853241 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.6440914273262024, "learning_rate": 2.266851790638444e-05, "loss": 0.2709, "step": 13747, "teacher_loss": 0.22947090864181519 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.46461862325668335, "learning_rate": 2.2666565812765157e-05, "loss": 0.2194, "step": 13748, "teacher_loss": 0.19215229153633118 }, { "compression_loss": 0.0, "epoch": 2.48, "label_loss": 0.3110160827636719, "learning_rate": 2.2664613543374507e-05, "loss": 0.2044, "step": 13749, "teacher_loss": 0.19259901344776154 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.18404620885849, "learning_rate": 2.2662661098257247e-05, "loss": 0.1634, "step": 13750, "teacher_loss": 0.1611512005329132 }, { "epoch": 2.49, "eval_exact_match": 79.85808893093662, "eval_f1": 87.14025084262454, "step": 13750 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.353465735912323, "learning_rate": 2.2660708477458144e-05, "loss": 0.1459, "step": 13751, "teacher_loss": 0.12278446555137634 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.37910670042037964, "learning_rate": 2.265875568102196e-05, "loss": 0.2143, "step": 13752, "teacher_loss": 0.19596660137176514 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.3110623359680176, "learning_rate": 2.265680270899347e-05, "loss": 0.2479, "step": 13753, "teacher_loss": 0.24086806178092957 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.2679804265499115, "learning_rate": 2.2654849561417452e-05, "loss": 0.1729, "step": 13754, "teacher_loss": 0.16235005855560303 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.370853066444397, "learning_rate": 2.2652896238338687e-05, "loss": 0.2217, "step": 13755, "teacher_loss": 0.2051302194595337 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5036153197288513, "learning_rate": 2.2650942739801953e-05, "loss": 0.2392, "step": 13756, "teacher_loss": 0.20976772904396057 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.13447241485118866, "learning_rate": 2.264898906585204e-05, "loss": 0.1902, "step": 13757, "teacher_loss": 0.1964164674282074 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.26214712858200073, "learning_rate": 2.2647035216533742e-05, "loss": 0.1683, "step": 13758, "teacher_loss": 0.15786007046699524 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.20998218655586243, "learning_rate": 2.264508119189185e-05, "loss": 0.2367, "step": 13759, "teacher_loss": 0.23972123861312866 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.9091475009918213, "learning_rate": 2.2643126991971172e-05, "loss": 0.3433, "step": 13760, "teacher_loss": 0.28047046065330505 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5114991664886475, "learning_rate": 2.2641172616816507e-05, "loss": 0.2387, "step": 13761, "teacher_loss": 0.2084435373544693 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.610339343547821, "learning_rate": 2.263921806647266e-05, "loss": 0.3082, "step": 13762, "teacher_loss": 0.2746211886405945 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.8649090528488159, "learning_rate": 2.2637263340984446e-05, "loss": 0.2771, "step": 13763, "teacher_loss": 0.21173523366451263 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5670951008796692, "learning_rate": 2.2635308440396687e-05, "loss": 0.3814, "step": 13764, "teacher_loss": 0.36081233620643616 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.4336152672767639, "learning_rate": 2.2633353364754194e-05, "loss": 0.2036, "step": 13765, "teacher_loss": 0.17801907658576965 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.34038621187210083, "learning_rate": 2.2631398114101792e-05, "loss": 0.2442, "step": 13766, "teacher_loss": 0.23353464901447296 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.462745726108551, "learning_rate": 2.2629442688484316e-05, "loss": 0.2407, "step": 13767, "teacher_loss": 0.21602681279182434 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5976167917251587, "learning_rate": 2.262748708794659e-05, "loss": 0.3463, "step": 13768, "teacher_loss": 0.31833136081695557 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.34929579496383667, "learning_rate": 2.2625531312533457e-05, "loss": 0.2755, "step": 13769, "teacher_loss": 0.2672916054725647 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.23493260145187378, "learning_rate": 2.262357536228975e-05, "loss": 0.2202, "step": 13770, "teacher_loss": 0.21853691339492798 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5107652544975281, "learning_rate": 2.2621619237260318e-05, "loss": 0.2521, "step": 13771, "teacher_loss": 0.22330498695373535 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 1.034281611442566, "learning_rate": 2.2619662937490004e-05, "loss": 0.3058, "step": 13772, "teacher_loss": 0.22491145133972168 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.6619790196418762, "learning_rate": 2.2617706463023668e-05, "loss": 0.3018, "step": 13773, "teacher_loss": 0.2617333233356476 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.33394694328308105, "learning_rate": 2.2615749813906158e-05, "loss": 0.2634, "step": 13774, "teacher_loss": 0.25554752349853516 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.42445796728134155, "learning_rate": 2.261379299018234e-05, "loss": 0.1992, "step": 13775, "teacher_loss": 0.17422622442245483 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.49240079522132874, "learning_rate": 2.261183599189708e-05, "loss": 0.4841, "step": 13776, "teacher_loss": 0.48315268754959106 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.8021292686462402, "learning_rate": 2.2609878819095237e-05, "loss": 0.3014, "step": 13777, "teacher_loss": 0.24581149220466614 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5289261341094971, "learning_rate": 2.2607921471821697e-05, "loss": 0.2824, "step": 13778, "teacher_loss": 0.254984587430954 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5143373012542725, "learning_rate": 2.2605963950121315e-05, "loss": 0.2222, "step": 13779, "teacher_loss": 0.18969523906707764 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.6002171635627747, "learning_rate": 2.260400625403899e-05, "loss": 0.3024, "step": 13780, "teacher_loss": 0.26932841539382935 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.9642322063446045, "learning_rate": 2.26020483836196e-05, "loss": 0.2991, "step": 13781, "teacher_loss": 0.22516149282455444 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.4277884364128113, "learning_rate": 2.260009033890803e-05, "loss": 0.2518, "step": 13782, "teacher_loss": 0.2322588562965393 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.3205012381076813, "learning_rate": 2.2598132119949176e-05, "loss": 0.2112, "step": 13783, "teacher_loss": 0.19909392297267914 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.34331250190734863, "learning_rate": 2.259617372678793e-05, "loss": 0.2927, "step": 13784, "teacher_loss": 0.2870316207408905 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.542759358882904, "learning_rate": 2.2594215159469205e-05, "loss": 0.2207, "step": 13785, "teacher_loss": 0.18489298224449158 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.9033209085464478, "learning_rate": 2.259225641803788e-05, "loss": 0.4198, "step": 13786, "teacher_loss": 0.36607107520103455 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.3481941223144531, "learning_rate": 2.2590297502538896e-05, "loss": 0.2074, "step": 13787, "teacher_loss": 0.19178421795368195 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5806370973587036, "learning_rate": 2.2588338413017133e-05, "loss": 0.2218, "step": 13788, "teacher_loss": 0.18187814950942993 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 1.3168507814407349, "learning_rate": 2.2586379149517528e-05, "loss": 0.5132, "step": 13789, "teacher_loss": 0.4238593876361847 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.18170928955078125, "learning_rate": 2.2584419712084992e-05, "loss": 0.2235, "step": 13790, "teacher_loss": 0.2281537652015686 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.3284096419811249, "learning_rate": 2.2582460100764445e-05, "loss": 0.2623, "step": 13791, "teacher_loss": 0.2549816966056824 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5958716869354248, "learning_rate": 2.2580500315600832e-05, "loss": 0.3446, "step": 13792, "teacher_loss": 0.3167067766189575 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.34571486711502075, "learning_rate": 2.257854035663907e-05, "loss": 0.2342, "step": 13793, "teacher_loss": 0.2218383550643921 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.7008187174797058, "learning_rate": 2.25765802239241e-05, "loss": 0.36, "step": 13794, "teacher_loss": 0.3221738934516907 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.32773566246032715, "learning_rate": 2.2574619917500858e-05, "loss": 0.2589, "step": 13795, "teacher_loss": 0.25120246410369873 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5814257264137268, "learning_rate": 2.2572659437414295e-05, "loss": 0.2812, "step": 13796, "teacher_loss": 0.24784106016159058 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.44662514328956604, "learning_rate": 2.2570698783709355e-05, "loss": 0.2275, "step": 13797, "teacher_loss": 0.20317766070365906 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.7091720700263977, "learning_rate": 2.2568737956430987e-05, "loss": 0.278, "step": 13798, "teacher_loss": 0.23007085919380188 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.5034940242767334, "learning_rate": 2.256677695562415e-05, "loss": 0.3261, "step": 13799, "teacher_loss": 0.30634692311286926 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.14518913626670837, "learning_rate": 2.2564815781333805e-05, "loss": 0.155, "step": 13800, "teacher_loss": 0.15605610609054565 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.302400678396225, "learning_rate": 2.2562854433604915e-05, "loss": 0.2485, "step": 13801, "teacher_loss": 0.2425551414489746 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.26147955656051636, "learning_rate": 2.2560892912482445e-05, "loss": 0.2623, "step": 13802, "teacher_loss": 0.26239651441574097 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.3270567059516907, "learning_rate": 2.2558931218011376e-05, "loss": 0.2503, "step": 13803, "teacher_loss": 0.2417515218257904 }, { "compression_loss": 0.0, "epoch": 2.49, "label_loss": 0.31223657727241516, "learning_rate": 2.2556969350236668e-05, "loss": 0.2259, "step": 13804, "teacher_loss": 0.21632379293441772 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.16385126113891602, "learning_rate": 2.255500730920332e-05, "loss": 0.1852, "step": 13805, "teacher_loss": 0.18751777708530426 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.9464585781097412, "learning_rate": 2.25530450949563e-05, "loss": 0.3604, "step": 13806, "teacher_loss": 0.29525020718574524 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5212752819061279, "learning_rate": 2.25510827075406e-05, "loss": 0.2523, "step": 13807, "teacher_loss": 0.22245824337005615 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.36930155754089355, "learning_rate": 2.254912014700121e-05, "loss": 0.2137, "step": 13808, "teacher_loss": 0.19640091061592102 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.7803667783737183, "learning_rate": 2.254715741338313e-05, "loss": 0.421, "step": 13809, "teacher_loss": 0.38107338547706604 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5447787642478943, "learning_rate": 2.2545194506731365e-05, "loss": 0.3626, "step": 13810, "teacher_loss": 0.3423329293727875 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.17876482009887695, "learning_rate": 2.2543231427090908e-05, "loss": 0.2377, "step": 13811, "teacher_loss": 0.2442304939031601 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.32919400930404663, "learning_rate": 2.2541268174506768e-05, "loss": 0.2341, "step": 13812, "teacher_loss": 0.22353880107402802 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5688897967338562, "learning_rate": 2.2539304749023958e-05, "loss": 0.4087, "step": 13813, "teacher_loss": 0.39087629318237305 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.6939910650253296, "learning_rate": 2.2537341150687497e-05, "loss": 0.2913, "step": 13814, "teacher_loss": 0.2465360164642334 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.44366300106048584, "learning_rate": 2.25353773795424e-05, "loss": 0.3027, "step": 13815, "teacher_loss": 0.2869938015937805 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.9140196442604065, "learning_rate": 2.2533413435633692e-05, "loss": 0.3869, "step": 13816, "teacher_loss": 0.3283763527870178 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.40716859698295593, "learning_rate": 2.25314493190064e-05, "loss": 0.2227, "step": 13817, "teacher_loss": 0.20223474502563477 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.27802783250808716, "learning_rate": 2.2529485029705558e-05, "loss": 0.2661, "step": 13818, "teacher_loss": 0.26473915576934814 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5900223255157471, "learning_rate": 2.2527520567776195e-05, "loss": 0.2364, "step": 13819, "teacher_loss": 0.19709137082099915 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.40474605560302734, "learning_rate": 2.2525555933263354e-05, "loss": 0.2599, "step": 13820, "teacher_loss": 0.24381205439567566 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.433512806892395, "learning_rate": 2.2523591126212084e-05, "loss": 0.2123, "step": 13821, "teacher_loss": 0.1876736283302307 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.40282338857650757, "learning_rate": 2.2521626146667423e-05, "loss": 0.285, "step": 13822, "teacher_loss": 0.2719587981700897 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3457735478878021, "learning_rate": 2.2519660994674423e-05, "loss": 0.297, "step": 13823, "teacher_loss": 0.2915331721305847 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 1.319657802581787, "learning_rate": 2.2517695670278143e-05, "loss": 0.3618, "step": 13824, "teacher_loss": 0.2554050385951996 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.857620120048523, "learning_rate": 2.2515730173523645e-05, "loss": 0.5446, "step": 13825, "teacher_loss": 0.5097886919975281 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3763587474822998, "learning_rate": 2.251376450445598e-05, "loss": 0.2912, "step": 13826, "teacher_loss": 0.28168806433677673 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.39884382486343384, "learning_rate": 2.251179866312023e-05, "loss": 0.2826, "step": 13827, "teacher_loss": 0.2696320712566376 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3768623173236847, "learning_rate": 2.2509832649561453e-05, "loss": 0.2883, "step": 13828, "teacher_loss": 0.2784094512462616 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.22470203042030334, "learning_rate": 2.2507866463824727e-05, "loss": 0.2636, "step": 13829, "teacher_loss": 0.26796603202819824 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.21355083584785461, "learning_rate": 2.250590010595514e-05, "loss": 0.2247, "step": 13830, "teacher_loss": 0.22596681118011475 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.7543628215789795, "learning_rate": 2.2503933575997763e-05, "loss": 0.4384, "step": 13831, "teacher_loss": 0.403306245803833 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3063744306564331, "learning_rate": 2.2501966873997685e-05, "loss": 0.2187, "step": 13832, "teacher_loss": 0.20897531509399414 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.7782771587371826, "learning_rate": 2.25e-05, "loss": 0.4179, "step": 13833, "teacher_loss": 0.37787577509880066 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5411162972450256, "learning_rate": 2.24980329540498e-05, "loss": 0.5695, "step": 13834, "teacher_loss": 0.5727081298828125 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.4747687578201294, "learning_rate": 2.249606573619219e-05, "loss": 0.2324, "step": 13835, "teacher_loss": 0.2054956555366516 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.33572036027908325, "learning_rate": 2.2494098346472264e-05, "loss": 0.3275, "step": 13836, "teacher_loss": 0.32656246423721313 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.2985009253025055, "learning_rate": 2.249213078493513e-05, "loss": 0.161, "step": 13837, "teacher_loss": 0.14569967985153198 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.7003904581069946, "learning_rate": 2.2490163051625898e-05, "loss": 0.2573, "step": 13838, "teacher_loss": 0.20812034606933594 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.27782052755355835, "learning_rate": 2.248819514658969e-05, "loss": 0.1816, "step": 13839, "teacher_loss": 0.1709585040807724 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3467024564743042, "learning_rate": 2.2486227069871614e-05, "loss": 0.3182, "step": 13840, "teacher_loss": 0.3149855434894562 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.4810560345649719, "learning_rate": 2.24842588215168e-05, "loss": 0.2754, "step": 13841, "teacher_loss": 0.25256556272506714 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.627578854560852, "learning_rate": 2.2482290401570368e-05, "loss": 0.2671, "step": 13842, "teacher_loss": 0.22709615528583527 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.2952554225921631, "learning_rate": 2.2480321810077445e-05, "loss": 0.2107, "step": 13843, "teacher_loss": 0.20135092735290527 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.6124990582466125, "learning_rate": 2.2478353047083176e-05, "loss": 0.4082, "step": 13844, "teacher_loss": 0.3854805529117584 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5393595695495605, "learning_rate": 2.2476384112632692e-05, "loss": 0.3305, "step": 13845, "teacher_loss": 0.3072936534881592 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3037719130516052, "learning_rate": 2.247441500677113e-05, "loss": 0.1813, "step": 13846, "teacher_loss": 0.16771341860294342 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.5305928587913513, "learning_rate": 2.247244572954365e-05, "loss": 0.364, "step": 13847, "teacher_loss": 0.34546053409576416 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.667974591255188, "learning_rate": 2.247047628099539e-05, "loss": 0.3568, "step": 13848, "teacher_loss": 0.3222300410270691 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.34892773628234863, "learning_rate": 2.246850666117151e-05, "loss": 0.2988, "step": 13849, "teacher_loss": 0.29325512051582336 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.1705968976020813, "learning_rate": 2.2466536870117157e-05, "loss": 0.2039, "step": 13850, "teacher_loss": 0.20757699012756348 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.506560742855072, "learning_rate": 2.24645669078775e-05, "loss": 0.5226, "step": 13851, "teacher_loss": 0.5243268013000488 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.969771146774292, "learning_rate": 2.2462596774497707e-05, "loss": 0.5286, "step": 13852, "teacher_loss": 0.4795331358909607 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.2480800300836563, "learning_rate": 2.2460626470022944e-05, "loss": 0.2472, "step": 13853, "teacher_loss": 0.24715490639209747 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.615435004234314, "learning_rate": 2.245865599449838e-05, "loss": 0.3108, "step": 13854, "teacher_loss": 0.27692246437072754 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.3770451247692108, "learning_rate": 2.2456685347969206e-05, "loss": 0.2258, "step": 13855, "teacher_loss": 0.20894969999790192 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.4612610936164856, "learning_rate": 2.2454714530480582e-05, "loss": 0.3029, "step": 13856, "teacher_loss": 0.28530219197273254 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.4208654761314392, "learning_rate": 2.2452743542077712e-05, "loss": 0.283, "step": 13857, "teacher_loss": 0.26767972111701965 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.23390749096870422, "learning_rate": 2.245077238280577e-05, "loss": 0.1964, "step": 13858, "teacher_loss": 0.19224053621292114 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.11860658973455429, "learning_rate": 2.244880105270996e-05, "loss": 0.157, "step": 13859, "teacher_loss": 0.16124776005744934 }, { "compression_loss": 0.0, "epoch": 2.5, "label_loss": 0.44247114658355713, "learning_rate": 2.2446829551835476e-05, "loss": 0.2532, "step": 13860, "teacher_loss": 0.232208251953125 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3132500648498535, "learning_rate": 2.2444857880227516e-05, "loss": 0.2303, "step": 13861, "teacher_loss": 0.2211143523454666 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.7542303800582886, "learning_rate": 2.2442886037931284e-05, "loss": 0.2713, "step": 13862, "teacher_loss": 0.2176954448223114 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.2016574889421463, "learning_rate": 2.2440914024991994e-05, "loss": 0.2009, "step": 13863, "teacher_loss": 0.20086276531219482 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5363192558288574, "learning_rate": 2.2438941841454847e-05, "loss": 0.3049, "step": 13864, "teacher_loss": 0.27919143438339233 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5406144261360168, "learning_rate": 2.2436969487365073e-05, "loss": 0.313, "step": 13865, "teacher_loss": 0.2877134084701538 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.419107586145401, "learning_rate": 2.2434996962767884e-05, "loss": 0.2735, "step": 13866, "teacher_loss": 0.25729072093963623 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3836519122123718, "learning_rate": 2.2433024267708506e-05, "loss": 0.2202, "step": 13867, "teacher_loss": 0.2020922601222992 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.38623061776161194, "learning_rate": 2.2431051402232164e-05, "loss": 0.2425, "step": 13868, "teacher_loss": 0.22649943828582764 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6129484176635742, "learning_rate": 2.2429078366384096e-05, "loss": 0.2391, "step": 13869, "teacher_loss": 0.19754809141159058 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6627760529518127, "learning_rate": 2.2427105160209534e-05, "loss": 0.4407, "step": 13870, "teacher_loss": 0.41598284244537354 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.4372401535511017, "learning_rate": 2.2425131783753723e-05, "loss": 0.2835, "step": 13871, "teacher_loss": 0.26644349098205566 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.2921396493911743, "learning_rate": 2.24231582370619e-05, "loss": 0.22, "step": 13872, "teacher_loss": 0.21194377541542053 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.4336320757865906, "learning_rate": 2.242118452017931e-05, "loss": 0.278, "step": 13873, "teacher_loss": 0.2607576251029968 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.339330792427063, "learning_rate": 2.241921063315121e-05, "loss": 0.1971, "step": 13874, "teacher_loss": 0.18126636743545532 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.24718916416168213, "learning_rate": 2.2417236576022856e-05, "loss": 0.4253, "step": 13875, "teacher_loss": 0.44512125849723816 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3884221315383911, "learning_rate": 2.2415262348839503e-05, "loss": 0.3467, "step": 13876, "teacher_loss": 0.3421027958393097 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.813828706741333, "learning_rate": 2.2413287951646418e-05, "loss": 0.3237, "step": 13877, "teacher_loss": 0.26923543214797974 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.8708653450012207, "learning_rate": 2.2411313384488864e-05, "loss": 0.3716, "step": 13878, "teacher_loss": 0.3160707354545593 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.4535222351551056, "learning_rate": 2.2409338647412116e-05, "loss": 0.2606, "step": 13879, "teacher_loss": 0.23920580744743347 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.4159276485443115, "learning_rate": 2.2407363740461448e-05, "loss": 0.1893, "step": 13880, "teacher_loss": 0.16413141787052155 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.26848524808883667, "learning_rate": 2.2405388663682137e-05, "loss": 0.1855, "step": 13881, "teacher_loss": 0.17623446881771088 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.702314555644989, "learning_rate": 2.240341341711947e-05, "loss": 0.2934, "step": 13882, "teacher_loss": 0.2480011284351349 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6371479034423828, "learning_rate": 2.2401438000818724e-05, "loss": 0.3352, "step": 13883, "teacher_loss": 0.3015964925289154 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5277643203735352, "learning_rate": 2.2399462414825195e-05, "loss": 0.3026, "step": 13884, "teacher_loss": 0.2776230573654175 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3296002447605133, "learning_rate": 2.2397486659184186e-05, "loss": 0.2327, "step": 13885, "teacher_loss": 0.2219845950603485 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.238526850938797, "learning_rate": 2.239551073394098e-05, "loss": 0.2698, "step": 13886, "teacher_loss": 0.2733253240585327 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6469662189483643, "learning_rate": 2.239353463914089e-05, "loss": 0.3707, "step": 13887, "teacher_loss": 0.3399810194969177 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5941764116287231, "learning_rate": 2.2391558374829212e-05, "loss": 0.3402, "step": 13888, "teacher_loss": 0.31203338503837585 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.37006253004074097, "learning_rate": 2.2389581941051264e-05, "loss": 0.2107, "step": 13889, "teacher_loss": 0.19299374520778656 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.4980618357658386, "learning_rate": 2.2387605337852358e-05, "loss": 0.3494, "step": 13890, "teacher_loss": 0.3328579068183899 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.28581681847572327, "learning_rate": 2.2385628565277813e-05, "loss": 0.2599, "step": 13891, "teacher_loss": 0.2570514678955078 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.9599497318267822, "learning_rate": 2.2383651623372945e-05, "loss": 0.2801, "step": 13892, "teacher_loss": 0.20454248785972595 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.2410169094800949, "learning_rate": 2.2381674512183085e-05, "loss": 0.2174, "step": 13893, "teacher_loss": 0.21477477252483368 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5100368857383728, "learning_rate": 2.2379697231753554e-05, "loss": 0.3211, "step": 13894, "teacher_loss": 0.30011799931526184 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3362150192260742, "learning_rate": 2.2377719782129698e-05, "loss": 0.2298, "step": 13895, "teacher_loss": 0.21792559325695038 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.46865421533584595, "learning_rate": 2.2375742163356844e-05, "loss": 0.2404, "step": 13896, "teacher_loss": 0.21506252884864807 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6836999654769897, "learning_rate": 2.2373764375480335e-05, "loss": 0.261, "step": 13897, "teacher_loss": 0.21405695378780365 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.29324865341186523, "learning_rate": 2.2371786418545515e-05, "loss": 0.268, "step": 13898, "teacher_loss": 0.26519161462783813 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.2970011830329895, "learning_rate": 2.236980829259774e-05, "loss": 0.297, "step": 13899, "teacher_loss": 0.29696834087371826 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.2556993365287781, "learning_rate": 2.2367829997682353e-05, "loss": 0.2167, "step": 13900, "teacher_loss": 0.21231606602668762 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3662340044975281, "learning_rate": 2.2365851533844715e-05, "loss": 0.2837, "step": 13901, "teacher_loss": 0.27453556656837463 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.22476446628570557, "learning_rate": 2.236387290113018e-05, "loss": 0.2005, "step": 13902, "teacher_loss": 0.197752445936203 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5312143564224243, "learning_rate": 2.2361894099584126e-05, "loss": 0.297, "step": 13903, "teacher_loss": 0.27095258235931396 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.48935821652412415, "learning_rate": 2.2359915129251905e-05, "loss": 0.3174, "step": 13904, "teacher_loss": 0.29834309220314026 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.5989590287208557, "learning_rate": 2.2357935990178904e-05, "loss": 0.2785, "step": 13905, "teacher_loss": 0.2429085671901703 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3596482276916504, "learning_rate": 2.2355956682410485e-05, "loss": 0.2428, "step": 13906, "teacher_loss": 0.2297956645488739 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3609699308872223, "learning_rate": 2.2353977205992036e-05, "loss": 0.232, "step": 13907, "teacher_loss": 0.21771734952926636 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6307642459869385, "learning_rate": 2.2351997560968935e-05, "loss": 0.4019, "step": 13908, "teacher_loss": 0.376476526260376 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.689670979976654, "learning_rate": 2.2350017747386573e-05, "loss": 0.2795, "step": 13909, "teacher_loss": 0.23390254378318787 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6576633453369141, "learning_rate": 2.2348037765290346e-05, "loss": 0.3032, "step": 13910, "teacher_loss": 0.26383835077285767 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.6056357622146606, "learning_rate": 2.234605761472564e-05, "loss": 0.2608, "step": 13911, "teacher_loss": 0.2225216180086136 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.7158815264701843, "learning_rate": 2.2344077295737856e-05, "loss": 0.383, "step": 13912, "teacher_loss": 0.345986008644104 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.18104025721549988, "learning_rate": 2.2342096808372398e-05, "loss": 0.2508, "step": 13913, "teacher_loss": 0.25856393575668335 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.3073023855686188, "learning_rate": 2.2340116152674677e-05, "loss": 0.2601, "step": 13914, "teacher_loss": 0.25490254163742065 }, { "compression_loss": 0.0, "epoch": 2.51, "label_loss": 0.17231950163841248, "learning_rate": 2.23381353286901e-05, "loss": 0.1967, "step": 13915, "teacher_loss": 0.19945842027664185 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5917526483535767, "learning_rate": 2.2336154336464074e-05, "loss": 0.2995, "step": 13916, "teacher_loss": 0.26708242297172546 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.6367902755737305, "learning_rate": 2.233417317604203e-05, "loss": 0.3458, "step": 13917, "teacher_loss": 0.3134750425815582 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.48613840341567993, "learning_rate": 2.2332191847469384e-05, "loss": 0.2938, "step": 13918, "teacher_loss": 0.2724721133708954 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.7414456605911255, "learning_rate": 2.2330210350791555e-05, "loss": 0.3253, "step": 13919, "teacher_loss": 0.2790381908416748 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5379503965377808, "learning_rate": 2.2328228686053987e-05, "loss": 0.2255, "step": 13920, "teacher_loss": 0.19075697660446167 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.44399380683898926, "learning_rate": 2.23262468533021e-05, "loss": 0.2911, "step": 13921, "teacher_loss": 0.2741544246673584 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.540751576423645, "learning_rate": 2.2324264852581346e-05, "loss": 0.266, "step": 13922, "teacher_loss": 0.2355162352323532 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2988436222076416, "learning_rate": 2.2322282683937155e-05, "loss": 0.2501, "step": 13923, "teacher_loss": 0.2446373552083969 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4422075152397156, "learning_rate": 2.232030034741497e-05, "loss": 0.6024, "step": 13924, "teacher_loss": 0.6202346086502075 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.38594767451286316, "learning_rate": 2.2318317843060254e-05, "loss": 0.1837, "step": 13925, "teacher_loss": 0.1612045168876648 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.47010648250579834, "learning_rate": 2.2316335170918446e-05, "loss": 0.3128, "step": 13926, "teacher_loss": 0.29532408714294434 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4728595018386841, "learning_rate": 2.2314352331035007e-05, "loss": 0.2786, "step": 13927, "teacher_loss": 0.257002592086792 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4968397617340088, "learning_rate": 2.23123693234554e-05, "loss": 0.3283, "step": 13928, "teacher_loss": 0.3095853328704834 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4015643298625946, "learning_rate": 2.231038614822509e-05, "loss": 0.2228, "step": 13929, "teacher_loss": 0.20291663706302643 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4670286774635315, "learning_rate": 2.2308402805389545e-05, "loss": 0.2843, "step": 13930, "teacher_loss": 0.2640414237976074 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.40011703968048096, "learning_rate": 2.230641929499423e-05, "loss": 0.2087, "step": 13931, "teacher_loss": 0.18744783103466034 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5586017966270447, "learning_rate": 2.230443561708463e-05, "loss": 0.3682, "step": 13932, "teacher_loss": 0.34705692529678345 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5576722621917725, "learning_rate": 2.230245177170622e-05, "loss": 0.2571, "step": 13933, "teacher_loss": 0.2236793488264084 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4924560487270355, "learning_rate": 2.2300467758904486e-05, "loss": 0.3365, "step": 13934, "teacher_loss": 0.3192119002342224 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.47274962067604065, "learning_rate": 2.2298483578724914e-05, "loss": 0.2099, "step": 13935, "teacher_loss": 0.18071559071540833 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4030134081840515, "learning_rate": 2.229649923121299e-05, "loss": 0.3484, "step": 13936, "teacher_loss": 0.3423698842525482 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.283250629901886, "learning_rate": 2.229451471641422e-05, "loss": 0.2776, "step": 13937, "teacher_loss": 0.27701130509376526 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5338020324707031, "learning_rate": 2.2292530034374097e-05, "loss": 0.2752, "step": 13938, "teacher_loss": 0.24645012617111206 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.8010367155075073, "learning_rate": 2.2290545185138125e-05, "loss": 0.2999, "step": 13939, "teacher_loss": 0.24420180916786194 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.6379849910736084, "learning_rate": 2.2288560168751812e-05, "loss": 0.2999, "step": 13940, "teacher_loss": 0.26234519481658936 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.6040122509002686, "learning_rate": 2.228657498526066e-05, "loss": 0.2929, "step": 13941, "teacher_loss": 0.2582801878452301 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2518012523651123, "learning_rate": 2.2284589634710197e-05, "loss": 0.3108, "step": 13942, "teacher_loss": 0.31741055846214294 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4290907680988312, "learning_rate": 2.2282604117145934e-05, "loss": 0.2254, "step": 13943, "teacher_loss": 0.2027406394481659 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2751498818397522, "learning_rate": 2.2280618432613385e-05, "loss": 0.2062, "step": 13944, "teacher_loss": 0.19855996966362 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.6176972985267639, "learning_rate": 2.2278632581158095e-05, "loss": 0.2512, "step": 13945, "teacher_loss": 0.21047306060791016 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4580097794532776, "learning_rate": 2.2276646562825572e-05, "loss": 0.3946, "step": 13946, "teacher_loss": 0.387581467628479 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2912094295024872, "learning_rate": 2.227466037766136e-05, "loss": 0.2476, "step": 13947, "teacher_loss": 0.24274703860282898 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.6111304759979248, "learning_rate": 2.2272674025711004e-05, "loss": 0.326, "step": 13948, "teacher_loss": 0.2942809760570526 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.19553899765014648, "learning_rate": 2.227068750702003e-05, "loss": 0.2301, "step": 13949, "teacher_loss": 0.23393535614013672 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5711072683334351, "learning_rate": 2.2268700821633996e-05, "loss": 0.2438, "step": 13950, "teacher_loss": 0.207398921251297 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.26664450764656067, "learning_rate": 2.2266713969598442e-05, "loss": 0.1735, "step": 13951, "teacher_loss": 0.16310890018939972 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5667017102241516, "learning_rate": 2.226472695095892e-05, "loss": 0.3738, "step": 13952, "teacher_loss": 0.3523963391780853 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.601837158203125, "learning_rate": 2.2262739765761e-05, "loss": 0.4228, "step": 13953, "teacher_loss": 0.40289610624313354 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 1.0174680948257446, "learning_rate": 2.226075241405022e-05, "loss": 0.3578, "step": 13954, "teacher_loss": 0.28445762395858765 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.324079304933548, "learning_rate": 2.225876489587216e-05, "loss": 0.1981, "step": 13955, "teacher_loss": 0.18404968082904816 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.38662248849868774, "learning_rate": 2.225677721127239e-05, "loss": 0.3806, "step": 13956, "teacher_loss": 0.37989723682403564 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5130556225776672, "learning_rate": 2.2254789360296468e-05, "loss": 0.2628, "step": 13957, "teacher_loss": 0.2349393665790558 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2537984549999237, "learning_rate": 2.225280134298998e-05, "loss": 0.1561, "step": 13958, "teacher_loss": 0.1452627182006836 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2435506284236908, "learning_rate": 2.2250813159398505e-05, "loss": 0.1902, "step": 13959, "teacher_loss": 0.18429747223854065 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5710318088531494, "learning_rate": 2.2248824809567618e-05, "loss": 0.27, "step": 13960, "teacher_loss": 0.23659634590148926 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.48236602544784546, "learning_rate": 2.224683629354291e-05, "loss": 0.2298, "step": 13961, "teacher_loss": 0.20179128646850586 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.5113157629966736, "learning_rate": 2.224484761136998e-05, "loss": 0.2497, "step": 13962, "teacher_loss": 0.22058531641960144 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.11343131959438324, "learning_rate": 2.224285876309441e-05, "loss": 0.254, "step": 13963, "teacher_loss": 0.2695688307285309 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.33425742387771606, "learning_rate": 2.224086974876181e-05, "loss": 0.1897, "step": 13964, "teacher_loss": 0.17364037036895752 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4697314500808716, "learning_rate": 2.223888056841777e-05, "loss": 0.3836, "step": 13965, "teacher_loss": 0.37405329942703247 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.1945856213569641, "learning_rate": 2.22368912221079e-05, "loss": 0.184, "step": 13966, "teacher_loss": 0.18284344673156738 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.21014806628227234, "learning_rate": 2.2234901709877814e-05, "loss": 0.2664, "step": 13967, "teacher_loss": 0.2726455628871918 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 1.305837631225586, "learning_rate": 2.223291203177313e-05, "loss": 0.385, "step": 13968, "teacher_loss": 0.2826395034790039 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.2691567540168762, "learning_rate": 2.2230922187839447e-05, "loss": 0.1736, "step": 13969, "teacher_loss": 0.16298556327819824 }, { "compression_loss": 0.0, "epoch": 2.52, "label_loss": 0.4690394997596741, "learning_rate": 2.2228932178122407e-05, "loss": 0.1872, "step": 13970, "teacher_loss": 0.1558523178100586 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.2960183024406433, "learning_rate": 2.2226942002667622e-05, "loss": 0.375, "step": 13971, "teacher_loss": 0.38380488753318787 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.4008580446243286, "learning_rate": 2.2224951661520718e-05, "loss": 0.3499, "step": 13972, "teacher_loss": 0.3442028760910034 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.6065565347671509, "learning_rate": 2.2222961154727346e-05, "loss": 0.2717, "step": 13973, "teacher_loss": 0.23452654480934143 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.4173336327075958, "learning_rate": 2.222097048233312e-05, "loss": 0.2276, "step": 13974, "teacher_loss": 0.2064652442932129 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.6351914405822754, "learning_rate": 2.22189796443837e-05, "loss": 0.4771, "step": 13975, "teacher_loss": 0.4594815969467163 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.8871963620185852, "learning_rate": 2.2216988640924716e-05, "loss": 0.3788, "step": 13976, "teacher_loss": 0.3223353624343872 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 1.298387050628662, "learning_rate": 2.2214997472001814e-05, "loss": 0.8011, "step": 13977, "teacher_loss": 0.7458642721176147 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.23911911249160767, "learning_rate": 2.221300613766066e-05, "loss": 0.1841, "step": 13978, "teacher_loss": 0.1779319941997528 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.8469879627227783, "learning_rate": 2.2211014637946896e-05, "loss": 0.5976, "step": 13979, "teacher_loss": 0.5698357820510864 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.7309197187423706, "learning_rate": 2.220902297290619e-05, "loss": 0.2763, "step": 13980, "teacher_loss": 0.22574323415756226 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.4994797110557556, "learning_rate": 2.22070311425842e-05, "loss": 0.2166, "step": 13981, "teacher_loss": 0.18515023589134216 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.7803881764411926, "learning_rate": 2.220503914702659e-05, "loss": 0.4206, "step": 13982, "teacher_loss": 0.3806071877479553 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.8564822673797607, "learning_rate": 2.2203046986279038e-05, "loss": 0.3427, "step": 13983, "teacher_loss": 0.28565141558647156 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.37768858671188354, "learning_rate": 2.2201054660387212e-05, "loss": 0.2751, "step": 13984, "teacher_loss": 0.2637355327606201 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5590472221374512, "learning_rate": 2.2199062169396796e-05, "loss": 0.2392, "step": 13985, "teacher_loss": 0.20362427830696106 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.19404137134552002, "learning_rate": 2.219706951335347e-05, "loss": 0.1982, "step": 13986, "teacher_loss": 0.19869671761989594 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.2864931523799896, "learning_rate": 2.219507669230291e-05, "loss": 0.2871, "step": 13987, "teacher_loss": 0.287209689617157 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.3811852037906647, "learning_rate": 2.219308370629082e-05, "loss": 0.3106, "step": 13988, "teacher_loss": 0.30280041694641113 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.400515615940094, "learning_rate": 2.2191090555362882e-05, "loss": 0.2075, "step": 13989, "teacher_loss": 0.18609312176704407 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.2266104519367218, "learning_rate": 2.2189097239564803e-05, "loss": 0.2045, "step": 13990, "teacher_loss": 0.20202240347862244 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.24703636765480042, "learning_rate": 2.2187103758942274e-05, "loss": 0.2319, "step": 13991, "teacher_loss": 0.23016972839832306 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5101150274276733, "learning_rate": 2.2185110113541005e-05, "loss": 0.2699, "step": 13992, "teacher_loss": 0.24318143725395203 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.3280387818813324, "learning_rate": 2.2183116303406705e-05, "loss": 0.28, "step": 13993, "teacher_loss": 0.2746083438396454 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5447311401367188, "learning_rate": 2.2181122328585077e-05, "loss": 0.3099, "step": 13994, "teacher_loss": 0.2838304042816162 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.18166238069534302, "learning_rate": 2.2179128189121853e-05, "loss": 0.2451, "step": 13995, "teacher_loss": 0.2521204650402069 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.6368261575698853, "learning_rate": 2.217713388506274e-05, "loss": 0.2513, "step": 13996, "teacher_loss": 0.20843097567558289 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5650144219398499, "learning_rate": 2.2175139416453466e-05, "loss": 0.2872, "step": 13997, "teacher_loss": 0.25634029507637024 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.4835836589336395, "learning_rate": 2.2173144783339756e-05, "loss": 0.2544, "step": 13998, "teacher_loss": 0.228973388671875 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5226331353187561, "learning_rate": 2.217114998576734e-05, "loss": 0.2471, "step": 13999, "teacher_loss": 0.21647889912128448 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.43752187490463257, "learning_rate": 2.2169155023781962e-05, "loss": 0.2285, "step": 14000, "teacher_loss": 0.2052355259656906 }, { "epoch": 2.53, "eval_exact_match": 79.5837275307474, "eval_f1": 87.03383772269521, "step": 14000 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5957362055778503, "learning_rate": 2.2167159897429352e-05, "loss": 0.3026, "step": 14001, "teacher_loss": 0.27007582783699036 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.8391679525375366, "learning_rate": 2.2165164606755247e-05, "loss": 0.5381, "step": 14002, "teacher_loss": 0.5046356916427612 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.33081912994384766, "learning_rate": 2.216316915180541e-05, "loss": 0.346, "step": 14003, "teacher_loss": 0.3477044105529785 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.3575651943683624, "learning_rate": 2.2161173532625573e-05, "loss": 0.1968, "step": 14004, "teacher_loss": 0.1789817214012146 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.910348117351532, "learning_rate": 2.21591777492615e-05, "loss": 0.5704, "step": 14005, "teacher_loss": 0.5325928926467896 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.48551708459854126, "learning_rate": 2.2157181801758945e-05, "loss": 0.3222, "step": 14006, "teacher_loss": 0.3040800094604492 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5723710656166077, "learning_rate": 2.215518569016367e-05, "loss": 0.3423, "step": 14007, "teacher_loss": 0.31678885221481323 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.43259888887405396, "learning_rate": 2.215318941452144e-05, "loss": 0.2864, "step": 14008, "teacher_loss": 0.27018433809280396 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.28980234265327454, "learning_rate": 2.215119297487802e-05, "loss": 0.2184, "step": 14009, "teacher_loss": 0.21044719219207764 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.6581799983978271, "learning_rate": 2.214919637127919e-05, "loss": 0.3287, "step": 14010, "teacher_loss": 0.29211726784706116 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.35981911420822144, "learning_rate": 2.2147199603770717e-05, "loss": 0.3267, "step": 14011, "teacher_loss": 0.3230147659778595 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5385850667953491, "learning_rate": 2.2145202672398387e-05, "loss": 0.3265, "step": 14012, "teacher_loss": 0.3028862476348877 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.29459571838378906, "learning_rate": 2.2143205577207985e-05, "loss": 0.1835, "step": 14013, "teacher_loss": 0.17117883265018463 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.34881746768951416, "learning_rate": 2.2141208318245294e-05, "loss": 0.2401, "step": 14014, "teacher_loss": 0.22796830534934998 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5071390867233276, "learning_rate": 2.2139210895556104e-05, "loss": 0.3275, "step": 14015, "teacher_loss": 0.30757445096969604 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.6363747715950012, "learning_rate": 2.2137213309186214e-05, "loss": 0.3166, "step": 14016, "teacher_loss": 0.281048983335495 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.17972978949546814, "learning_rate": 2.2135215559181423e-05, "loss": 0.2294, "step": 14017, "teacher_loss": 0.23494967818260193 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.45542657375335693, "learning_rate": 2.213321764558753e-05, "loss": 0.2368, "step": 14018, "teacher_loss": 0.2124527394771576 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.3774620592594147, "learning_rate": 2.2131219568450344e-05, "loss": 0.248, "step": 14019, "teacher_loss": 0.2336605191230774 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.11900528520345688, "learning_rate": 2.2129221327815675e-05, "loss": 0.2273, "step": 14020, "teacher_loss": 0.23933851718902588 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.43570637702941895, "learning_rate": 2.2127222923729332e-05, "loss": 0.2738, "step": 14021, "teacher_loss": 0.255815327167511 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.5605031847953796, "learning_rate": 2.2125224356237136e-05, "loss": 0.256, "step": 14022, "teacher_loss": 0.22221572697162628 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.7855397462844849, "learning_rate": 2.2123225625384913e-05, "loss": 0.2777, "step": 14023, "teacher_loss": 0.22123906016349792 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.23428291082382202, "learning_rate": 2.212122673121848e-05, "loss": 0.1426, "step": 14024, "teacher_loss": 0.13236159086227417 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.1735738217830658, "learning_rate": 2.2119227673783668e-05, "loss": 0.2417, "step": 14025, "teacher_loss": 0.24928683042526245 }, { "compression_loss": 0.0, "epoch": 2.53, "label_loss": 0.1873970329761505, "learning_rate": 2.2117228453126308e-05, "loss": 0.292, "step": 14026, "teacher_loss": 0.3035784363746643 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.627112627029419, "learning_rate": 2.2115229069292246e-05, "loss": 0.3128, "step": 14027, "teacher_loss": 0.2778727412223816 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3382111191749573, "learning_rate": 2.211322952232731e-05, "loss": 0.2839, "step": 14028, "teacher_loss": 0.2779051661491394 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.410192608833313, "learning_rate": 2.211122981227734e-05, "loss": 0.2619, "step": 14029, "teacher_loss": 0.2454051971435547 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.30241554975509644, "learning_rate": 2.2109229939188202e-05, "loss": 0.2389, "step": 14030, "teacher_loss": 0.23184853792190552 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3824981451034546, "learning_rate": 2.210722990310573e-05, "loss": 0.3287, "step": 14031, "teacher_loss": 0.32273900508880615 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.7765858173370361, "learning_rate": 2.210522970407578e-05, "loss": 0.3054, "step": 14032, "teacher_loss": 0.2530953586101532 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3652813732624054, "learning_rate": 2.2103229342144225e-05, "loss": 0.2904, "step": 14033, "teacher_loss": 0.2821311950683594 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5748870372772217, "learning_rate": 2.2101228817356912e-05, "loss": 0.2505, "step": 14034, "teacher_loss": 0.2144603133201599 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.6449252367019653, "learning_rate": 2.2099228129759718e-05, "loss": 0.2449, "step": 14035, "teacher_loss": 0.2004542201757431 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3554195761680603, "learning_rate": 2.2097227279398506e-05, "loss": 0.223, "step": 14036, "teacher_loss": 0.20832259953022003 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.39301687479019165, "learning_rate": 2.2095226266319145e-05, "loss": 0.2595, "step": 14037, "teacher_loss": 0.24470297992229462 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.4760388135910034, "learning_rate": 2.209322509056753e-05, "loss": 0.2285, "step": 14038, "teacher_loss": 0.20096711814403534 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5733082890510559, "learning_rate": 2.2091223752189522e-05, "loss": 0.2264, "step": 14039, "teacher_loss": 0.1878947913646698 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.15415360033512115, "learning_rate": 2.2089222251231014e-05, "loss": 0.1193, "step": 14040, "teacher_loss": 0.11538200080394745 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.2853025794029236, "learning_rate": 2.2087220587737896e-05, "loss": 0.2228, "step": 14041, "teacher_loss": 0.21588841080665588 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.4536241292953491, "learning_rate": 2.2085218761756058e-05, "loss": 0.3283, "step": 14042, "teacher_loss": 0.3143380880355835 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.6075706481933594, "learning_rate": 2.2083216773331394e-05, "loss": 0.2907, "step": 14043, "teacher_loss": 0.2555469870567322 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3903805613517761, "learning_rate": 2.208121462250981e-05, "loss": 0.2476, "step": 14044, "teacher_loss": 0.23174268007278442 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.48584216833114624, "learning_rate": 2.2079212309337205e-05, "loss": 0.2621, "step": 14045, "teacher_loss": 0.23720073699951172 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.31547147035598755, "learning_rate": 2.207720983385948e-05, "loss": 0.1865, "step": 14046, "teacher_loss": 0.1721748411655426 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.523248553276062, "learning_rate": 2.207520719612256e-05, "loss": 0.3568, "step": 14047, "teacher_loss": 0.3383296728134155 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.4042520225048065, "learning_rate": 2.207320439617235e-05, "loss": 0.3066, "step": 14048, "teacher_loss": 0.2956960201263428 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5169178247451782, "learning_rate": 2.2071201434054772e-05, "loss": 0.2454, "step": 14049, "teacher_loss": 0.21523939073085785 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.17877496778964996, "learning_rate": 2.2069198309815738e-05, "loss": 0.1766, "step": 14050, "teacher_loss": 0.17639514803886414 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5083367824554443, "learning_rate": 2.2067195023501187e-05, "loss": 0.2518, "step": 14051, "teacher_loss": 0.22329753637313843 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.7437264919281006, "learning_rate": 2.206519157515704e-05, "loss": 0.3912, "step": 14052, "teacher_loss": 0.35206368565559387 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.31929442286491394, "learning_rate": 2.2063187964829237e-05, "loss": 0.2075, "step": 14053, "teacher_loss": 0.1950480192899704 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3612954318523407, "learning_rate": 2.2061184192563702e-05, "loss": 0.2389, "step": 14054, "teacher_loss": 0.22525353729724884 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3310979902744293, "learning_rate": 2.2059180258406394e-05, "loss": 0.2471, "step": 14055, "teacher_loss": 0.23775094747543335 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.246351420879364, "learning_rate": 2.205717616240324e-05, "loss": 0.2109, "step": 14056, "teacher_loss": 0.20690733194351196 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.3076486587524414, "learning_rate": 2.2055171904600202e-05, "loss": 0.186, "step": 14057, "teacher_loss": 0.17251771688461304 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.29886895418167114, "learning_rate": 2.205316748504322e-05, "loss": 0.3678, "step": 14058, "teacher_loss": 0.37546294927597046 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.21419644355773926, "learning_rate": 2.2051162903778252e-05, "loss": 0.2041, "step": 14059, "teacher_loss": 0.20295082032680511 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.9435220956802368, "learning_rate": 2.2049158160851263e-05, "loss": 0.3585, "step": 14060, "teacher_loss": 0.2934580147266388 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.4388246536254883, "learning_rate": 2.2047153256308212e-05, "loss": 0.2681, "step": 14061, "teacher_loss": 0.2491452395915985 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.808463454246521, "learning_rate": 2.204514819019506e-05, "loss": 0.2297, "step": 14062, "teacher_loss": 0.1653864085674286 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.26344990730285645, "learning_rate": 2.204314296255779e-05, "loss": 0.1913, "step": 14063, "teacher_loss": 0.18326006829738617 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.12900444865226746, "learning_rate": 2.204113757344236e-05, "loss": 0.1973, "step": 14064, "teacher_loss": 0.20484323799610138 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5820817351341248, "learning_rate": 2.2039132022894763e-05, "loss": 0.2763, "step": 14065, "teacher_loss": 0.24229255318641663 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.2785002887248993, "learning_rate": 2.203712631096097e-05, "loss": 0.1749, "step": 14066, "teacher_loss": 0.16340306401252747 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.6004549264907837, "learning_rate": 2.203512043768697e-05, "loss": 0.286, "step": 14067, "teacher_loss": 0.25104573369026184 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.726807713508606, "learning_rate": 2.2033114403118754e-05, "loss": 0.4151, "step": 14068, "teacher_loss": 0.38051921129226685 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.8822823762893677, "learning_rate": 2.2031108207302303e-05, "loss": 0.4227, "step": 14069, "teacher_loss": 0.37167200446128845 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.30861830711364746, "learning_rate": 2.2029101850283628e-05, "loss": 0.208, "step": 14070, "teacher_loss": 0.19686290621757507 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5329614281654358, "learning_rate": 2.202709533210872e-05, "loss": 0.4612, "step": 14071, "teacher_loss": 0.4532531201839447 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.347231924533844, "learning_rate": 2.202508865282358e-05, "loss": 0.1809, "step": 14072, "teacher_loss": 0.1624601185321808 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.681158185005188, "learning_rate": 2.2023081812474224e-05, "loss": 0.2956, "step": 14073, "teacher_loss": 0.2527763247489929 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.38196295499801636, "learning_rate": 2.202107481110666e-05, "loss": 0.2956, "step": 14074, "teacher_loss": 0.285977303981781 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.5220724940299988, "learning_rate": 2.2019067648766895e-05, "loss": 0.2158, "step": 14075, "teacher_loss": 0.18174313008785248 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.4009571373462677, "learning_rate": 2.201706032550096e-05, "loss": 0.2927, "step": 14076, "teacher_loss": 0.2806204855442047 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.20406538248062134, "learning_rate": 2.2015052841354866e-05, "loss": 0.1668, "step": 14077, "teacher_loss": 0.16261330246925354 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.507172703742981, "learning_rate": 2.2013045196374645e-05, "loss": 0.2537, "step": 14078, "teacher_loss": 0.22549480199813843 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.44694024324417114, "learning_rate": 2.201103739060632e-05, "loss": 0.2156, "step": 14079, "teacher_loss": 0.18989154696464539 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 0.7450734376907349, "learning_rate": 2.200902942409593e-05, "loss": 0.6223, "step": 14080, "teacher_loss": 0.608710527420044 }, { "compression_loss": 0.0, "epoch": 2.54, "label_loss": 1.007805585861206, "learning_rate": 2.200702129688951e-05, "loss": 0.3065, "step": 14081, "teacher_loss": 0.22858496010303497 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.3384752571582794, "learning_rate": 2.20050130090331e-05, "loss": 0.2325, "step": 14082, "teacher_loss": 0.22076211869716644 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5357757806777954, "learning_rate": 2.200300456057274e-05, "loss": 0.4807, "step": 14083, "teacher_loss": 0.47454819083213806 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.23482458293437958, "learning_rate": 2.2000995951554487e-05, "loss": 0.1895, "step": 14084, "teacher_loss": 0.18445265293121338 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.24944043159484863, "learning_rate": 2.1998987182024384e-05, "loss": 0.2395, "step": 14085, "teacher_loss": 0.23838230967521667 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.2411850094795227, "learning_rate": 2.199697825202849e-05, "loss": 0.3374, "step": 14086, "teacher_loss": 0.3481428325176239 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.23642152547836304, "learning_rate": 2.199496916161286e-05, "loss": 0.2187, "step": 14087, "teacher_loss": 0.21671488881111145 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.35776764154434204, "learning_rate": 2.1992959910823563e-05, "loss": 0.2284, "step": 14088, "teacher_loss": 0.21400777995586395 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4272218346595764, "learning_rate": 2.199095049970666e-05, "loss": 0.229, "step": 14089, "teacher_loss": 0.20693504810333252 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.36526215076446533, "learning_rate": 2.1988940928308223e-05, "loss": 0.2763, "step": 14090, "teacher_loss": 0.26637548208236694 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4052009582519531, "learning_rate": 2.198693119667432e-05, "loss": 0.2017, "step": 14091, "teacher_loss": 0.17905010282993317 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.38812676072120667, "learning_rate": 2.1984921304851035e-05, "loss": 0.275, "step": 14092, "teacher_loss": 0.26238229870796204 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.9409998655319214, "learning_rate": 2.198291125288445e-05, "loss": 0.3418, "step": 14093, "teacher_loss": 0.2752014398574829 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.2680700421333313, "learning_rate": 2.198090104082064e-05, "loss": 0.1783, "step": 14094, "teacher_loss": 0.1683673858642578 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4754878282546997, "learning_rate": 2.19788906687057e-05, "loss": 0.3228, "step": 14095, "teacher_loss": 0.30585891008377075 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4327659010887146, "learning_rate": 2.1976880136585725e-05, "loss": 0.415, "step": 14096, "teacher_loss": 0.41297274827957153 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.44516468048095703, "learning_rate": 2.19748694445068e-05, "loss": 0.223, "step": 14097, "teacher_loss": 0.19832664728164673 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.31076425313949585, "learning_rate": 2.1972858592515037e-05, "loss": 0.2559, "step": 14098, "teacher_loss": 0.2497912347316742 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.6550500392913818, "learning_rate": 2.197084758065653e-05, "loss": 0.5141, "step": 14099, "teacher_loss": 0.49847814440727234 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5302172899246216, "learning_rate": 2.1968836408977384e-05, "loss": 0.2613, "step": 14100, "teacher_loss": 0.2314019650220871 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5367562770843506, "learning_rate": 2.1966825077523718e-05, "loss": 0.3119, "step": 14101, "teacher_loss": 0.28687891364097595 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.34987181425094604, "learning_rate": 2.1964813586341636e-05, "loss": 0.3052, "step": 14102, "teacher_loss": 0.30019691586494446 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.305778443813324, "learning_rate": 2.1962801935477263e-05, "loss": 0.1896, "step": 14103, "teacher_loss": 0.17672264575958252 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.6921484470367432, "learning_rate": 2.196079012497672e-05, "loss": 0.2512, "step": 14104, "teacher_loss": 0.20222973823547363 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.28185778856277466, "learning_rate": 2.195877815488612e-05, "loss": 0.2756, "step": 14105, "teacher_loss": 0.2749515771865845 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.8075480461120605, "learning_rate": 2.1956766025251607e-05, "loss": 0.3559, "step": 14106, "teacher_loss": 0.30566275119781494 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.30305686593055725, "learning_rate": 2.1954753736119306e-05, "loss": 0.2659, "step": 14107, "teacher_loss": 0.2617884576320648 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.7195541858673096, "learning_rate": 2.1952741287535356e-05, "loss": 0.2958, "step": 14108, "teacher_loss": 0.24873146414756775 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5910072922706604, "learning_rate": 2.1950728679545887e-05, "loss": 0.2327, "step": 14109, "teacher_loss": 0.19285470247268677 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4437260627746582, "learning_rate": 2.1948715912197052e-05, "loss": 0.2729, "step": 14110, "teacher_loss": 0.2539476752281189 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.3492084741592407, "learning_rate": 2.1946702985534994e-05, "loss": 0.2295, "step": 14111, "teacher_loss": 0.21623246371746063 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.12968352437019348, "learning_rate": 2.1944689899605867e-05, "loss": 0.2091, "step": 14112, "teacher_loss": 0.21796298027038574 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5501447916030884, "learning_rate": 2.194267665445582e-05, "loss": 0.2078, "step": 14113, "teacher_loss": 0.16978394985198975 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.3403867483139038, "learning_rate": 2.1940663250131008e-05, "loss": 0.1744, "step": 14114, "teacher_loss": 0.15599872171878815 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5268205404281616, "learning_rate": 2.19386496866776e-05, "loss": 0.4073, "step": 14115, "teacher_loss": 0.39399218559265137 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.22666972875595093, "learning_rate": 2.1936635964141763e-05, "loss": 0.2997, "step": 14116, "teacher_loss": 0.30776381492614746 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.3363306224346161, "learning_rate": 2.1934622082569655e-05, "loss": 0.2523, "step": 14117, "teacher_loss": 0.24298641085624695 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5341652035713196, "learning_rate": 2.1932608042007453e-05, "loss": 0.3433, "step": 14118, "teacher_loss": 0.32211798429489136 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 1.2450436353683472, "learning_rate": 2.1930593842501335e-05, "loss": 0.3397, "step": 14119, "teacher_loss": 0.23913028836250305 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.41809555888175964, "learning_rate": 2.192857948409748e-05, "loss": 0.2989, "step": 14120, "teacher_loss": 0.28570622205734253 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.2445700615644455, "learning_rate": 2.192656496684207e-05, "loss": 0.2921, "step": 14121, "teacher_loss": 0.2974112629890442 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.45156484842300415, "learning_rate": 2.192455029078129e-05, "loss": 0.3054, "step": 14122, "teacher_loss": 0.28919684886932373 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.4840802252292633, "learning_rate": 2.1922535455961333e-05, "loss": 0.2587, "step": 14123, "teacher_loss": 0.23364229500293732 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.34423699975013733, "learning_rate": 2.19205204624284e-05, "loss": 0.2658, "step": 14124, "teacher_loss": 0.25707298517227173 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.29873815178871155, "learning_rate": 2.191850531022867e-05, "loss": 0.228, "step": 14125, "teacher_loss": 0.22011500597000122 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.24945895373821259, "learning_rate": 2.1916489999408366e-05, "loss": 0.144, "step": 14126, "teacher_loss": 0.13223929703235626 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.3605618476867676, "learning_rate": 2.191447453001368e-05, "loss": 0.2629, "step": 14127, "teacher_loss": 0.25203514099121094 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.34237921237945557, "learning_rate": 2.1912458902090823e-05, "loss": 0.1913, "step": 14128, "teacher_loss": 0.17449024319648743 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.7491214275360107, "learning_rate": 2.191044311568601e-05, "loss": 0.2229, "step": 14129, "teacher_loss": 0.16445466876029968 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.5319511890411377, "learning_rate": 2.190842717084545e-05, "loss": 0.305, "step": 14130, "teacher_loss": 0.27978986501693726 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.40109291672706604, "learning_rate": 2.190641106761537e-05, "loss": 0.1922, "step": 14131, "teacher_loss": 0.16897651553153992 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.362934410572052, "learning_rate": 2.190439480604199e-05, "loss": 0.2259, "step": 14132, "teacher_loss": 0.21069568395614624 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.20084592700004578, "learning_rate": 2.1902378386171542e-05, "loss": 0.2229, "step": 14133, "teacher_loss": 0.2253381311893463 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.42890846729278564, "learning_rate": 2.1900361808050246e-05, "loss": 0.2456, "step": 14134, "teacher_loss": 0.22517837584018707 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.8364607095718384, "learning_rate": 2.1898345071724344e-05, "loss": 0.3253, "step": 14135, "teacher_loss": 0.2684541344642639 }, { "compression_loss": 0.0, "epoch": 2.55, "label_loss": 0.32684433460235596, "learning_rate": 2.1896328177240074e-05, "loss": 0.2289, "step": 14136, "teacher_loss": 0.21796786785125732 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2907610535621643, "learning_rate": 2.1894311124643675e-05, "loss": 0.2473, "step": 14137, "teacher_loss": 0.2424989938735962 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2031404823064804, "learning_rate": 2.189229391398139e-05, "loss": 0.1887, "step": 14138, "teacher_loss": 0.18708856403827667 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.31731581687927246, "learning_rate": 2.189027654529947e-05, "loss": 0.2541, "step": 14139, "teacher_loss": 0.24708092212677002 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4953751564025879, "learning_rate": 2.1888259018644167e-05, "loss": 0.3317, "step": 14140, "teacher_loss": 0.3135678768157959 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.0603305846452713, "learning_rate": 2.1886241334061737e-05, "loss": 0.1308, "step": 14141, "teacher_loss": 0.13861612975597382 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.6661931276321411, "learning_rate": 2.1884223491598443e-05, "loss": 0.3738, "step": 14142, "teacher_loss": 0.34127065539360046 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.45140206813812256, "learning_rate": 2.1882205491300542e-05, "loss": 0.3614, "step": 14143, "teacher_loss": 0.35140180587768555 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.31395697593688965, "learning_rate": 2.1880187333214297e-05, "loss": 0.2926, "step": 14144, "teacher_loss": 0.2902813255786896 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5264514684677124, "learning_rate": 2.187816901738599e-05, "loss": 0.265, "step": 14145, "teacher_loss": 0.23598219454288483 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2271890640258789, "learning_rate": 2.187615054386189e-05, "loss": 0.2937, "step": 14146, "teacher_loss": 0.3011230528354645 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.44491517543792725, "learning_rate": 2.187413191268827e-05, "loss": 0.2348, "step": 14147, "teacher_loss": 0.21148845553398132 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.22685542702674866, "learning_rate": 2.187211312391142e-05, "loss": 0.2119, "step": 14148, "teacher_loss": 0.2102307677268982 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.31112754344940186, "learning_rate": 2.1870094177577614e-05, "loss": 0.1886, "step": 14149, "teacher_loss": 0.17495453357696533 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.1820644736289978, "learning_rate": 2.1868075073733147e-05, "loss": 0.217, "step": 14150, "teacher_loss": 0.22091570496559143 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.777638852596283, "learning_rate": 2.1866055812424317e-05, "loss": 0.3151, "step": 14151, "teacher_loss": 0.2636851370334625 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.3828042447566986, "learning_rate": 2.1864036393697405e-05, "loss": 0.2685, "step": 14152, "teacher_loss": 0.2558153569698334 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.49958133697509766, "learning_rate": 2.186201681759872e-05, "loss": 0.1867, "step": 14153, "teacher_loss": 0.15198293328285217 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2810642719268799, "learning_rate": 2.1859997084174562e-05, "loss": 0.2653, "step": 14154, "teacher_loss": 0.2635181248188019 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.1996690034866333, "learning_rate": 2.185797719347124e-05, "loss": 0.2062, "step": 14155, "teacher_loss": 0.2069195955991745 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.34417709708213806, "learning_rate": 2.1855957145535068e-05, "loss": 0.1465, "step": 14156, "teacher_loss": 0.124505415558815 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4435366690158844, "learning_rate": 2.1853936940412342e-05, "loss": 0.2345, "step": 14157, "teacher_loss": 0.21125584840774536 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5421137809753418, "learning_rate": 2.18519165781494e-05, "loss": 0.3176, "step": 14158, "teacher_loss": 0.29266357421875 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5976833701133728, "learning_rate": 2.184989605879255e-05, "loss": 0.4075, "step": 14159, "teacher_loss": 0.38640332221984863 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5281933546066284, "learning_rate": 2.184787538238812e-05, "loss": 0.2753, "step": 14160, "teacher_loss": 0.24717864394187927 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.48728421330451965, "learning_rate": 2.184585454898244e-05, "loss": 0.5523, "step": 14161, "teacher_loss": 0.5595563650131226 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4356938898563385, "learning_rate": 2.184383355862184e-05, "loss": 0.2105, "step": 14162, "teacher_loss": 0.18547439575195312 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4942814111709595, "learning_rate": 2.1841812411352658e-05, "loss": 0.2739, "step": 14163, "teacher_loss": 0.24946808815002441 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2307223081588745, "learning_rate": 2.1839791107221228e-05, "loss": 0.1663, "step": 14164, "teacher_loss": 0.15912988781929016 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5112999677658081, "learning_rate": 2.1837769646273892e-05, "loss": 0.2639, "step": 14165, "teacher_loss": 0.23640765249729156 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.8613361120223999, "learning_rate": 2.1835748028557004e-05, "loss": 0.5079, "step": 14166, "teacher_loss": 0.4686659276485443 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5411689877510071, "learning_rate": 2.1833726254116903e-05, "loss": 0.2326, "step": 14167, "teacher_loss": 0.1983032524585724 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5302461385726929, "learning_rate": 2.183170432299995e-05, "loss": 0.3378, "step": 14168, "teacher_loss": 0.3163986802101135 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.44137582182884216, "learning_rate": 2.18296822352525e-05, "loss": 0.2827, "step": 14169, "teacher_loss": 0.2650667428970337 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5907478332519531, "learning_rate": 2.182765999092091e-05, "loss": 0.2359, "step": 14170, "teacher_loss": 0.19652211666107178 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.27910155057907104, "learning_rate": 2.182563759005155e-05, "loss": 0.2895, "step": 14171, "teacher_loss": 0.29067856073379517 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4879056513309479, "learning_rate": 2.1823615032690786e-05, "loss": 0.3507, "step": 14172, "teacher_loss": 0.33540865778923035 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.7178019285202026, "learning_rate": 2.1821592318884983e-05, "loss": 0.223, "step": 14173, "teacher_loss": 0.16799131035804749 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.39833736419677734, "learning_rate": 2.181956944868052e-05, "loss": 0.2742, "step": 14174, "teacher_loss": 0.26039671897888184 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.3856354057788849, "learning_rate": 2.1817546422123777e-05, "loss": 0.2339, "step": 14175, "teacher_loss": 0.21701905131340027 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5523322224617004, "learning_rate": 2.1815523239261137e-05, "loss": 0.2863, "step": 14176, "teacher_loss": 0.25678563117980957 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.38658204674720764, "learning_rate": 2.181349990013898e-05, "loss": 0.2747, "step": 14177, "teacher_loss": 0.26232317090034485 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.40355247259140015, "learning_rate": 2.18114764048037e-05, "loss": 0.271, "step": 14178, "teacher_loss": 0.2563096880912781 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.411508172750473, "learning_rate": 2.180945275330169e-05, "loss": 0.2851, "step": 14179, "teacher_loss": 0.2710726857185364 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.48566436767578125, "learning_rate": 2.180742894567934e-05, "loss": 0.3208, "step": 14180, "teacher_loss": 0.3025311231613159 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.20909595489501953, "learning_rate": 2.180540498198306e-05, "loss": 0.2366, "step": 14181, "teacher_loss": 0.23965993523597717 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4929698705673218, "learning_rate": 2.180338086225924e-05, "loss": 0.2245, "step": 14182, "teacher_loss": 0.1946302354335785 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 1.0289430618286133, "learning_rate": 2.1801356586554298e-05, "loss": 0.3652, "step": 14183, "teacher_loss": 0.29142096638679504 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5295517444610596, "learning_rate": 2.179933215491464e-05, "loss": 0.5305, "step": 14184, "teacher_loss": 0.5305664539337158 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.3709298372268677, "learning_rate": 2.179730756738668e-05, "loss": 0.2261, "step": 14185, "teacher_loss": 0.2100437581539154 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.5170134902000427, "learning_rate": 2.1795282824016845e-05, "loss": 0.2354, "step": 14186, "teacher_loss": 0.20409107208251953 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.4345567226409912, "learning_rate": 2.1793257924851534e-05, "loss": 0.3006, "step": 14187, "teacher_loss": 0.2856846749782562 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2764303982257843, "learning_rate": 2.17912328699372e-05, "loss": 0.1969, "step": 14188, "teacher_loss": 0.18808524310588837 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.8379099369049072, "learning_rate": 2.178920765932025e-05, "loss": 0.4773, "step": 14189, "teacher_loss": 0.43725278973579407 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.3498167395591736, "learning_rate": 2.1787182293047123e-05, "loss": 0.2576, "step": 14190, "teacher_loss": 0.2473965585231781 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.2589060664176941, "learning_rate": 2.1785156771164256e-05, "loss": 0.2593, "step": 14191, "teacher_loss": 0.2593681216239929 }, { "compression_loss": 0.0, "epoch": 2.56, "label_loss": 0.7577589154243469, "learning_rate": 2.1783131093718086e-05, "loss": 0.3266, "step": 14192, "teacher_loss": 0.27871954441070557 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.4304571747779846, "learning_rate": 2.178110526075506e-05, "loss": 0.3244, "step": 14193, "teacher_loss": 0.31262266635894775 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5653932094573975, "learning_rate": 2.1779079272321623e-05, "loss": 0.2497, "step": 14194, "teacher_loss": 0.21467682719230652 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.7908973693847656, "learning_rate": 2.177705312846422e-05, "loss": 0.2804, "step": 14195, "teacher_loss": 0.2236693799495697 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.45874154567718506, "learning_rate": 2.1775026829229306e-05, "loss": 0.2316, "step": 14196, "teacher_loss": 0.2064008265733719 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5312547087669373, "learning_rate": 2.177300037466334e-05, "loss": 0.2646, "step": 14197, "teacher_loss": 0.23500633239746094 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.19308380782604218, "learning_rate": 2.1770973764812785e-05, "loss": 0.2159, "step": 14198, "teacher_loss": 0.21842306852340698 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.22375868260860443, "learning_rate": 2.17689469997241e-05, "loss": 0.279, "step": 14199, "teacher_loss": 0.28512609004974365 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.6033531427383423, "learning_rate": 2.1766920079443753e-05, "loss": 0.277, "step": 14200, "teacher_loss": 0.24078398942947388 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5749766826629639, "learning_rate": 2.176489300401822e-05, "loss": 0.3308, "step": 14201, "teacher_loss": 0.3036767244338989 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.3687785267829895, "learning_rate": 2.176286577349397e-05, "loss": 0.2244, "step": 14202, "teacher_loss": 0.2083839774131775 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5712060928344727, "learning_rate": 2.1760838387917485e-05, "loss": 0.3084, "step": 14203, "teacher_loss": 0.27922940254211426 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.8496137857437134, "learning_rate": 2.1758810847335243e-05, "loss": 0.2912, "step": 14204, "teacher_loss": 0.22920575737953186 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.0672571212053299, "learning_rate": 2.1756783151793737e-05, "loss": 0.1311, "step": 14205, "teacher_loss": 0.1381981074810028 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.47197240591049194, "learning_rate": 2.1754755301339453e-05, "loss": 0.2643, "step": 14206, "teacher_loss": 0.2412358522415161 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5092145204544067, "learning_rate": 2.1752727296018872e-05, "loss": 0.3291, "step": 14207, "teacher_loss": 0.30904000997543335 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.4917996823787689, "learning_rate": 2.1750699135878507e-05, "loss": 0.2319, "step": 14208, "teacher_loss": 0.2030460238456726 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.3462759554386139, "learning_rate": 2.174867082096485e-05, "loss": 0.2419, "step": 14209, "teacher_loss": 0.23030412197113037 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.49086758494377136, "learning_rate": 2.1746642351324402e-05, "loss": 0.3238, "step": 14210, "teacher_loss": 0.3052000403404236 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.3395147919654846, "learning_rate": 2.1744613727003677e-05, "loss": 0.2274, "step": 14211, "teacher_loss": 0.21495041251182556 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.36897778511047363, "learning_rate": 2.1742584948049175e-05, "loss": 0.2934, "step": 14212, "teacher_loss": 0.2849540710449219 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.3142859637737274, "learning_rate": 2.174055601450742e-05, "loss": 0.2039, "step": 14213, "teacher_loss": 0.19160521030426025 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5150015354156494, "learning_rate": 2.1738526926424925e-05, "loss": 0.2692, "step": 14214, "teacher_loss": 0.241885244846344 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.2842918634414673, "learning_rate": 2.1736497683848204e-05, "loss": 0.2968, "step": 14215, "teacher_loss": 0.29816338419914246 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.6323486566543579, "learning_rate": 2.17344682868238e-05, "loss": 0.3458, "step": 14216, "teacher_loss": 0.3139745593070984 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5285513401031494, "learning_rate": 2.173243873539822e-05, "loss": 0.2804, "step": 14217, "teacher_loss": 0.2528434693813324 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.448128879070282, "learning_rate": 2.1730409029618004e-05, "loss": 0.2399, "step": 14218, "teacher_loss": 0.2167319655418396 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5915016531944275, "learning_rate": 2.1728379169529693e-05, "loss": 0.2414, "step": 14219, "teacher_loss": 0.20250959694385529 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.6359156370162964, "learning_rate": 2.1726349155179817e-05, "loss": 0.4224, "step": 14220, "teacher_loss": 0.39868664741516113 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.14802348613739014, "learning_rate": 2.172431898661492e-05, "loss": 0.1713, "step": 14221, "teacher_loss": 0.17383426427841187 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5322903394699097, "learning_rate": 2.1722288663881555e-05, "loss": 0.5258, "step": 14222, "teacher_loss": 0.525030255317688 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.3155434727668762, "learning_rate": 2.1720258187026258e-05, "loss": 0.2677, "step": 14223, "teacher_loss": 0.2623414993286133 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5718631744384766, "learning_rate": 2.17182275560956e-05, "loss": 0.3774, "step": 14224, "teacher_loss": 0.3557378053665161 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.22144627571105957, "learning_rate": 2.1716196771136115e-05, "loss": 0.2091, "step": 14225, "teacher_loss": 0.20774388313293457 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.37791627645492554, "learning_rate": 2.171416583219438e-05, "loss": 0.2642, "step": 14226, "teacher_loss": 0.25157007575035095 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.20667049288749695, "learning_rate": 2.1712134739316955e-05, "loss": 0.1768, "step": 14227, "teacher_loss": 0.17345619201660156 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.33477669954299927, "learning_rate": 2.1710103492550396e-05, "loss": 0.2359, "step": 14228, "teacher_loss": 0.22491490840911865 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.928906261920929, "learning_rate": 2.170807209194129e-05, "loss": 0.3721, "step": 14229, "teacher_loss": 0.31026798486709595 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.9561784267425537, "learning_rate": 2.17060405375362e-05, "loss": 0.3168, "step": 14230, "teacher_loss": 0.24571369588375092 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.770319402217865, "learning_rate": 2.170400882938171e-05, "loss": 0.5305, "step": 14231, "teacher_loss": 0.5038201808929443 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.49957719445228577, "learning_rate": 2.1701976967524388e-05, "loss": 0.2385, "step": 14232, "teacher_loss": 0.2095131278038025 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.2698243260383606, "learning_rate": 2.1699944952010832e-05, "loss": 0.2198, "step": 14233, "teacher_loss": 0.21420526504516602 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.6613113880157471, "learning_rate": 2.1697912782887627e-05, "loss": 0.2718, "step": 14234, "teacher_loss": 0.2284790277481079 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.34646034240722656, "learning_rate": 2.1695880460201364e-05, "loss": 0.2587, "step": 14235, "teacher_loss": 0.24891141057014465 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.4663724899291992, "learning_rate": 2.1693847983998636e-05, "loss": 0.2451, "step": 14236, "teacher_loss": 0.22052177786827087 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.32446038722991943, "learning_rate": 2.1691815354326037e-05, "loss": 0.2313, "step": 14237, "teacher_loss": 0.22098299860954285 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.6764269471168518, "learning_rate": 2.1689782571230184e-05, "loss": 0.3134, "step": 14238, "teacher_loss": 0.27308785915374756 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.8967772722244263, "learning_rate": 2.168774963475767e-05, "loss": 0.3302, "step": 14239, "teacher_loss": 0.2672373354434967 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.830818772315979, "learning_rate": 2.1685716544955108e-05, "loss": 0.3703, "step": 14240, "teacher_loss": 0.3191695809364319 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.38655751943588257, "learning_rate": 2.1683683301869116e-05, "loss": 0.26, "step": 14241, "teacher_loss": 0.24599100649356842 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.7583180665969849, "learning_rate": 2.1681649905546298e-05, "loss": 0.3078, "step": 14242, "teacher_loss": 0.25770893692970276 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.8816455602645874, "learning_rate": 2.167961635603328e-05, "loss": 0.9039, "step": 14243, "teacher_loss": 0.9063464403152466 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.8013314604759216, "learning_rate": 2.167758265337669e-05, "loss": 0.3109, "step": 14244, "teacher_loss": 0.2563667297363281 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.43961644172668457, "learning_rate": 2.1675548797623144e-05, "loss": 0.2405, "step": 14245, "teacher_loss": 0.2183992564678192 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.5126307010650635, "learning_rate": 2.1673514788819283e-05, "loss": 0.2553, "step": 14246, "teacher_loss": 0.22670122981071472 }, { "compression_loss": 0.0, "epoch": 2.57, "label_loss": 0.9087996482849121, "learning_rate": 2.1671480627011734e-05, "loss": 0.2797, "step": 14247, "teacher_loss": 0.20979446172714233 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7375493049621582, "learning_rate": 2.1669446312247138e-05, "loss": 0.2837, "step": 14248, "teacher_loss": 0.23330962657928467 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.17408889532089233, "learning_rate": 2.166741184457214e-05, "loss": 0.1773, "step": 14249, "teacher_loss": 0.17760753631591797 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5018510222434998, "learning_rate": 2.1665377224033365e-05, "loss": 0.2026, "step": 14250, "teacher_loss": 0.16940000653266907 }, { "epoch": 2.58, "eval_exact_match": 79.81078524124882, "eval_f1": 87.07273064226436, "step": 14250 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.44981861114501953, "learning_rate": 2.166334245067748e-05, "loss": 0.2027, "step": 14251, "teacher_loss": 0.17528456449508667 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.2728629410266876, "learning_rate": 2.1661307524551134e-05, "loss": 0.238, "step": 14252, "teacher_loss": 0.23409652709960938 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.8141746520996094, "learning_rate": 2.1659272445700972e-05, "loss": 0.3649, "step": 14253, "teacher_loss": 0.3149639666080475 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5118100047111511, "learning_rate": 2.1657237214173664e-05, "loss": 0.3378, "step": 14254, "teacher_loss": 0.318419873714447 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.500604510307312, "learning_rate": 2.1655201830015864e-05, "loss": 0.2957, "step": 14255, "teacher_loss": 0.2728869915008545 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.4157668948173523, "learning_rate": 2.165316629327424e-05, "loss": 0.2265, "step": 14256, "teacher_loss": 0.20551112294197083 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5615580081939697, "learning_rate": 2.165113060399546e-05, "loss": 0.2507, "step": 14257, "teacher_loss": 0.21621017158031464 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7114548683166504, "learning_rate": 2.1649094762226195e-05, "loss": 0.2624, "step": 14258, "teacher_loss": 0.21254561841487885 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.6866539716720581, "learning_rate": 2.1647058768013125e-05, "loss": 0.2453, "step": 14259, "teacher_loss": 0.1963009536266327 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.47950679063796997, "learning_rate": 2.1645022621402923e-05, "loss": 0.2007, "step": 14260, "teacher_loss": 0.1697450578212738 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5898662209510803, "learning_rate": 2.1642986322442276e-05, "loss": 0.2704, "step": 14261, "teacher_loss": 0.23486298322677612 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7955036759376526, "learning_rate": 2.1640949871177868e-05, "loss": 0.5313, "step": 14262, "teacher_loss": 0.5019314885139465 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.9793264865875244, "learning_rate": 2.1638913267656393e-05, "loss": 0.8924, "step": 14263, "teacher_loss": 0.8827388286590576 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.8474443554878235, "learning_rate": 2.1636876511924542e-05, "loss": 0.4419, "step": 14264, "teacher_loss": 0.3968254625797272 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.397510826587677, "learning_rate": 2.1634839604029005e-05, "loss": 0.323, "step": 14265, "teacher_loss": 0.3147730827331543 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.4114142656326294, "learning_rate": 2.1632802544016495e-05, "loss": 0.2586, "step": 14266, "teacher_loss": 0.24160446226596832 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.16140009462833405, "learning_rate": 2.1630765331933703e-05, "loss": 0.1986, "step": 14267, "teacher_loss": 0.20274126529693604 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7105826139450073, "learning_rate": 2.1628727967827346e-05, "loss": 0.3881, "step": 14268, "teacher_loss": 0.35231661796569824 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.40489107370376587, "learning_rate": 2.1626690451744128e-05, "loss": 0.2028, "step": 14269, "teacher_loss": 0.1803997904062271 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5906544327735901, "learning_rate": 2.162465278373077e-05, "loss": 0.3311, "step": 14270, "teacher_loss": 0.3022775948047638 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.22036081552505493, "learning_rate": 2.162261496383398e-05, "loss": 0.1825, "step": 14271, "teacher_loss": 0.17833009362220764 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.45569610595703125, "learning_rate": 2.162057699210049e-05, "loss": 0.2424, "step": 14272, "teacher_loss": 0.21870452165603638 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.18406696617603302, "learning_rate": 2.161853886857701e-05, "loss": 0.2265, "step": 14273, "teacher_loss": 0.23124441504478455 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.6203475594520569, "learning_rate": 2.1616500593310288e-05, "loss": 0.232, "step": 14274, "teacher_loss": 0.1888052374124527 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.2765890061855316, "learning_rate": 2.1614462166347043e-05, "loss": 0.2329, "step": 14275, "teacher_loss": 0.22800664603710175 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.38806673884391785, "learning_rate": 2.1612423587734007e-05, "loss": 0.2324, "step": 14276, "teacher_loss": 0.21505972743034363 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7791194915771484, "learning_rate": 2.1610384857517926e-05, "loss": 0.2402, "step": 14277, "teacher_loss": 0.18035873770713806 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.36976537108421326, "learning_rate": 2.1608345975745534e-05, "loss": 0.241, "step": 14278, "teacher_loss": 0.2266956865787506 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.6180068254470825, "learning_rate": 2.1606306942463592e-05, "loss": 0.3944, "step": 14279, "teacher_loss": 0.3695370852947235 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.25726011395454407, "learning_rate": 2.160426775771883e-05, "loss": 0.1602, "step": 14280, "teacher_loss": 0.14943283796310425 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 1.0388661623001099, "learning_rate": 2.1602228421558013e-05, "loss": 0.3267, "step": 14281, "teacher_loss": 0.24756869673728943 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.35848677158355713, "learning_rate": 2.160018893402789e-05, "loss": 0.2496, "step": 14282, "teacher_loss": 0.2375195324420929 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5121340751647949, "learning_rate": 2.1598149295175224e-05, "loss": 0.231, "step": 14283, "teacher_loss": 0.19981051981449127 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 1.4362032413482666, "learning_rate": 2.1596109505046776e-05, "loss": 0.4103, "step": 14284, "teacher_loss": 0.2963097095489502 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.32695263624191284, "learning_rate": 2.1594069563689316e-05, "loss": 0.1851, "step": 14285, "teacher_loss": 0.16935238242149353 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.42051804065704346, "learning_rate": 2.1592029471149607e-05, "loss": 0.2834, "step": 14286, "teacher_loss": 0.2681804895401001 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.3083737790584564, "learning_rate": 2.1589989227474433e-05, "loss": 0.3206, "step": 14287, "teacher_loss": 0.32191166281700134 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.4337674379348755, "learning_rate": 2.1587948832710557e-05, "loss": 0.2561, "step": 14288, "teacher_loss": 0.23639589548110962 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.24450816214084625, "learning_rate": 2.158590828690477e-05, "loss": 0.2515, "step": 14289, "teacher_loss": 0.25224125385284424 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 1.0289298295974731, "learning_rate": 2.1583867590103848e-05, "loss": 0.4848, "step": 14290, "teacher_loss": 0.42434966564178467 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.3674364686012268, "learning_rate": 2.1581826742354585e-05, "loss": 0.4311, "step": 14291, "teacher_loss": 0.4382132589817047 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.2838875353336334, "learning_rate": 2.1579785743703763e-05, "loss": 0.2158, "step": 14292, "teacher_loss": 0.2082471251487732 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 1.239539623260498, "learning_rate": 2.157774459419819e-05, "loss": 0.3537, "step": 14293, "teacher_loss": 0.25524815917015076 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5885140299797058, "learning_rate": 2.1575703293884646e-05, "loss": 0.2394, "step": 14294, "teacher_loss": 0.20059171319007874 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.38943448662757874, "learning_rate": 2.1573661842809942e-05, "loss": 0.2814, "step": 14295, "teacher_loss": 0.2694370746612549 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.7366315126419067, "learning_rate": 2.1571620241020884e-05, "loss": 0.4661, "step": 14296, "teacher_loss": 0.4360237121582031 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.678027868270874, "learning_rate": 2.1569578488564275e-05, "loss": 0.3693, "step": 14297, "teacher_loss": 0.3349994719028473 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.293626606464386, "learning_rate": 2.156753658548693e-05, "loss": 0.231, "step": 14298, "teacher_loss": 0.22402653098106384 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.5079373717308044, "learning_rate": 2.156549453183566e-05, "loss": 0.3017, "step": 14299, "teacher_loss": 0.27876919507980347 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.3499205410480499, "learning_rate": 2.156345232765728e-05, "loss": 0.2254, "step": 14300, "teacher_loss": 0.21151064336299896 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.6451599597930908, "learning_rate": 2.1561409972998623e-05, "loss": 0.2648, "step": 14301, "teacher_loss": 0.2225797474384308 }, { "compression_loss": 0.0, "epoch": 2.58, "label_loss": 0.6538830995559692, "learning_rate": 2.1559367467906508e-05, "loss": 0.3495, "step": 14302, "teacher_loss": 0.315729558467865 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.11456164717674255, "learning_rate": 2.1557324812427755e-05, "loss": 0.1702, "step": 14303, "teacher_loss": 0.17638170719146729 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4473934769630432, "learning_rate": 2.1555282006609216e-05, "loss": 0.2269, "step": 14304, "teacher_loss": 0.20235010981559753 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5119442939758301, "learning_rate": 2.15532390504977e-05, "loss": 0.2146, "step": 14305, "teacher_loss": 0.18153566122055054 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6872402429580688, "learning_rate": 2.155119594414007e-05, "loss": 0.2661, "step": 14306, "teacher_loss": 0.2193007469177246 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.39235514402389526, "learning_rate": 2.1549152687583158e-05, "loss": 0.2693, "step": 14307, "teacher_loss": 0.2556533217430115 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.26030904054641724, "learning_rate": 2.1547109280873808e-05, "loss": 0.2881, "step": 14308, "teacher_loss": 0.2912067174911499 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 1.2868714332580566, "learning_rate": 2.1545065724058873e-05, "loss": 0.4348, "step": 14309, "teacher_loss": 0.3400779068470001 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6231622099876404, "learning_rate": 2.15430220171852e-05, "loss": 0.2925, "step": 14310, "teacher_loss": 0.2557588815689087 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3664616346359253, "learning_rate": 2.1540978160299656e-05, "loss": 0.1992, "step": 14311, "teacher_loss": 0.18063011765480042 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6802225112915039, "learning_rate": 2.153893415344909e-05, "loss": 0.3152, "step": 14312, "teacher_loss": 0.27468788623809814 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7269768714904785, "learning_rate": 2.1536889996680366e-05, "loss": 0.2458, "step": 14313, "teacher_loss": 0.19236302375793457 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7794076204299927, "learning_rate": 2.1534845690040356e-05, "loss": 0.2817, "step": 14314, "teacher_loss": 0.22641941905021667 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7687351703643799, "learning_rate": 2.1532801233575926e-05, "loss": 0.2954, "step": 14315, "teacher_loss": 0.24284933507442474 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.8595342636108398, "learning_rate": 2.1530756627333953e-05, "loss": 0.3432, "step": 14316, "teacher_loss": 0.28578054904937744 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3514205813407898, "learning_rate": 2.1528711871361305e-05, "loss": 0.182, "step": 14317, "teacher_loss": 0.16313891112804413 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.15996265411376953, "learning_rate": 2.1526666965704874e-05, "loss": 0.1155, "step": 14318, "teacher_loss": 0.11055950820446014 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3024246096611023, "learning_rate": 2.152462191041153e-05, "loss": 0.2081, "step": 14319, "teacher_loss": 0.19764773547649384 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.32902562618255615, "learning_rate": 2.1522576705528173e-05, "loss": 0.2606, "step": 14320, "teacher_loss": 0.25301098823547363 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.49471426010131836, "learning_rate": 2.1520531351101685e-05, "loss": 0.3111, "step": 14321, "teacher_loss": 0.29075026512145996 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 1.3565127849578857, "learning_rate": 2.1518485847178965e-05, "loss": 0.5074, "step": 14322, "teacher_loss": 0.413002073764801 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3960000276565552, "learning_rate": 2.1516440193806907e-05, "loss": 0.3064, "step": 14323, "teacher_loss": 0.2964242398738861 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4063524603843689, "learning_rate": 2.1514394391032413e-05, "loss": 0.2245, "step": 14324, "teacher_loss": 0.20426729321479797 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.32127660512924194, "learning_rate": 2.151234843890239e-05, "loss": 0.227, "step": 14325, "teacher_loss": 0.21650615334510803 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7958909273147583, "learning_rate": 2.151030233746374e-05, "loss": 0.6925, "step": 14326, "teacher_loss": 0.6810251474380493 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6001822352409363, "learning_rate": 2.1508256086763372e-05, "loss": 0.234, "step": 14327, "teacher_loss": 0.19330760836601257 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4064871668815613, "learning_rate": 2.1506209686848208e-05, "loss": 0.3108, "step": 14328, "teacher_loss": 0.30015844106674194 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.1884167492389679, "learning_rate": 2.1504163137765164e-05, "loss": 0.2321, "step": 14329, "teacher_loss": 0.23699051141738892 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.29769617319107056, "learning_rate": 2.1502116439561157e-05, "loss": 0.2495, "step": 14330, "teacher_loss": 0.24416258931159973 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.409062922000885, "learning_rate": 2.1500069592283116e-05, "loss": 0.3013, "step": 14331, "teacher_loss": 0.28932878375053406 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7875873446464539, "learning_rate": 2.1498022595977965e-05, "loss": 0.2724, "step": 14332, "teacher_loss": 0.2152034044265747 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5639349222183228, "learning_rate": 2.1495975450692642e-05, "loss": 0.3083, "step": 14333, "teacher_loss": 0.2799008786678314 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6440930366516113, "learning_rate": 2.149392815647408e-05, "loss": 0.2283, "step": 14334, "teacher_loss": 0.18214114010334015 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5947770476341248, "learning_rate": 2.149188071336921e-05, "loss": 0.2426, "step": 14335, "teacher_loss": 0.20344088971614838 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5486993193626404, "learning_rate": 2.148983312142498e-05, "loss": 0.3035, "step": 14336, "teacher_loss": 0.27622726559638977 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3154642879962921, "learning_rate": 2.1487785380688333e-05, "loss": 0.2293, "step": 14337, "teacher_loss": 0.21974530816078186 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.34401002526283264, "learning_rate": 2.1485737491206215e-05, "loss": 0.2079, "step": 14338, "teacher_loss": 0.19275638461112976 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.7187472581863403, "learning_rate": 2.1483689453025593e-05, "loss": 0.4394, "step": 14339, "teacher_loss": 0.40831443667411804 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.473328560590744, "learning_rate": 2.14816412661934e-05, "loss": 0.3027, "step": 14340, "teacher_loss": 0.2837018370628357 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.07045336812734604, "learning_rate": 2.147959293075661e-05, "loss": 0.1856, "step": 14341, "teacher_loss": 0.1983906328678131 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.27863818407058716, "learning_rate": 2.1477544446762184e-05, "loss": 0.3661, "step": 14342, "teacher_loss": 0.375781774520874 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.48159411549568176, "learning_rate": 2.147549581425708e-05, "loss": 0.3009, "step": 14343, "teacher_loss": 0.28086531162261963 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.41333091259002686, "learning_rate": 2.1473447033288275e-05, "loss": 0.2607, "step": 14344, "teacher_loss": 0.24379006028175354 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.26726314425468445, "learning_rate": 2.1471398103902733e-05, "loss": 0.1857, "step": 14345, "teacher_loss": 0.17664334177970886 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.6123522520065308, "learning_rate": 2.1469349026147438e-05, "loss": 0.8066, "step": 14346, "teacher_loss": 0.828173041343689 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4149326980113983, "learning_rate": 2.1467299800069366e-05, "loss": 0.226, "step": 14347, "teacher_loss": 0.20495428144931793 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.23196470737457275, "learning_rate": 2.14652504257155e-05, "loss": 0.1759, "step": 14348, "teacher_loss": 0.1696886122226715 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.192484050989151, "learning_rate": 2.1463200903132825e-05, "loss": 0.2272, "step": 14349, "teacher_loss": 0.231021910905838 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.699876070022583, "learning_rate": 2.146115123236833e-05, "loss": 0.2922, "step": 14350, "teacher_loss": 0.24689070880413055 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4879429340362549, "learning_rate": 2.1459101413469008e-05, "loss": 0.3465, "step": 14351, "teacher_loss": 0.33080393075942993 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.32041695713996887, "learning_rate": 2.1457051446481855e-05, "loss": 0.2144, "step": 14352, "teacher_loss": 0.20266872644424438 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5522844791412354, "learning_rate": 2.1455001331453875e-05, "loss": 0.295, "step": 14353, "teacher_loss": 0.2664240300655365 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.19967886805534363, "learning_rate": 2.145295106843207e-05, "loss": 0.2905, "step": 14354, "teacher_loss": 0.30054694414138794 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.4694608747959137, "learning_rate": 2.1450900657463438e-05, "loss": 0.2432, "step": 14355, "teacher_loss": 0.21810433268547058 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.3208380937576294, "learning_rate": 2.1448850098594998e-05, "loss": 0.3406, "step": 14356, "teacher_loss": 0.3427514433860779 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.5406547784805298, "learning_rate": 2.144679939187376e-05, "loss": 0.239, "step": 14357, "teacher_loss": 0.20547229051589966 }, { "compression_loss": 0.0, "epoch": 2.59, "label_loss": 0.2883642315864563, "learning_rate": 2.1444748537346737e-05, "loss": 0.1977, "step": 14358, "teacher_loss": 0.1876707375049591 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.41519108414649963, "learning_rate": 2.1442697535060957e-05, "loss": 0.2762, "step": 14359, "teacher_loss": 0.26078787446022034 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.3476175367832184, "learning_rate": 2.1440646385063436e-05, "loss": 0.251, "step": 14360, "teacher_loss": 0.2402871698141098 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.2072194516658783, "learning_rate": 2.1438595087401204e-05, "loss": 0.1809, "step": 14361, "teacher_loss": 0.17793533205986023 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.30776265263557434, "learning_rate": 2.1436543642121293e-05, "loss": 0.2234, "step": 14362, "teacher_loss": 0.21404105424880981 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.6086610555648804, "learning_rate": 2.1434492049270725e-05, "loss": 0.6335, "step": 14363, "teacher_loss": 0.6362476944923401 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.4263724088668823, "learning_rate": 2.143244030889656e-05, "loss": 0.2586, "step": 14364, "teacher_loss": 0.24000096321105957 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5887324213981628, "learning_rate": 2.1430388421045812e-05, "loss": 0.3727, "step": 14365, "teacher_loss": 0.3486897051334381 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.47745761275291443, "learning_rate": 2.142833638576554e-05, "loss": 0.2639, "step": 14366, "teacher_loss": 0.24022571742534637 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.8716928958892822, "learning_rate": 2.1426284203102795e-05, "loss": 0.3581, "step": 14367, "teacher_loss": 0.30102288722991943 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.7652832269668579, "learning_rate": 2.1424231873104613e-05, "loss": 0.6356, "step": 14368, "teacher_loss": 0.6212201714515686 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.595547080039978, "learning_rate": 2.1422179395818058e-05, "loss": 0.3469, "step": 14369, "teacher_loss": 0.3193088173866272 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.28941231966018677, "learning_rate": 2.1420126771290183e-05, "loss": 0.1996, "step": 14370, "teacher_loss": 0.18966497480869293 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.13828805088996887, "learning_rate": 2.1418073999568047e-05, "loss": 0.2127, "step": 14371, "teacher_loss": 0.22102117538452148 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5491148829460144, "learning_rate": 2.141602108069872e-05, "loss": 0.2621, "step": 14372, "teacher_loss": 0.23024769127368927 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.4534647762775421, "learning_rate": 2.1413968014729264e-05, "loss": 0.3167, "step": 14373, "teacher_loss": 0.30152788758277893 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.30152928829193115, "learning_rate": 2.1411914801706753e-05, "loss": 0.3113, "step": 14374, "teacher_loss": 0.31239378452301025 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.33814698457717896, "learning_rate": 2.1409861441678262e-05, "loss": 0.2688, "step": 14375, "teacher_loss": 0.2611209452152252 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.9511691927909851, "learning_rate": 2.1407807934690857e-05, "loss": 0.3183, "step": 14376, "teacher_loss": 0.248009592294693 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.489962637424469, "learning_rate": 2.1405754280791634e-05, "loss": 0.2828, "step": 14377, "teacher_loss": 0.2598152458667755 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.9335925579071045, "learning_rate": 2.1403700480027672e-05, "loss": 0.4965, "step": 14378, "teacher_loss": 0.44798070192337036 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5011628866195679, "learning_rate": 2.1401646532446057e-05, "loss": 0.2108, "step": 14379, "teacher_loss": 0.17855922877788544 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.22789734601974487, "learning_rate": 2.139959243809388e-05, "loss": 0.3926, "step": 14380, "teacher_loss": 0.4109404683113098 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.29149484634399414, "learning_rate": 2.139753819701823e-05, "loss": 0.1911, "step": 14381, "teacher_loss": 0.17990244925022125 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.36217203736305237, "learning_rate": 2.1395483809266217e-05, "loss": 0.2235, "step": 14382, "teacher_loss": 0.20811673998832703 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.3703482449054718, "learning_rate": 2.1393429274884933e-05, "loss": 0.1946, "step": 14383, "teacher_loss": 0.17502844333648682 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5684225559234619, "learning_rate": 2.1391374593921483e-05, "loss": 0.3367, "step": 14384, "teacher_loss": 0.31091558933258057 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.44113093614578247, "learning_rate": 2.1389319766422974e-05, "loss": 0.2379, "step": 14385, "teacher_loss": 0.21526330709457397 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.39999252557754517, "learning_rate": 2.138726479243652e-05, "loss": 0.2324, "step": 14386, "teacher_loss": 0.2137613445520401 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.23954442143440247, "learning_rate": 2.138520967200924e-05, "loss": 0.2249, "step": 14387, "teacher_loss": 0.22324882447719574 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5895223617553711, "learning_rate": 2.1383154405188235e-05, "loss": 0.2798, "step": 14388, "teacher_loss": 0.24540933966636658 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.6869639158248901, "learning_rate": 2.1381098992020646e-05, "loss": 0.5363, "step": 14389, "teacher_loss": 0.5195050835609436 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.4208378195762634, "learning_rate": 2.1379043432553584e-05, "loss": 0.2933, "step": 14390, "teacher_loss": 0.2790742516517639 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.7477906942367554, "learning_rate": 2.1376987726834188e-05, "loss": 0.3013, "step": 14391, "teacher_loss": 0.2517155408859253 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.6484609842300415, "learning_rate": 2.1374931874909576e-05, "loss": 0.3079, "step": 14392, "teacher_loss": 0.270017147064209 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.7592825889587402, "learning_rate": 2.1372875876826892e-05, "loss": 0.3786, "step": 14393, "teacher_loss": 0.33627286553382874 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.6592007875442505, "learning_rate": 2.137081973263327e-05, "loss": 0.384, "step": 14394, "teacher_loss": 0.3534213900566101 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5886995792388916, "learning_rate": 2.1368763442375852e-05, "loss": 0.2917, "step": 14395, "teacher_loss": 0.2587509751319885 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.26648765802383423, "learning_rate": 2.1366707006101784e-05, "loss": 0.2018, "step": 14396, "teacher_loss": 0.19461414217948914 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.43760621547698975, "learning_rate": 2.1364650423858216e-05, "loss": 0.3037, "step": 14397, "teacher_loss": 0.28883352875709534 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.33827027678489685, "learning_rate": 2.136259369569229e-05, "loss": 0.2233, "step": 14398, "teacher_loss": 0.21052607893943787 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.4529958963394165, "learning_rate": 2.1360536821651166e-05, "loss": 0.1899, "step": 14399, "teacher_loss": 0.16067785024642944 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.4759179949760437, "learning_rate": 2.135847980178201e-05, "loss": 0.3405, "step": 14400, "teacher_loss": 0.3254011273384094 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.9083163738250732, "learning_rate": 2.1356422636131963e-05, "loss": 0.2323, "step": 14401, "teacher_loss": 0.15721768140792847 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5911104083061218, "learning_rate": 2.1354365324748218e-05, "loss": 0.3112, "step": 14402, "teacher_loss": 0.2800961434841156 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.624241054058075, "learning_rate": 2.135230786767792e-05, "loss": 0.2939, "step": 14403, "teacher_loss": 0.2571490406990051 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.3971065282821655, "learning_rate": 2.1350250264968245e-05, "loss": 0.293, "step": 14404, "teacher_loss": 0.28142908215522766 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.7087382078170776, "learning_rate": 2.1348192516666376e-05, "loss": 0.4645, "step": 14405, "teacher_loss": 0.4373510479927063 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.37750405073165894, "learning_rate": 2.134613462281948e-05, "loss": 0.5131, "step": 14406, "teacher_loss": 0.5281796455383301 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.21675628423690796, "learning_rate": 2.1344076583474748e-05, "loss": 0.1771, "step": 14407, "teacher_loss": 0.17263884842395782 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.3920409083366394, "learning_rate": 2.1342018398679363e-05, "loss": 0.5068, "step": 14408, "teacher_loss": 0.5195322632789612 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.5038798451423645, "learning_rate": 2.1339960068480503e-05, "loss": 0.3722, "step": 14409, "teacher_loss": 0.3575171232223511 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.22464367747306824, "learning_rate": 2.133790159292537e-05, "loss": 0.2733, "step": 14410, "teacher_loss": 0.27870655059814453 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.600874125957489, "learning_rate": 2.1335842972061158e-05, "loss": 0.4638, "step": 14411, "teacher_loss": 0.4485885500907898 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.2844209671020508, "learning_rate": 2.133378420593506e-05, "loss": 0.1823, "step": 14412, "teacher_loss": 0.17092260718345642 }, { "compression_loss": 0.0, "epoch": 2.6, "label_loss": 0.1765948385000229, "learning_rate": 2.133172529459428e-05, "loss": 0.2582, "step": 14413, "teacher_loss": 0.26729512214660645 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.26239120960235596, "learning_rate": 2.1329666238086024e-05, "loss": 0.2546, "step": 14414, "teacher_loss": 0.25374260544776917 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.19193045794963837, "learning_rate": 2.1327607036457492e-05, "loss": 0.2084, "step": 14415, "teacher_loss": 0.2101939618587494 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.6342462301254272, "learning_rate": 2.132554768975591e-05, "loss": 0.4105, "step": 14416, "teacher_loss": 0.3855966627597809 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.7185182571411133, "learning_rate": 2.1323488198028484e-05, "loss": 0.3025, "step": 14417, "teacher_loss": 0.2562815248966217 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5414015054702759, "learning_rate": 2.1321428561322428e-05, "loss": 0.3128, "step": 14418, "teacher_loss": 0.2874165177345276 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4086994230747223, "learning_rate": 2.1319368779684972e-05, "loss": 0.3196, "step": 14419, "teacher_loss": 0.30965232849121094 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.7419878840446472, "learning_rate": 2.1317308853163333e-05, "loss": 0.5899, "step": 14420, "teacher_loss": 0.5729924440383911 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.2743135690689087, "learning_rate": 2.1315248781804743e-05, "loss": 0.2124, "step": 14421, "teacher_loss": 0.20551703870296478 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.26606953144073486, "learning_rate": 2.1313188565656434e-05, "loss": 0.2481, "step": 14422, "teacher_loss": 0.24607792496681213 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.2285497933626175, "learning_rate": 2.1311128204765634e-05, "loss": 0.1854, "step": 14423, "teacher_loss": 0.18065476417541504 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.38702714443206787, "learning_rate": 2.130906769917959e-05, "loss": 0.2163, "step": 14424, "teacher_loss": 0.19730296730995178 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5607089996337891, "learning_rate": 2.1307007048945538e-05, "loss": 0.3629, "step": 14425, "teacher_loss": 0.34096360206604004 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.39820170402526855, "learning_rate": 2.1304946254110727e-05, "loss": 0.2256, "step": 14426, "teacher_loss": 0.206430584192276 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.30117055773735046, "learning_rate": 2.13028853147224e-05, "loss": 0.2338, "step": 14427, "teacher_loss": 0.22631970047950745 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4237770438194275, "learning_rate": 2.1300824230827808e-05, "loss": 0.262, "step": 14428, "teacher_loss": 0.24397261440753937 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.6387193202972412, "learning_rate": 2.129876300247421e-05, "loss": 0.4991, "step": 14429, "teacher_loss": 0.48361361026763916 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.3046054244041443, "learning_rate": 2.129670162970886e-05, "loss": 0.2585, "step": 14430, "teacher_loss": 0.2533457279205322 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.48005443811416626, "learning_rate": 2.129464011257902e-05, "loss": 0.3538, "step": 14431, "teacher_loss": 0.3398140072822571 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.8018385171890259, "learning_rate": 2.1292578451131953e-05, "loss": 0.344, "step": 14432, "teacher_loss": 0.29312971234321594 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.3150048553943634, "learning_rate": 2.129051664541493e-05, "loss": 0.2657, "step": 14433, "teacher_loss": 0.26026415824890137 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.6677534580230713, "learning_rate": 2.1288454695475218e-05, "loss": 0.5446, "step": 14434, "teacher_loss": 0.5309034585952759 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.37522852420806885, "learning_rate": 2.1286392601360095e-05, "loss": 0.2901, "step": 14435, "teacher_loss": 0.28069210052490234 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.527740478515625, "learning_rate": 2.128433036311684e-05, "loss": 0.2821, "step": 14436, "teacher_loss": 0.2548117935657501 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5387645363807678, "learning_rate": 2.128226798079273e-05, "loss": 0.2881, "step": 14437, "teacher_loss": 0.2602214217185974 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.426969051361084, "learning_rate": 2.1280205454435047e-05, "loss": 0.2114, "step": 14438, "teacher_loss": 0.1874251365661621 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.2988869547843933, "learning_rate": 2.1278142784091085e-05, "loss": 0.271, "step": 14439, "teacher_loss": 0.2679510712623596 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.46239596605300903, "learning_rate": 2.127607996980813e-05, "loss": 0.3226, "step": 14440, "teacher_loss": 0.30709004402160645 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.19903071224689484, "learning_rate": 2.127401701163348e-05, "loss": 0.1719, "step": 14441, "teacher_loss": 0.16883979737758636 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4708396792411804, "learning_rate": 2.127195390961443e-05, "loss": 0.3982, "step": 14442, "teacher_loss": 0.39010220766067505 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5290124416351318, "learning_rate": 2.126989066379828e-05, "loss": 0.2699, "step": 14443, "teacher_loss": 0.24112248420715332 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.320599764585495, "learning_rate": 2.1267827274232335e-05, "loss": 0.2191, "step": 14444, "teacher_loss": 0.20787745714187622 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4347943961620331, "learning_rate": 2.12657637409639e-05, "loss": 0.2518, "step": 14445, "teacher_loss": 0.23144616186618805 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.15413731336593628, "learning_rate": 2.1263700064040293e-05, "loss": 0.1727, "step": 14446, "teacher_loss": 0.17479299008846283 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 1.0684468746185303, "learning_rate": 2.126163624350882e-05, "loss": 0.3132, "step": 14447, "teacher_loss": 0.22930869460105896 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.7531291246414185, "learning_rate": 2.12595722794168e-05, "loss": 0.3471, "step": 14448, "teacher_loss": 0.3019852638244629 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.3335733413696289, "learning_rate": 2.1257508171811563e-05, "loss": 0.2978, "step": 14449, "teacher_loss": 0.29385241866111755 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.32621896266937256, "learning_rate": 2.1255443920740417e-05, "loss": 0.3307, "step": 14450, "teacher_loss": 0.3312102258205414 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.2610246241092682, "learning_rate": 2.12533795262507e-05, "loss": 0.3511, "step": 14451, "teacher_loss": 0.3610619902610779 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.3389078974723816, "learning_rate": 2.125131498838974e-05, "loss": 0.2537, "step": 14452, "teacher_loss": 0.24422556161880493 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5795071125030518, "learning_rate": 2.1249250307204864e-05, "loss": 0.2266, "step": 14453, "teacher_loss": 0.1873396635055542 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.7731492519378662, "learning_rate": 2.124718548274342e-05, "loss": 0.3108, "step": 14454, "teacher_loss": 0.25940829515457153 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.34906116127967834, "learning_rate": 2.1245120515052738e-05, "loss": 0.29, "step": 14455, "teacher_loss": 0.2834164798259735 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.9481781721115112, "learning_rate": 2.124305540418017e-05, "loss": 0.2777, "step": 14456, "teacher_loss": 0.2032223343849182 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.43922561407089233, "learning_rate": 2.124099015017306e-05, "loss": 0.2405, "step": 14457, "teacher_loss": 0.2183646708726883 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.44619810581207275, "learning_rate": 2.1238924753078755e-05, "loss": 0.2945, "step": 14458, "teacher_loss": 0.2776389718055725 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5715669393539429, "learning_rate": 2.1236859212944613e-05, "loss": 0.3972, "step": 14459, "teacher_loss": 0.377782940864563 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.6885632276535034, "learning_rate": 2.123479352981799e-05, "loss": 0.313, "step": 14460, "teacher_loss": 0.27130627632141113 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4756855070590973, "learning_rate": 2.123272770374624e-05, "loss": 0.2834, "step": 14461, "teacher_loss": 0.2620348036289215 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.4813898205757141, "learning_rate": 2.1230661734776728e-05, "loss": 0.262, "step": 14462, "teacher_loss": 0.23762303590774536 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.8735138177871704, "learning_rate": 2.122859562295683e-05, "loss": 0.2217, "step": 14463, "teacher_loss": 0.14925727248191833 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.5166009664535522, "learning_rate": 2.1226529368333904e-05, "loss": 0.2449, "step": 14464, "teacher_loss": 0.21474343538284302 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.9103567600250244, "learning_rate": 2.122446297095533e-05, "loss": 0.3825, "step": 14465, "teacher_loss": 0.3238013982772827 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.23310214281082153, "learning_rate": 2.122239643086848e-05, "loss": 0.2248, "step": 14466, "teacher_loss": 0.22383025288581848 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.18237142264842987, "learning_rate": 2.1220329748120736e-05, "loss": 0.1698, "step": 14467, "teacher_loss": 0.16834893822669983 }, { "compression_loss": 0.0, "epoch": 2.61, "label_loss": 0.2115219235420227, "learning_rate": 2.1218262922759484e-05, "loss": 0.172, "step": 14468, "teacher_loss": 0.16755372285842896 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5679544806480408, "learning_rate": 2.1216195954832098e-05, "loss": 0.247, "step": 14469, "teacher_loss": 0.2113179713487625 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6342524290084839, "learning_rate": 2.1214128844385984e-05, "loss": 0.3366, "step": 14470, "teacher_loss": 0.3035721778869629 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.42312175035476685, "learning_rate": 2.1212061591468524e-05, "loss": 0.2136, "step": 14471, "teacher_loss": 0.19028016924858093 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5176512598991394, "learning_rate": 2.1209994196127117e-05, "loss": 0.5481, "step": 14472, "teacher_loss": 0.5514723062515259 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.39181992411613464, "learning_rate": 2.120792665840916e-05, "loss": 0.2419, "step": 14473, "teacher_loss": 0.2252657413482666 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 1.0661423206329346, "learning_rate": 2.1205858978362056e-05, "loss": 0.3835, "step": 14474, "teacher_loss": 0.3076481819152832 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.36675921082496643, "learning_rate": 2.1203791156033217e-05, "loss": 0.205, "step": 14475, "teacher_loss": 0.18701758980751038 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3144836723804474, "learning_rate": 2.1201723191470043e-05, "loss": 0.2553, "step": 14476, "teacher_loss": 0.2487637847661972 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.41050562262535095, "learning_rate": 2.1199655084719954e-05, "loss": 0.2477, "step": 14477, "teacher_loss": 0.22958195209503174 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.34472328424453735, "learning_rate": 2.1197586835830354e-05, "loss": 0.3646, "step": 14478, "teacher_loss": 0.36683762073516846 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3256007432937622, "learning_rate": 2.1195518444848673e-05, "loss": 0.1769, "step": 14479, "teacher_loss": 0.16036507487297058 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4364761710166931, "learning_rate": 2.1193449911822328e-05, "loss": 0.2845, "step": 14480, "teacher_loss": 0.2676158547401428 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4624486565589905, "learning_rate": 2.1191381236798746e-05, "loss": 0.2137, "step": 14481, "teacher_loss": 0.1860925257205963 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4825872778892517, "learning_rate": 2.1189312419825358e-05, "loss": 0.2201, "step": 14482, "teacher_loss": 0.1909511685371399 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.35513556003570557, "learning_rate": 2.118724346094959e-05, "loss": 0.2044, "step": 14483, "teacher_loss": 0.1876668483018875 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.2631889879703522, "learning_rate": 2.118517436021888e-05, "loss": 0.1785, "step": 14484, "teacher_loss": 0.16910137236118317 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.7301787734031677, "learning_rate": 2.1183105117680667e-05, "loss": 0.3538, "step": 14485, "teacher_loss": 0.31199949979782104 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.31994932889938354, "learning_rate": 2.1181035733382388e-05, "loss": 0.2923, "step": 14486, "teacher_loss": 0.2892826795578003 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.22506609559059143, "learning_rate": 2.1178966207371496e-05, "loss": 0.1481, "step": 14487, "teacher_loss": 0.13950274884700775 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3799022436141968, "learning_rate": 2.1176896539695434e-05, "loss": 0.4455, "step": 14488, "teacher_loss": 0.45276570320129395 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5600171685218811, "learning_rate": 2.1174826730401647e-05, "loss": 0.3911, "step": 14489, "teacher_loss": 0.3723044991493225 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.19089347124099731, "learning_rate": 2.1172756779537606e-05, "loss": 0.2136, "step": 14490, "teacher_loss": 0.21609412133693695 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.30896735191345215, "learning_rate": 2.117068668715075e-05, "loss": 0.2817, "step": 14491, "teacher_loss": 0.27861863374710083 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5656956434249878, "learning_rate": 2.1168616453288557e-05, "loss": 0.3747, "step": 14492, "teacher_loss": 0.353436142206192 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5164735317230225, "learning_rate": 2.1166546077998477e-05, "loss": 0.317, "step": 14493, "teacher_loss": 0.29486221075057983 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.32413625717163086, "learning_rate": 2.1164475561327984e-05, "loss": 0.2263, "step": 14494, "teacher_loss": 0.21547305583953857 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.28341031074523926, "learning_rate": 2.1162404903324558e-05, "loss": 0.2358, "step": 14495, "teacher_loss": 0.23055371642112732 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.2807903289794922, "learning_rate": 2.1160334104035654e-05, "loss": 0.211, "step": 14496, "teacher_loss": 0.2032170295715332 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4560181498527527, "learning_rate": 2.115826316350876e-05, "loss": 0.2071, "step": 14497, "teacher_loss": 0.17938853800296783 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.44641512632369995, "learning_rate": 2.1156192081791355e-05, "loss": 0.4098, "step": 14498, "teacher_loss": 0.4057130217552185 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4787118434906006, "learning_rate": 2.115412085893092e-05, "loss": 0.2293, "step": 14499, "teacher_loss": 0.2015482783317566 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5047792792320251, "learning_rate": 2.115204949497495e-05, "loss": 0.2904, "step": 14500, "teacher_loss": 0.2665749192237854 }, { "epoch": 2.62, "eval_exact_match": 79.51750236518448, "eval_f1": 86.93928215304093, "step": 14500 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5995454788208008, "learning_rate": 2.1149977989970926e-05, "loss": 0.2962, "step": 14501, "teacher_loss": 0.26248687505722046 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6062762141227722, "learning_rate": 2.114790634396635e-05, "loss": 0.2912, "step": 14502, "teacher_loss": 0.2562112808227539 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6573885679244995, "learning_rate": 2.114583455700871e-05, "loss": 0.3305, "step": 14503, "teacher_loss": 0.2941551208496094 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.37098604440689087, "learning_rate": 2.114376262914551e-05, "loss": 0.1917, "step": 14504, "teacher_loss": 0.1717677116394043 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5347769856452942, "learning_rate": 2.1141690560424253e-05, "loss": 0.2743, "step": 14505, "teacher_loss": 0.2453932762145996 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.45043063163757324, "learning_rate": 2.1139618350892447e-05, "loss": 0.3166, "step": 14506, "teacher_loss": 0.3017372488975525 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3075307011604309, "learning_rate": 2.1137546000597603e-05, "loss": 0.2254, "step": 14507, "teacher_loss": 0.21629445254802704 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3495030403137207, "learning_rate": 2.1135473509587222e-05, "loss": 0.2623, "step": 14508, "teacher_loss": 0.2525894045829773 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.8060945272445679, "learning_rate": 2.1133400877908833e-05, "loss": 0.3409, "step": 14509, "teacher_loss": 0.28916603326797485 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6402349472045898, "learning_rate": 2.1131328105609954e-05, "loss": 0.2651, "step": 14510, "teacher_loss": 0.2234615683555603 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6258589029312134, "learning_rate": 2.1129255192738096e-05, "loss": 0.3852, "step": 14511, "teacher_loss": 0.3584093451499939 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.10898482799530029, "learning_rate": 2.11271821393408e-05, "loss": 0.2298, "step": 14512, "teacher_loss": 0.24322909116744995 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.3679760992527008, "learning_rate": 2.1125108945465583e-05, "loss": 0.2313, "step": 14513, "teacher_loss": 0.21611681580543518 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5516211986541748, "learning_rate": 2.1123035611159984e-05, "loss": 0.2161, "step": 14514, "teacher_loss": 0.17886114120483398 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.1856776773929596, "learning_rate": 2.112096213647154e-05, "loss": 0.1809, "step": 14515, "teacher_loss": 0.1804029643535614 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.6840387582778931, "learning_rate": 2.111888852144778e-05, "loss": 0.2634, "step": 14516, "teacher_loss": 0.21670308709144592 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.7231588959693909, "learning_rate": 2.111681476613625e-05, "loss": 0.3998, "step": 14517, "teacher_loss": 0.36390289664268494 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4352991580963135, "learning_rate": 2.1114740870584504e-05, "loss": 0.2345, "step": 14518, "teacher_loss": 0.21220329403877258 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5919502973556519, "learning_rate": 2.111266683484008e-05, "loss": 0.2939, "step": 14519, "teacher_loss": 0.26076579093933105 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.5072752833366394, "learning_rate": 2.1110592658950534e-05, "loss": 0.2557, "step": 14520, "teacher_loss": 0.22772987186908722 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.29583653807640076, "learning_rate": 2.1108518342963412e-05, "loss": 0.2504, "step": 14521, "teacher_loss": 0.24532687664031982 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.4957273006439209, "learning_rate": 2.1106443886926288e-05, "loss": 0.2931, "step": 14522, "teacher_loss": 0.2705777585506439 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.34643977880477905, "learning_rate": 2.110436929088671e-05, "loss": 0.1677, "step": 14523, "teacher_loss": 0.14782464504241943 }, { "compression_loss": 0.0, "epoch": 2.62, "label_loss": 0.28452128171920776, "learning_rate": 2.110229455489224e-05, "loss": 0.2392, "step": 14524, "teacher_loss": 0.23421666026115417 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7480511665344238, "learning_rate": 2.1100219678990457e-05, "loss": 0.4178, "step": 14525, "teacher_loss": 0.3811163902282715 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.31360793113708496, "learning_rate": 2.1098144663228923e-05, "loss": 0.2742, "step": 14526, "teacher_loss": 0.26982730627059937 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.8070668578147888, "learning_rate": 2.109606950765522e-05, "loss": 0.3356, "step": 14527, "teacher_loss": 0.28317731618881226 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3485603630542755, "learning_rate": 2.1093994212316918e-05, "loss": 0.2649, "step": 14528, "teacher_loss": 0.2555796802043915 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.4407389760017395, "learning_rate": 2.1091918777261596e-05, "loss": 0.3781, "step": 14529, "teacher_loss": 0.371107816696167 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3816796839237213, "learning_rate": 2.1089843202536847e-05, "loss": 0.4507, "step": 14530, "teacher_loss": 0.458349347114563 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.4882408678531647, "learning_rate": 2.108776748819025e-05, "loss": 0.4083, "step": 14531, "teacher_loss": 0.3994430899620056 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3758133053779602, "learning_rate": 2.1085691634269397e-05, "loss": 0.2774, "step": 14532, "teacher_loss": 0.266434907913208 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7965704202651978, "learning_rate": 2.1083615640821875e-05, "loss": 0.348, "step": 14533, "teacher_loss": 0.29811179637908936 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3647003769874573, "learning_rate": 2.108153950789529e-05, "loss": 0.2781, "step": 14534, "teacher_loss": 0.2685237526893616 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.8953534960746765, "learning_rate": 2.107946323553724e-05, "loss": 0.3007, "step": 14535, "teacher_loss": 0.23466211557388306 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.26077595353126526, "learning_rate": 2.107738682379532e-05, "loss": 0.1888, "step": 14536, "teacher_loss": 0.18084746599197388 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.9403353333473206, "learning_rate": 2.1075310272717147e-05, "loss": 0.7519, "step": 14537, "teacher_loss": 0.730973482131958 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.32168591022491455, "learning_rate": 2.107323358235032e-05, "loss": 0.2376, "step": 14538, "teacher_loss": 0.2282719612121582 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.5695239901542664, "learning_rate": 2.1071156752742455e-05, "loss": 0.2445, "step": 14539, "teacher_loss": 0.20838084816932678 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.1920047402381897, "learning_rate": 2.106907978394117e-05, "loss": 0.1709, "step": 14540, "teacher_loss": 0.1685151755809784 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.5061135292053223, "learning_rate": 2.1067002675994077e-05, "loss": 0.28, "step": 14541, "teacher_loss": 0.25492262840270996 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.653713047504425, "learning_rate": 2.1064925428948808e-05, "loss": 0.339, "step": 14542, "teacher_loss": 0.30398696660995483 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.44536933302879333, "learning_rate": 2.106284804285298e-05, "loss": 0.3035, "step": 14543, "teacher_loss": 0.2876836955547333 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.6877913475036621, "learning_rate": 2.1060770517754223e-05, "loss": 0.5858, "step": 14544, "teacher_loss": 0.5744898319244385 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.40376752614974976, "learning_rate": 2.1058692853700174e-05, "loss": 0.2394, "step": 14545, "teacher_loss": 0.22113852202892303 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.36561787128448486, "learning_rate": 2.1056615050738458e-05, "loss": 0.2141, "step": 14546, "teacher_loss": 0.19726331532001495 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.27502134442329407, "learning_rate": 2.105453710891672e-05, "loss": 0.2366, "step": 14547, "teacher_loss": 0.23234912753105164 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.31792324781417847, "learning_rate": 2.10524590282826e-05, "loss": 0.2436, "step": 14548, "teacher_loss": 0.23531028628349304 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.35475093126296997, "learning_rate": 2.1050380808883735e-05, "loss": 0.2742, "step": 14549, "teacher_loss": 0.265228271484375 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.38412511348724365, "learning_rate": 2.1048302450767785e-05, "loss": 0.1854, "step": 14550, "teacher_loss": 0.16329364478588104 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3182380795478821, "learning_rate": 2.1046223953982385e-05, "loss": 0.2889, "step": 14551, "teacher_loss": 0.28558987379074097 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.6259177923202515, "learning_rate": 2.1044145318575204e-05, "loss": 0.5042, "step": 14552, "teacher_loss": 0.490645170211792 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.8444479703903198, "learning_rate": 2.1042066544593894e-05, "loss": 0.5037, "step": 14553, "teacher_loss": 0.4658720791339874 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.5971534252166748, "learning_rate": 2.1039987632086105e-05, "loss": 0.2396, "step": 14554, "teacher_loss": 0.19987118244171143 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.33437925577163696, "learning_rate": 2.1037908581099512e-05, "loss": 0.2993, "step": 14555, "teacher_loss": 0.2954440414905548 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.6378849148750305, "learning_rate": 2.1035829391681783e-05, "loss": 0.2156, "step": 14556, "teacher_loss": 0.16867297887802124 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.5989898443222046, "learning_rate": 2.1033750063880577e-05, "loss": 0.343, "step": 14557, "teacher_loss": 0.31458014249801636 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.27151185274124146, "learning_rate": 2.1031670597743574e-05, "loss": 0.2725, "step": 14558, "teacher_loss": 0.27260616421699524 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.44433945417404175, "learning_rate": 2.1029590993318446e-05, "loss": 0.224, "step": 14559, "teacher_loss": 0.19949209690093994 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7775967717170715, "learning_rate": 2.1027511250652877e-05, "loss": 0.272, "step": 14560, "teacher_loss": 0.21585991978645325 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.6937455534934998, "learning_rate": 2.1025431369794546e-05, "loss": 0.248, "step": 14561, "teacher_loss": 0.1985187530517578 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.9760698080062866, "learning_rate": 2.1023351350791138e-05, "loss": 0.3725, "step": 14562, "teacher_loss": 0.30542656779289246 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.27515149116516113, "learning_rate": 2.102127119369034e-05, "loss": 0.2038, "step": 14563, "teacher_loss": 0.19581684470176697 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3918849229812622, "learning_rate": 2.1019190898539845e-05, "loss": 0.2113, "step": 14564, "teacher_loss": 0.19118145108222961 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.2730717360973358, "learning_rate": 2.1017110465387355e-05, "loss": 0.2424, "step": 14565, "teacher_loss": 0.23895448446273804 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3024517297744751, "learning_rate": 2.1015029894280557e-05, "loss": 0.2226, "step": 14566, "teacher_loss": 0.21374335885047913 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.29027271270751953, "learning_rate": 2.1012949185267164e-05, "loss": 0.1594, "step": 14567, "teacher_loss": 0.14490941166877747 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.3681783080101013, "learning_rate": 2.1010868338394868e-05, "loss": 0.2367, "step": 14568, "teacher_loss": 0.22213119268417358 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.47246965765953064, "learning_rate": 2.1008787353711386e-05, "loss": 0.2155, "step": 14569, "teacher_loss": 0.1869097352027893 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7064085006713867, "learning_rate": 2.1006706231264426e-05, "loss": 0.4949, "step": 14570, "teacher_loss": 0.47139036655426025 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.1997348666191101, "learning_rate": 2.1004624971101696e-05, "loss": 0.2698, "step": 14571, "teacher_loss": 0.27758127450942993 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.2163429707288742, "learning_rate": 2.1002543573270925e-05, "loss": 0.2067, "step": 14572, "teacher_loss": 0.20557327568531036 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7282078266143799, "learning_rate": 2.1000462037819824e-05, "loss": 0.336, "step": 14573, "teacher_loss": 0.29239726066589355 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.5752413272857666, "learning_rate": 2.0998380364796112e-05, "loss": 0.277, "step": 14574, "teacher_loss": 0.24383561313152313 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7550591230392456, "learning_rate": 2.0996298554247534e-05, "loss": 0.4287, "step": 14575, "teacher_loss": 0.39241844415664673 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.7636871337890625, "learning_rate": 2.09942166062218e-05, "loss": 0.3412, "step": 14576, "teacher_loss": 0.2942371666431427 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.20371948182582855, "learning_rate": 2.0992134520766652e-05, "loss": 0.1594, "step": 14577, "teacher_loss": 0.15448534488677979 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.4729740023612976, "learning_rate": 2.0990052297929825e-05, "loss": 0.2799, "step": 14578, "teacher_loss": 0.2584296464920044 }, { "compression_loss": 0.0, "epoch": 2.63, "label_loss": 0.450888454914093, "learning_rate": 2.0987969937759058e-05, "loss": 0.2321, "step": 14579, "teacher_loss": 0.20781373977661133 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3802034854888916, "learning_rate": 2.0985887440302098e-05, "loss": 0.2746, "step": 14580, "teacher_loss": 0.2629126012325287 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.2989276945590973, "learning_rate": 2.0983804805606678e-05, "loss": 0.1827, "step": 14581, "teacher_loss": 0.16980625689029694 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5499764084815979, "learning_rate": 2.098172203372056e-05, "loss": 0.2976, "step": 14582, "teacher_loss": 0.26956692337989807 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5096302032470703, "learning_rate": 2.0979639124691488e-05, "loss": 0.2996, "step": 14583, "teacher_loss": 0.27626073360443115 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5435378551483154, "learning_rate": 2.0977556078567215e-05, "loss": 0.2684, "step": 14584, "teacher_loss": 0.23783159255981445 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.14881932735443115, "learning_rate": 2.097547289539551e-05, "loss": 0.2124, "step": 14585, "teacher_loss": 0.21947932243347168 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.1609226018190384, "learning_rate": 2.097338957522412e-05, "loss": 0.2814, "step": 14586, "teacher_loss": 0.2948067784309387 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.6556715965270996, "learning_rate": 2.0971306118100818e-05, "loss": 0.2885, "step": 14587, "teacher_loss": 0.2477402687072754 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.38650113344192505, "learning_rate": 2.096922252407337e-05, "loss": 0.169, "step": 14588, "teacher_loss": 0.14481377601623535 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3359062075614929, "learning_rate": 2.0967138793189548e-05, "loss": 0.2475, "step": 14589, "teacher_loss": 0.23768089711666107 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.19188524782657623, "learning_rate": 2.0965054925497124e-05, "loss": 0.1734, "step": 14590, "teacher_loss": 0.17137299478054047 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.482607901096344, "learning_rate": 2.0962970921043874e-05, "loss": 0.24, "step": 14591, "teacher_loss": 0.2130959928035736 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.6315637826919556, "learning_rate": 2.0960886779877576e-05, "loss": 0.3931, "step": 14592, "teacher_loss": 0.36656075716018677 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.4184904396533966, "learning_rate": 2.095880250204602e-05, "loss": 0.265, "step": 14593, "teacher_loss": 0.24792422354221344 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.21091127395629883, "learning_rate": 2.0956718087596984e-05, "loss": 0.1692, "step": 14594, "teacher_loss": 0.16451403498649597 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3134652376174927, "learning_rate": 2.0954633536578267e-05, "loss": 0.2231, "step": 14595, "teacher_loss": 0.2130478173494339 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.7868915796279907, "learning_rate": 2.0952548849037648e-05, "loss": 0.3979, "step": 14596, "teacher_loss": 0.3546677827835083 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.7050416469573975, "learning_rate": 2.0950464025022936e-05, "loss": 0.3306, "step": 14597, "teacher_loss": 0.28899216651916504 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3890405297279358, "learning_rate": 2.0948379064581926e-05, "loss": 0.2586, "step": 14598, "teacher_loss": 0.24412935972213745 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.4515721797943115, "learning_rate": 2.0946293967762414e-05, "loss": 0.1825, "step": 14599, "teacher_loss": 0.15259262919425964 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.49622777104377747, "learning_rate": 2.0944208734612214e-05, "loss": 0.3378, "step": 14600, "teacher_loss": 0.32023149728775024 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.27318406105041504, "learning_rate": 2.0942123365179123e-05, "loss": 0.2716, "step": 14601, "teacher_loss": 0.27146002650260925 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.33356279134750366, "learning_rate": 2.0940037859510963e-05, "loss": 0.2879, "step": 14602, "teacher_loss": 0.2828671336174011 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 1.2546168565750122, "learning_rate": 2.093795221765554e-05, "loss": 0.4395, "step": 14603, "teacher_loss": 0.3489788770675659 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.19852034747600555, "learning_rate": 2.093586643966068e-05, "loss": 0.2417, "step": 14604, "teacher_loss": 0.24644528329372406 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.6874034404754639, "learning_rate": 2.09337805255742e-05, "loss": 0.3272, "step": 14605, "teacher_loss": 0.28713172674179077 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3663751482963562, "learning_rate": 2.093169447544392e-05, "loss": 0.2996, "step": 14606, "teacher_loss": 0.2922108471393585 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.7669240236282349, "learning_rate": 2.0929608289317668e-05, "loss": 0.4997, "step": 14607, "teacher_loss": 0.4699811339378357 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5025430917739868, "learning_rate": 2.0927521967243277e-05, "loss": 0.3428, "step": 14608, "teacher_loss": 0.3250804543495178 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.29807913303375244, "learning_rate": 2.092543550926858e-05, "loss": 0.2532, "step": 14609, "teacher_loss": 0.2481934130191803 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.2336660623550415, "learning_rate": 2.0923348915441413e-05, "loss": 0.181, "step": 14610, "teacher_loss": 0.1751883625984192 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5441685318946838, "learning_rate": 2.0921262185809615e-05, "loss": 0.2548, "step": 14611, "teacher_loss": 0.22260916233062744 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.1980355679988861, "learning_rate": 2.0919175320421023e-05, "loss": 0.2452, "step": 14612, "teacher_loss": 0.2504881024360657 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.15502992272377014, "learning_rate": 2.0917088319323497e-05, "loss": 0.1693, "step": 14613, "teacher_loss": 0.17090265452861786 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.3092743754386902, "learning_rate": 2.0915001182564865e-05, "loss": 0.3307, "step": 14614, "teacher_loss": 0.3331238627433777 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.450814425945282, "learning_rate": 2.0912913910192996e-05, "loss": 0.209, "step": 14615, "teacher_loss": 0.1821650266647339 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.1796136498451233, "learning_rate": 2.091082650225574e-05, "loss": 0.1643, "step": 14616, "teacher_loss": 0.16258826851844788 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5932972431182861, "learning_rate": 2.090873895880095e-05, "loss": 0.317, "step": 14617, "teacher_loss": 0.28626346588134766 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.6091727018356323, "learning_rate": 2.090665127987649e-05, "loss": 0.3711, "step": 14618, "teacher_loss": 0.3446587324142456 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.640316367149353, "learning_rate": 2.090456346553023e-05, "loss": 0.478, "step": 14619, "teacher_loss": 0.4599878787994385 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.27549922466278076, "learning_rate": 2.0902475515810034e-05, "loss": 0.2438, "step": 14620, "teacher_loss": 0.24026665091514587 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.18280774354934692, "learning_rate": 2.0900387430763767e-05, "loss": 0.2057, "step": 14621, "teacher_loss": 0.20828525722026825 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 1.0501079559326172, "learning_rate": 2.0898299210439305e-05, "loss": 0.4427, "step": 14622, "teacher_loss": 0.37516582012176514 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.46030622720718384, "learning_rate": 2.0896210854884528e-05, "loss": 0.2721, "step": 14623, "teacher_loss": 0.2511984705924988 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.2684404253959656, "learning_rate": 2.0894122364147315e-05, "loss": 0.2386, "step": 14624, "teacher_loss": 0.23532560467720032 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.6694341897964478, "learning_rate": 2.0892033738275553e-05, "loss": 0.2312, "step": 14625, "teacher_loss": 0.18255293369293213 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.4191741347312927, "learning_rate": 2.0889944977317113e-05, "loss": 0.2634, "step": 14626, "teacher_loss": 0.2460833042860031 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 1.0263479948043823, "learning_rate": 2.0887856081319896e-05, "loss": 0.4365, "step": 14627, "teacher_loss": 0.37095707654953003 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.801518976688385, "learning_rate": 2.0885767050331794e-05, "loss": 0.3814, "step": 14628, "teacher_loss": 0.3347224295139313 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.5373536944389343, "learning_rate": 2.0883677884400695e-05, "loss": 0.225, "step": 14629, "teacher_loss": 0.190243199467659 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.563356876373291, "learning_rate": 2.088158858357451e-05, "loss": 0.3466, "step": 14630, "teacher_loss": 0.3224976062774658 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.24259087443351746, "learning_rate": 2.0879499147901127e-05, "loss": 0.2304, "step": 14631, "teacher_loss": 0.22904127836227417 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.4866694211959839, "learning_rate": 2.0877409577428458e-05, "loss": 0.3811, "step": 14632, "teacher_loss": 0.36938101053237915 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.23844614624977112, "learning_rate": 2.087531987220441e-05, "loss": 0.1762, "step": 14633, "teacher_loss": 0.16927990317344666 }, { "compression_loss": 0.0, "epoch": 2.64, "label_loss": 0.39554959535598755, "learning_rate": 2.087323003227689e-05, "loss": 0.2579, "step": 14634, "teacher_loss": 0.24259623885154724 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.35707932710647583, "learning_rate": 2.087114005769382e-05, "loss": 0.2258, "step": 14635, "teacher_loss": 0.21115869283676147 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.7241878509521484, "learning_rate": 2.08690499485031e-05, "loss": 0.3516, "step": 14636, "teacher_loss": 0.31020867824554443 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.10791706293821335, "learning_rate": 2.086695970475267e-05, "loss": 0.2177, "step": 14637, "teacher_loss": 0.22994467616081238 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5874520540237427, "learning_rate": 2.086486932649044e-05, "loss": 0.3201, "step": 14638, "teacher_loss": 0.29044651985168457 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.4429684281349182, "learning_rate": 2.0862778813764342e-05, "loss": 0.26, "step": 14639, "teacher_loss": 0.2396511733531952 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.9647077322006226, "learning_rate": 2.0860688166622306e-05, "loss": 0.2707, "step": 14640, "teacher_loss": 0.19361506402492523 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 1.0816384553909302, "learning_rate": 2.085859738511226e-05, "loss": 0.7439, "step": 14641, "teacher_loss": 0.7063202857971191 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5294765830039978, "learning_rate": 2.0856506469282134e-05, "loss": 0.2488, "step": 14642, "teacher_loss": 0.217629075050354 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5293600559234619, "learning_rate": 2.0854415419179884e-05, "loss": 0.2878, "step": 14643, "teacher_loss": 0.2610046863555908 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5124907493591309, "learning_rate": 2.0852324234853436e-05, "loss": 0.2466, "step": 14644, "teacher_loss": 0.21706527471542358 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.24402692914009094, "learning_rate": 2.0850232916350735e-05, "loss": 0.1897, "step": 14645, "teacher_loss": 0.1836518943309784 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.23504573106765747, "learning_rate": 2.084814146371974e-05, "loss": 0.2255, "step": 14646, "teacher_loss": 0.2243957817554474 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5070377588272095, "learning_rate": 2.084604987700839e-05, "loss": 0.2814, "step": 14647, "teacher_loss": 0.256369411945343 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.7381858825683594, "learning_rate": 2.0843958156264647e-05, "loss": 0.3674, "step": 14648, "teacher_loss": 0.32615670561790466 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.2413741797208786, "learning_rate": 2.0841866301536463e-05, "loss": 0.2452, "step": 14649, "teacher_loss": 0.24560299515724182 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.3306029438972473, "learning_rate": 2.08397743128718e-05, "loss": 0.2428, "step": 14650, "teacher_loss": 0.23300690948963165 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.7581703066825867, "learning_rate": 2.0837682190318626e-05, "loss": 0.3801, "step": 14651, "teacher_loss": 0.3380867838859558 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5163165330886841, "learning_rate": 2.083558993392489e-05, "loss": 0.2548, "step": 14652, "teacher_loss": 0.22577707469463348 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.375863254070282, "learning_rate": 2.083349754373858e-05, "loss": 0.1938, "step": 14653, "teacher_loss": 0.17356109619140625 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.24171626567840576, "learning_rate": 2.0831405019807664e-05, "loss": 0.2003, "step": 14654, "teacher_loss": 0.19567041099071503 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 1.1167583465576172, "learning_rate": 2.082931236218011e-05, "loss": 0.619, "step": 14655, "teacher_loss": 0.5636433362960815 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.7422667145729065, "learning_rate": 2.0827219570903898e-05, "loss": 0.4879, "step": 14656, "teacher_loss": 0.4596354365348816 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.6334754824638367, "learning_rate": 2.0825126646027017e-05, "loss": 0.2474, "step": 14657, "teacher_loss": 0.20449486374855042 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.415048748254776, "learning_rate": 2.0823033587597446e-05, "loss": 0.2228, "step": 14658, "teacher_loss": 0.2014315128326416 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.2275198996067047, "learning_rate": 2.082094039566317e-05, "loss": 0.1891, "step": 14659, "teacher_loss": 0.1848609745502472 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.8425917625427246, "learning_rate": 2.0818847070272188e-05, "loss": 0.3965, "step": 14660, "teacher_loss": 0.34697920083999634 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.22775152325630188, "learning_rate": 2.0816753611472487e-05, "loss": 0.2498, "step": 14661, "teacher_loss": 0.252196341753006 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.28745514154434204, "learning_rate": 2.081466001931206e-05, "loss": 0.2238, "step": 14662, "teacher_loss": 0.21674004197120667 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5068768262863159, "learning_rate": 2.081256629383892e-05, "loss": 0.2716, "step": 14663, "teacher_loss": 0.24546386301517487 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 1.4280047416687012, "learning_rate": 2.0810472435101052e-05, "loss": 0.3688, "step": 14664, "teacher_loss": 0.25105544924736023 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.2577989101409912, "learning_rate": 2.080837844314648e-05, "loss": 0.1955, "step": 14665, "teacher_loss": 0.188523069024086 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.27874815464019775, "learning_rate": 2.0806284318023203e-05, "loss": 0.1641, "step": 14666, "teacher_loss": 0.15139323472976685 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.6849238872528076, "learning_rate": 2.0804190059779227e-05, "loss": 0.2678, "step": 14667, "teacher_loss": 0.22150607407093048 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.98930823802948, "learning_rate": 2.080209566846259e-05, "loss": 0.3024, "step": 14668, "teacher_loss": 0.2260798215866089 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5425367951393127, "learning_rate": 2.0800001144121284e-05, "loss": 0.3573, "step": 14669, "teacher_loss": 0.33671891689300537 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.11649313569068909, "learning_rate": 2.0797906486803343e-05, "loss": 0.2412, "step": 14670, "teacher_loss": 0.2551063001155853 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.4284122586250305, "learning_rate": 2.079581169655679e-05, "loss": 0.2422, "step": 14671, "teacher_loss": 0.22154012322425842 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.402354896068573, "learning_rate": 2.079371677342965e-05, "loss": 0.2287, "step": 14672, "teacher_loss": 0.209381103515625 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.24666452407836914, "learning_rate": 2.079162171746996e-05, "loss": 0.1819, "step": 14673, "teacher_loss": 0.1746814250946045 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 1.0770916938781738, "learning_rate": 2.078952652872574e-05, "loss": 0.43, "step": 14674, "teacher_loss": 0.3581126034259796 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.27461695671081543, "learning_rate": 2.0787431207245044e-05, "loss": 0.195, "step": 14675, "teacher_loss": 0.18610987067222595 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.4925176501274109, "learning_rate": 2.0785335753075897e-05, "loss": 0.2064, "step": 14676, "teacher_loss": 0.17457237839698792 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.2951141595840454, "learning_rate": 2.0783240166266344e-05, "loss": 0.2292, "step": 14677, "teacher_loss": 0.22184959053993225 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.761034369468689, "learning_rate": 2.0781144446864436e-05, "loss": 0.2727, "step": 14678, "teacher_loss": 0.21839825809001923 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.3799636960029602, "learning_rate": 2.077904859491822e-05, "loss": 0.2031, "step": 14679, "teacher_loss": 0.18344098329544067 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.5852311849594116, "learning_rate": 2.0776952610475747e-05, "loss": 0.34, "step": 14680, "teacher_loss": 0.3127070963382721 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.23990550637245178, "learning_rate": 2.077485649358506e-05, "loss": 0.2702, "step": 14681, "teacher_loss": 0.27358514070510864 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.3969283699989319, "learning_rate": 2.0772760244294242e-05, "loss": 0.2322, "step": 14682, "teacher_loss": 0.21390017867088318 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.8457655906677246, "learning_rate": 2.0770663862651333e-05, "loss": 0.3171, "step": 14683, "teacher_loss": 0.25832873582839966 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.3686928153038025, "learning_rate": 2.0768567348704402e-05, "loss": 0.3334, "step": 14684, "teacher_loss": 0.32942837476730347 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.6739547252655029, "learning_rate": 2.076647070250152e-05, "loss": 0.2527, "step": 14685, "teacher_loss": 0.20592814683914185 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.26532652974128723, "learning_rate": 2.076437392409075e-05, "loss": 0.2427, "step": 14686, "teacher_loss": 0.2401755303144455 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.6995213031768799, "learning_rate": 2.0762277013520167e-05, "loss": 0.2633, "step": 14687, "teacher_loss": 0.214847132563591 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.4038154184818268, "learning_rate": 2.0760179970837855e-05, "loss": 0.231, "step": 14688, "teacher_loss": 0.21183782815933228 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.47509586811065674, "learning_rate": 2.075808279609188e-05, "loss": 0.2801, "step": 14689, "teacher_loss": 0.25839611887931824 }, { "compression_loss": 0.0, "epoch": 2.65, "label_loss": 0.7341402173042297, "learning_rate": 2.0755985489330332e-05, "loss": 0.3073, "step": 14690, "teacher_loss": 0.2598535120487213 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.4638305604457855, "learning_rate": 2.0753888050601296e-05, "loss": 0.2199, "step": 14691, "teacher_loss": 0.1927635669708252 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.23588450253009796, "learning_rate": 2.0751790479952855e-05, "loss": 0.192, "step": 14692, "teacher_loss": 0.1870940923690796 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5333236455917358, "learning_rate": 2.0749692777433108e-05, "loss": 0.3326, "step": 14693, "teacher_loss": 0.31025242805480957 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.7882859706878662, "learning_rate": 2.0747594943090137e-05, "loss": 0.6274, "step": 14694, "teacher_loss": 0.6095231175422668 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.37170228362083435, "learning_rate": 2.074549697697205e-05, "loss": 0.33, "step": 14695, "teacher_loss": 0.32532545924186707 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.16137054562568665, "learning_rate": 2.074339887912694e-05, "loss": 0.2005, "step": 14696, "teacher_loss": 0.20480555295944214 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.6075645685195923, "learning_rate": 2.0741300649602917e-05, "loss": 0.258, "step": 14697, "teacher_loss": 0.21915292739868164 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.31483930349349976, "learning_rate": 2.0739202288448087e-05, "loss": 0.2131, "step": 14698, "teacher_loss": 0.20175063610076904 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.31544730067253113, "learning_rate": 2.0737103795710547e-05, "loss": 0.1877, "step": 14699, "teacher_loss": 0.17353597283363342 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3023248314857483, "learning_rate": 2.0735005171438426e-05, "loss": 0.3303, "step": 14700, "teacher_loss": 0.3333730399608612 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.9221238493919373, "learning_rate": 2.073290641567983e-05, "loss": 0.3116, "step": 14701, "teacher_loss": 0.2437857687473297 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.45813173055648804, "learning_rate": 2.073080752848287e-05, "loss": 0.3086, "step": 14702, "teacher_loss": 0.2920103073120117 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.33743762969970703, "learning_rate": 2.0728708509895683e-05, "loss": 0.2028, "step": 14703, "teacher_loss": 0.18779322504997253 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3800710141658783, "learning_rate": 2.072660935996638e-05, "loss": 0.3146, "step": 14704, "teacher_loss": 0.3072799742221832 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.4884878993034363, "learning_rate": 2.07245100787431e-05, "loss": 0.1963, "step": 14705, "teacher_loss": 0.16387403011322021 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.4497961401939392, "learning_rate": 2.0722410666273962e-05, "loss": 0.2161, "step": 14706, "teacher_loss": 0.19016006588935852 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.30216801166534424, "learning_rate": 2.07203111226071e-05, "loss": 0.2682, "step": 14707, "teacher_loss": 0.26438117027282715 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.15227991342544556, "learning_rate": 2.071821144779066e-05, "loss": 0.1403, "step": 14708, "teacher_loss": 0.1389472782611847 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.28978532552719116, "learning_rate": 2.071611164187278e-05, "loss": 0.2554, "step": 14709, "teacher_loss": 0.25162842869758606 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3722158968448639, "learning_rate": 2.071401170490159e-05, "loss": 0.1713, "step": 14710, "teacher_loss": 0.1490097939968109 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.30517736077308655, "learning_rate": 2.0711911636925246e-05, "loss": 0.1626, "step": 14711, "teacher_loss": 0.14673930406570435 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.42546653747558594, "learning_rate": 2.0709811437991894e-05, "loss": 0.236, "step": 14712, "teacher_loss": 0.21493341028690338 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3332485258579254, "learning_rate": 2.0707711108149683e-05, "loss": 0.2306, "step": 14713, "teacher_loss": 0.21918663382530212 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.341885507106781, "learning_rate": 2.070561064744677e-05, "loss": 0.1602, "step": 14714, "teacher_loss": 0.14002607762813568 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5701409578323364, "learning_rate": 2.070351005593131e-05, "loss": 0.4271, "step": 14715, "teacher_loss": 0.41115817427635193 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.6645904779434204, "learning_rate": 2.0701409333651468e-05, "loss": 0.3465, "step": 14716, "teacher_loss": 0.3112054765224457 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.8569662570953369, "learning_rate": 2.0699308480655397e-05, "loss": 0.3761, "step": 14717, "teacher_loss": 0.3227236866950989 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.49947136640548706, "learning_rate": 2.0697207496991277e-05, "loss": 0.2813, "step": 14718, "teacher_loss": 0.25710728764533997 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5880289673805237, "learning_rate": 2.0695106382707267e-05, "loss": 0.2577, "step": 14719, "teacher_loss": 0.22094978392124176 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.18167561292648315, "learning_rate": 2.0693005137851543e-05, "loss": 0.1722, "step": 14720, "teacher_loss": 0.1711582988500595 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.38657066226005554, "learning_rate": 2.069090376247228e-05, "loss": 0.2095, "step": 14721, "teacher_loss": 0.18982061743736267 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5976470708847046, "learning_rate": 2.068880225661765e-05, "loss": 0.4819, "step": 14722, "teacher_loss": 0.469077467918396 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5527717471122742, "learning_rate": 2.0686700620335854e-05, "loss": 0.2502, "step": 14723, "teacher_loss": 0.21663561463356018 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.15773695707321167, "learning_rate": 2.068459885367505e-05, "loss": 0.1825, "step": 14724, "teacher_loss": 0.18526363372802734 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5297321081161499, "learning_rate": 2.0682496956683442e-05, "loss": 0.1848, "step": 14725, "teacher_loss": 0.146424800157547 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.602410078048706, "learning_rate": 2.0680394929409215e-05, "loss": 0.288, "step": 14726, "teacher_loss": 0.25306910276412964 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.44356757402420044, "learning_rate": 2.0678292771900565e-05, "loss": 0.2606, "step": 14727, "teacher_loss": 0.24022988975048065 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.6300420761108398, "learning_rate": 2.0676190484205686e-05, "loss": 0.2512, "step": 14728, "teacher_loss": 0.20909321308135986 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5623605251312256, "learning_rate": 2.0674088066372773e-05, "loss": 0.2513, "step": 14729, "teacher_loss": 0.21677166223526 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.32679712772369385, "learning_rate": 2.0671985518450036e-05, "loss": 0.317, "step": 14730, "teacher_loss": 0.31596440076828003 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5877115726470947, "learning_rate": 2.0669882840485678e-05, "loss": 0.4483, "step": 14731, "teacher_loss": 0.4328177273273468 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.406552255153656, "learning_rate": 2.06677800325279e-05, "loss": 0.2916, "step": 14732, "teacher_loss": 0.2788045406341553 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.7280393838882446, "learning_rate": 2.0665677094624925e-05, "loss": 0.3261, "step": 14733, "teacher_loss": 0.2814173102378845 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.22949644923210144, "learning_rate": 2.0663574026824956e-05, "loss": 0.2826, "step": 14734, "teacher_loss": 0.28846633434295654 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.22902411222457886, "learning_rate": 2.0661470829176215e-05, "loss": 0.1874, "step": 14735, "teacher_loss": 0.18280129134655 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.40011727809906006, "learning_rate": 2.0659367501726926e-05, "loss": 0.2172, "step": 14736, "teacher_loss": 0.19682228565216064 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3148219883441925, "learning_rate": 2.0657264044525304e-05, "loss": 0.26, "step": 14737, "teacher_loss": 0.2539372742176056 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.3955051004886627, "learning_rate": 2.065516045761958e-05, "loss": 0.1598, "step": 14738, "teacher_loss": 0.13363346457481384 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 1.0723299980163574, "learning_rate": 2.0653056741057986e-05, "loss": 0.6933, "step": 14739, "teacher_loss": 0.6512176990509033 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.45492395758628845, "learning_rate": 2.065095289488874e-05, "loss": 0.2737, "step": 14740, "teacher_loss": 0.2535954713821411 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.4235838055610657, "learning_rate": 2.0648848919160096e-05, "loss": 0.1998, "step": 14741, "teacher_loss": 0.1749129444360733 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.36798763275146484, "learning_rate": 2.0646744813920278e-05, "loss": 0.2218, "step": 14742, "teacher_loss": 0.20555952191352844 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.40499061346054077, "learning_rate": 2.0644640579217536e-05, "loss": 0.2394, "step": 14743, "teacher_loss": 0.2209864854812622 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.5199931263923645, "learning_rate": 2.06425362151001e-05, "loss": 0.3907, "step": 14744, "teacher_loss": 0.376347154378891 }, { "compression_loss": 0.0, "epoch": 2.66, "label_loss": 0.268401175737381, "learning_rate": 2.0640431721616233e-05, "loss": 0.1944, "step": 14745, "teacher_loss": 0.1861613541841507 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.30936434864997864, "learning_rate": 2.0638327098814175e-05, "loss": 0.2118, "step": 14746, "teacher_loss": 0.2009427547454834 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 1.0602633953094482, "learning_rate": 2.0636222346742184e-05, "loss": 0.3706, "step": 14747, "teacher_loss": 0.29398536682128906 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.8793731927871704, "learning_rate": 2.0634117465448507e-05, "loss": 0.3002, "step": 14748, "teacher_loss": 0.23583374917507172 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2738930583000183, "learning_rate": 2.063201245498141e-05, "loss": 0.2426, "step": 14749, "teacher_loss": 0.23912805318832397 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 1.2484601736068726, "learning_rate": 2.0629907315389157e-05, "loss": 0.336, "step": 14750, "teacher_loss": 0.23462095856666565 }, { "epoch": 2.67, "eval_exact_match": 79.59318826868495, "eval_f1": 87.144408385029, "step": 14750 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6365036964416504, "learning_rate": 2.0627802046720008e-05, "loss": 0.2622, "step": 14751, "teacher_loss": 0.2206268608570099 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5054407119750977, "learning_rate": 2.0625696649022225e-05, "loss": 0.2488, "step": 14752, "teacher_loss": 0.22033405303955078 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.4557850956916809, "learning_rate": 2.0623591122344093e-05, "loss": 0.2012, "step": 14753, "teacher_loss": 0.17294421792030334 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6534373760223389, "learning_rate": 2.0621485466733875e-05, "loss": 0.3412, "step": 14754, "teacher_loss": 0.3064731955528259 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.10340330749750137, "learning_rate": 2.0619379682239845e-05, "loss": 0.2099, "step": 14755, "teacher_loss": 0.22174470126628876 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.1720178872346878, "learning_rate": 2.0617273768910288e-05, "loss": 0.1669, "step": 14756, "teacher_loss": 0.1663341224193573 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.7388808727264404, "learning_rate": 2.0615167726793485e-05, "loss": 0.2578, "step": 14757, "teacher_loss": 0.20435532927513123 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.18003569543361664, "learning_rate": 2.061306155593773e-05, "loss": 0.1983, "step": 14758, "teacher_loss": 0.2003352791070938 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.42469677329063416, "learning_rate": 2.061095525639129e-05, "loss": 0.1962, "step": 14759, "teacher_loss": 0.17084118723869324 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2845797836780548, "learning_rate": 2.060884882820247e-05, "loss": 0.2396, "step": 14760, "teacher_loss": 0.234589621424675 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.28279832005500793, "learning_rate": 2.060674227141957e-05, "loss": 0.2059, "step": 14761, "teacher_loss": 0.19737903773784637 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6885349154472351, "learning_rate": 2.0604635586090873e-05, "loss": 0.3231, "step": 14762, "teacher_loss": 0.28254565596580505 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6126066446304321, "learning_rate": 2.060252877226469e-05, "loss": 0.3972, "step": 14763, "teacher_loss": 0.3732442855834961 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.8504101037979126, "learning_rate": 2.0600421829989314e-05, "loss": 0.3734, "step": 14764, "teacher_loss": 0.3203462064266205 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5746818780899048, "learning_rate": 2.059831475931306e-05, "loss": 0.3413, "step": 14765, "teacher_loss": 0.3153492212295532 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5643368363380432, "learning_rate": 2.0596207560284238e-05, "loss": 0.2862, "step": 14766, "teacher_loss": 0.25526243448257446 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5080165863037109, "learning_rate": 2.0594100232951147e-05, "loss": 0.2295, "step": 14767, "teacher_loss": 0.19854536652565002 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.3373258709907532, "learning_rate": 2.0591992777362112e-05, "loss": 0.2633, "step": 14768, "teacher_loss": 0.25505757331848145 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.087464340031147, "learning_rate": 2.058988519356545e-05, "loss": 0.149, "step": 14769, "teacher_loss": 0.15580469369888306 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2055964320898056, "learning_rate": 2.0587777481609476e-05, "loss": 0.1789, "step": 14770, "teacher_loss": 0.1759772151708603 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.38859981298446655, "learning_rate": 2.058566964154252e-05, "loss": 0.2961, "step": 14771, "teacher_loss": 0.2858433127403259 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.23692506551742554, "learning_rate": 2.0583561673412908e-05, "loss": 0.312, "step": 14772, "teacher_loss": 0.3203795552253723 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.568581223487854, "learning_rate": 2.0581453577268967e-05, "loss": 0.3331, "step": 14773, "teacher_loss": 0.3069083094596863 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6714084148406982, "learning_rate": 2.057934535315902e-05, "loss": 0.3505, "step": 14774, "teacher_loss": 0.31489598751068115 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.16717246174812317, "learning_rate": 2.0577237001131424e-05, "loss": 0.2964, "step": 14775, "teacher_loss": 0.310787558555603 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.4580898880958557, "learning_rate": 2.05751285212345e-05, "loss": 0.2646, "step": 14776, "teacher_loss": 0.24304884672164917 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.451576828956604, "learning_rate": 2.0573019913516597e-05, "loss": 0.2937, "step": 14777, "teacher_loss": 0.2761993110179901 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.16200639307498932, "learning_rate": 2.0570911178026054e-05, "loss": 0.133, "step": 14778, "teacher_loss": 0.1297340989112854 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.3857666254043579, "learning_rate": 2.0568802314811214e-05, "loss": 0.2469, "step": 14779, "teacher_loss": 0.23148225247859955 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.4879947900772095, "learning_rate": 2.0566693323920444e-05, "loss": 0.2986, "step": 14780, "teacher_loss": 0.2775239944458008 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.8031771183013916, "learning_rate": 2.0564584205402077e-05, "loss": 0.5503, "step": 14781, "teacher_loss": 0.5221806764602661 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2689746618270874, "learning_rate": 2.056247495930448e-05, "loss": 0.1917, "step": 14782, "teacher_loss": 0.1830991506576538 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6977099180221558, "learning_rate": 2.0560365585676017e-05, "loss": 0.3911, "step": 14783, "teacher_loss": 0.35707300901412964 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5370658040046692, "learning_rate": 2.055825608456503e-05, "loss": 0.274, "step": 14784, "teacher_loss": 0.24477878212928772 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.3308143615722656, "learning_rate": 2.05561464560199e-05, "loss": 0.2497, "step": 14785, "teacher_loss": 0.2407037913799286 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5128515958786011, "learning_rate": 2.0554036700088996e-05, "loss": 0.2721, "step": 14786, "teacher_loss": 0.24531877040863037 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.6130527257919312, "learning_rate": 2.0551926816820675e-05, "loss": 0.3641, "step": 14787, "teacher_loss": 0.3364737629890442 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2753695547580719, "learning_rate": 2.0549816806263322e-05, "loss": 0.2395, "step": 14788, "teacher_loss": 0.23546728491783142 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2698274254798889, "learning_rate": 2.0547706668465306e-05, "loss": 0.2708, "step": 14789, "teacher_loss": 0.27089452743530273 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5168047547340393, "learning_rate": 2.0545596403475008e-05, "loss": 0.4558, "step": 14790, "teacher_loss": 0.4490697979927063 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5163775682449341, "learning_rate": 2.0543486011340815e-05, "loss": 0.33, "step": 14791, "teacher_loss": 0.30933457612991333 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.34799379110336304, "learning_rate": 2.0541375492111107e-05, "loss": 0.2287, "step": 14792, "teacher_loss": 0.21542897820472717 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.3095616102218628, "learning_rate": 2.053926484583427e-05, "loss": 0.2214, "step": 14793, "teacher_loss": 0.21160462498664856 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.7041926980018616, "learning_rate": 2.05371540725587e-05, "loss": 0.322, "step": 14794, "teacher_loss": 0.27954310178756714 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.294477641582489, "learning_rate": 2.0535043172332787e-05, "loss": 0.3611, "step": 14795, "teacher_loss": 0.3685116171836853 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.1934657096862793, "learning_rate": 2.0532932145204932e-05, "loss": 0.2109, "step": 14796, "teacher_loss": 0.21281927824020386 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.2769962549209595, "learning_rate": 2.053082099122353e-05, "loss": 0.37, "step": 14797, "teacher_loss": 0.38036006689071655 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.669194757938385, "learning_rate": 2.0528709710436982e-05, "loss": 0.2887, "step": 14798, "teacher_loss": 0.2464020550251007 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.8606622219085693, "learning_rate": 2.05265983028937e-05, "loss": 0.318, "step": 14799, "teacher_loss": 0.2577003240585327 }, { "compression_loss": 0.0, "epoch": 2.67, "label_loss": 0.5190480947494507, "learning_rate": 2.0524486768642086e-05, "loss": 0.2916, "step": 14800, "teacher_loss": 0.2663763761520386 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.34998780488967896, "learning_rate": 2.0522375107730556e-05, "loss": 0.2638, "step": 14801, "teacher_loss": 0.2541815936565399 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4187970757484436, "learning_rate": 2.052026332020752e-05, "loss": 0.2141, "step": 14802, "teacher_loss": 0.19130240380764008 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.845827043056488, "learning_rate": 2.05181514061214e-05, "loss": 0.2638, "step": 14803, "teacher_loss": 0.1991061568260193 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.3965373635292053, "learning_rate": 2.0516039365520607e-05, "loss": 0.2277, "step": 14804, "teacher_loss": 0.20899033546447754 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.32548409700393677, "learning_rate": 2.0513927198453573e-05, "loss": 0.2778, "step": 14805, "teacher_loss": 0.27255067229270935 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4849543571472168, "learning_rate": 2.0511814904968717e-05, "loss": 0.1671, "step": 14806, "teacher_loss": 0.13182950019836426 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4429827332496643, "learning_rate": 2.050970248511447e-05, "loss": 0.2927, "step": 14807, "teacher_loss": 0.27595388889312744 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.40011781454086304, "learning_rate": 2.050758993893927e-05, "loss": 0.2157, "step": 14808, "teacher_loss": 0.1952388882637024 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5186194181442261, "learning_rate": 2.050547726649154e-05, "loss": 0.3738, "step": 14809, "teacher_loss": 0.35766738653182983 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.569491446018219, "learning_rate": 2.0503364467819725e-05, "loss": 0.2493, "step": 14810, "teacher_loss": 0.21372094750404358 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.33216992020606995, "learning_rate": 2.0501251542972262e-05, "loss": 0.2538, "step": 14811, "teacher_loss": 0.24510881304740906 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4135696589946747, "learning_rate": 2.0499138491997592e-05, "loss": 0.2934, "step": 14812, "teacher_loss": 0.28000539541244507 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.42928725481033325, "learning_rate": 2.049702531494417e-05, "loss": 0.258, "step": 14813, "teacher_loss": 0.2389732450246811 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5191138982772827, "learning_rate": 2.0494912011860435e-05, "loss": 0.258, "step": 14814, "teacher_loss": 0.22902607917785645 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.31187760829925537, "learning_rate": 2.049279858279484e-05, "loss": 0.2877, "step": 14815, "teacher_loss": 0.28500157594680786 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.23061445355415344, "learning_rate": 2.0490685027795843e-05, "loss": 0.254, "step": 14816, "teacher_loss": 0.2566325068473816 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.3891162872314453, "learning_rate": 2.04885713469119e-05, "loss": 0.2109, "step": 14817, "teacher_loss": 0.1911163032054901 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.24955663084983826, "learning_rate": 2.0486457540191474e-05, "loss": 0.1846, "step": 14818, "teacher_loss": 0.17732734978199005 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.2184136062860489, "learning_rate": 2.0484343607683026e-05, "loss": 0.209, "step": 14819, "teacher_loss": 0.20798182487487793 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4304956793785095, "learning_rate": 2.0482229549435017e-05, "loss": 0.3369, "step": 14820, "teacher_loss": 0.3264923691749573 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.6395919322967529, "learning_rate": 2.0480115365495928e-05, "loss": 0.4184, "step": 14821, "teacher_loss": 0.39378637075424194 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.44393083453178406, "learning_rate": 2.047800105591422e-05, "loss": 0.2814, "step": 14822, "teacher_loss": 0.26332634687423706 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.1826765239238739, "learning_rate": 2.047588662073837e-05, "loss": 0.2174, "step": 14823, "teacher_loss": 0.22122052311897278 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.18634039163589478, "learning_rate": 2.0473772060016862e-05, "loss": 0.2608, "step": 14824, "teacher_loss": 0.2690912187099457 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4635426104068756, "learning_rate": 2.047165737379817e-05, "loss": 0.2333, "step": 14825, "teacher_loss": 0.2077116072177887 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5371942520141602, "learning_rate": 2.0469542562130775e-05, "loss": 0.5128, "step": 14826, "teacher_loss": 0.5100386142730713 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.2768608629703522, "learning_rate": 2.046742762506317e-05, "loss": 0.2259, "step": 14827, "teacher_loss": 0.22021490335464478 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.24004840850830078, "learning_rate": 2.0465312562643846e-05, "loss": 0.2119, "step": 14828, "teacher_loss": 0.20877695083618164 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.8347723484039307, "learning_rate": 2.0463197374921288e-05, "loss": 0.5108, "step": 14829, "teacher_loss": 0.4747787117958069 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.3548860549926758, "learning_rate": 2.046108206194399e-05, "loss": 0.2471, "step": 14830, "teacher_loss": 0.23514020442962646 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4827243685722351, "learning_rate": 2.045896662376046e-05, "loss": 0.2524, "step": 14831, "teacher_loss": 0.22675958275794983 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.7522687911987305, "learning_rate": 2.045685106041919e-05, "loss": 0.3689, "step": 14832, "teacher_loss": 0.32631945610046387 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.6438785195350647, "learning_rate": 2.0454735371968683e-05, "loss": 0.2849, "step": 14833, "teacher_loss": 0.24505344033241272 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.8225363492965698, "learning_rate": 2.0452619558457448e-05, "loss": 0.2528, "step": 14834, "teacher_loss": 0.18947236239910126 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.2629261612892151, "learning_rate": 2.0450503619933997e-05, "loss": 0.2314, "step": 14835, "teacher_loss": 0.22791366279125214 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.25196897983551025, "learning_rate": 2.044838755644684e-05, "loss": 0.2562, "step": 14836, "teacher_loss": 0.25667887926101685 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.14289674162864685, "learning_rate": 2.0446271368044484e-05, "loss": 0.1845, "step": 14837, "teacher_loss": 0.1891317069530487 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.12363754212856293, "learning_rate": 2.0444155054775463e-05, "loss": 0.1938, "step": 14838, "teacher_loss": 0.2016124129295349 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.3097110092639923, "learning_rate": 2.044203861668829e-05, "loss": 0.3324, "step": 14839, "teacher_loss": 0.33490562438964844 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.524707555770874, "learning_rate": 2.0439922053831482e-05, "loss": 0.3185, "step": 14840, "teacher_loss": 0.2956264019012451 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.6648913621902466, "learning_rate": 2.0437805366253573e-05, "loss": 0.2934, "step": 14841, "teacher_loss": 0.25209522247314453 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 1.0346568822860718, "learning_rate": 2.043568855400309e-05, "loss": 0.3442, "step": 14842, "teacher_loss": 0.26744967699050903 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5004614591598511, "learning_rate": 2.0433571617128565e-05, "loss": 0.2421, "step": 14843, "teacher_loss": 0.21341001987457275 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.22472357749938965, "learning_rate": 2.0431454555678536e-05, "loss": 0.2311, "step": 14844, "teacher_loss": 0.23185688257217407 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.4382818341255188, "learning_rate": 2.0429337369701535e-05, "loss": 0.3341, "step": 14845, "teacher_loss": 0.32250866293907166 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.32862529158592224, "learning_rate": 2.042722005924611e-05, "loss": 0.1858, "step": 14846, "teacher_loss": 0.16997158527374268 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.43893536925315857, "learning_rate": 2.0425102624360797e-05, "loss": 0.3055, "step": 14847, "teacher_loss": 0.29072582721710205 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5672550797462463, "learning_rate": 2.0422985065094146e-05, "loss": 0.2316, "step": 14848, "teacher_loss": 0.19433942437171936 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.27996551990509033, "learning_rate": 2.042086738149471e-05, "loss": 0.1841, "step": 14849, "teacher_loss": 0.17339572310447693 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.9047365188598633, "learning_rate": 2.0418749573611033e-05, "loss": 0.4897, "step": 14850, "teacher_loss": 0.44353121519088745 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.533079981803894, "learning_rate": 2.041663164149168e-05, "loss": 0.2438, "step": 14851, "teacher_loss": 0.21162816882133484 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 1.0755133628845215, "learning_rate": 2.04145135851852e-05, "loss": 0.3964, "step": 14852, "teacher_loss": 0.3209264278411865 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.7049232721328735, "learning_rate": 2.0412395404740156e-05, "loss": 0.2866, "step": 14853, "teacher_loss": 0.24007275700569153 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.5834645628929138, "learning_rate": 2.0410277100205116e-05, "loss": 0.3281, "step": 14854, "teacher_loss": 0.2997483015060425 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.6641458868980408, "learning_rate": 2.040815867162864e-05, "loss": 0.4216, "step": 14855, "teacher_loss": 0.3946259319782257 }, { "compression_loss": 0.0, "epoch": 2.68, "label_loss": 0.32788607478141785, "learning_rate": 2.04060401190593e-05, "loss": 0.2071, "step": 14856, "teacher_loss": 0.1936974823474884 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.2416599839925766, "learning_rate": 2.040392144254567e-05, "loss": 0.2604, "step": 14857, "teacher_loss": 0.26249971985816956 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5173584222793579, "learning_rate": 2.040180264213632e-05, "loss": 0.3391, "step": 14858, "teacher_loss": 0.3193283975124359 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.3677915930747986, "learning_rate": 2.0399683717879837e-05, "loss": 0.2568, "step": 14859, "teacher_loss": 0.24450330436229706 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.22522315382957458, "learning_rate": 2.0397564669824792e-05, "loss": 0.2287, "step": 14860, "teacher_loss": 0.22910797595977783 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5012238025665283, "learning_rate": 2.039544549801977e-05, "loss": 0.2218, "step": 14861, "teacher_loss": 0.19079944491386414 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.3694916367530823, "learning_rate": 2.039332620251336e-05, "loss": 0.2039, "step": 14862, "teacher_loss": 0.18554240465164185 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.22652840614318848, "learning_rate": 2.0391206783354154e-05, "loss": 0.2142, "step": 14863, "teacher_loss": 0.21282660961151123 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6911014318466187, "learning_rate": 2.0389087240590736e-05, "loss": 0.3094, "step": 14864, "teacher_loss": 0.26693761348724365 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.2800799310207367, "learning_rate": 2.0386967574271705e-05, "loss": 0.2336, "step": 14865, "teacher_loss": 0.2284429520368576 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.37927111983299255, "learning_rate": 2.038484778444566e-05, "loss": 0.2709, "step": 14866, "teacher_loss": 0.25888872146606445 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5265012383460999, "learning_rate": 2.0382727871161197e-05, "loss": 0.4306, "step": 14867, "teacher_loss": 0.4199068546295166 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6282833814620972, "learning_rate": 2.0380607834466927e-05, "loss": 0.2349, "step": 14868, "teacher_loss": 0.19116206467151642 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.509934663772583, "learning_rate": 2.0378487674411447e-05, "loss": 0.2903, "step": 14869, "teacher_loss": 0.265918493270874 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6388763189315796, "learning_rate": 2.0376367391043375e-05, "loss": 0.2254, "step": 14870, "teacher_loss": 0.17950831353664398 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 1.0789724588394165, "learning_rate": 2.0374246984411315e-05, "loss": 0.4355, "step": 14871, "teacher_loss": 0.36395561695098877 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5592183470726013, "learning_rate": 2.0372126454563882e-05, "loss": 0.2987, "step": 14872, "teacher_loss": 0.2697659134864807 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.35236307978630066, "learning_rate": 2.03700058015497e-05, "loss": 0.2188, "step": 14873, "teacher_loss": 0.20399588346481323 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.31600648164749146, "learning_rate": 2.036788502541738e-05, "loss": 0.269, "step": 14874, "teacher_loss": 0.2637813687324524 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.48180603981018066, "learning_rate": 2.0365764126215555e-05, "loss": 0.219, "step": 14875, "teacher_loss": 0.18974421918392181 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5358086824417114, "learning_rate": 2.0363643103992847e-05, "loss": 0.3445, "step": 14876, "teacher_loss": 0.32321232557296753 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5469584465026855, "learning_rate": 2.036152195879788e-05, "loss": 0.3533, "step": 14877, "teacher_loss": 0.33181384205818176 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.42526283860206604, "learning_rate": 2.0359400690679288e-05, "loss": 0.2364, "step": 14878, "teacher_loss": 0.21537292003631592 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5540854930877686, "learning_rate": 2.035727929968571e-05, "loss": 0.2367, "step": 14879, "teacher_loss": 0.2014390528202057 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5503325462341309, "learning_rate": 2.0355157785865778e-05, "loss": 0.2183, "step": 14880, "teacher_loss": 0.1814216673374176 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.504286527633667, "learning_rate": 2.0353036149268135e-05, "loss": 0.2499, "step": 14881, "teacher_loss": 0.22167330980300903 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.8380905389785767, "learning_rate": 2.0350914389941423e-05, "loss": 0.3318, "step": 14882, "teacher_loss": 0.2755822241306305 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.4582725763320923, "learning_rate": 2.0348792507934283e-05, "loss": 0.2939, "step": 14883, "teacher_loss": 0.2755844295024872 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6674776077270508, "learning_rate": 2.034667050329537e-05, "loss": 0.2184, "step": 14884, "teacher_loss": 0.16845136880874634 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.27727022767066956, "learning_rate": 2.0344548376073328e-05, "loss": 0.241, "step": 14885, "teacher_loss": 0.23698115348815918 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.2868749797344208, "learning_rate": 2.0342426126316825e-05, "loss": 0.2382, "step": 14886, "teacher_loss": 0.23276180028915405 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.522817850112915, "learning_rate": 2.03403037540745e-05, "loss": 0.2846, "step": 14887, "teacher_loss": 0.2581116557121277 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5375682711601257, "learning_rate": 2.0338181259395023e-05, "loss": 0.2082, "step": 14888, "teacher_loss": 0.17165547609329224 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6137347221374512, "learning_rate": 2.0336058642327058e-05, "loss": 0.3141, "step": 14889, "teacher_loss": 0.28084874153137207 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5696321725845337, "learning_rate": 2.0333935902919264e-05, "loss": 0.3638, "step": 14890, "teacher_loss": 0.340931236743927 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.2673691213130951, "learning_rate": 2.0331813041220312e-05, "loss": 0.2372, "step": 14891, "teacher_loss": 0.2338741570711136 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.4888160228729248, "learning_rate": 2.032969005727887e-05, "loss": 0.2351, "step": 14892, "teacher_loss": 0.20687642693519592 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6059926748275757, "learning_rate": 2.0327566951143615e-05, "loss": 0.3272, "step": 14893, "teacher_loss": 0.2962338328361511 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.24283966422080994, "learning_rate": 2.0325443722863228e-05, "loss": 0.2503, "step": 14894, "teacher_loss": 0.25113609433174133 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.27584022283554077, "learning_rate": 2.0323320372486378e-05, "loss": 0.223, "step": 14895, "teacher_loss": 0.21708548069000244 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.17030948400497437, "learning_rate": 2.0321196900061753e-05, "loss": 0.1562, "step": 14896, "teacher_loss": 0.15462495386600494 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6236451864242554, "learning_rate": 2.0319073305638035e-05, "loss": 0.2856, "step": 14897, "teacher_loss": 0.24798667430877686 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.3590516448020935, "learning_rate": 2.0316949589263917e-05, "loss": 0.2264, "step": 14898, "teacher_loss": 0.21161547303199768 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.2167343944311142, "learning_rate": 2.031482575098808e-05, "loss": 0.2339, "step": 14899, "teacher_loss": 0.23578590154647827 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.43944716453552246, "learning_rate": 2.031270179085923e-05, "loss": 0.2189, "step": 14900, "teacher_loss": 0.19440308213233948 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.23635739088058472, "learning_rate": 2.0310577708926057e-05, "loss": 0.2483, "step": 14901, "teacher_loss": 0.24957653880119324 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.4435775876045227, "learning_rate": 2.0308453505237252e-05, "loss": 0.2123, "step": 14902, "teacher_loss": 0.18658530712127686 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.6203392148017883, "learning_rate": 2.030632917984153e-05, "loss": 0.3012, "step": 14903, "teacher_loss": 0.26576539874076843 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.38520002365112305, "learning_rate": 2.0304204732787585e-05, "loss": 0.3173, "step": 14904, "teacher_loss": 0.3097492456436157 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.39204150438308716, "learning_rate": 2.0302080164124128e-05, "loss": 0.1985, "step": 14905, "teacher_loss": 0.17703217267990112 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.385428249835968, "learning_rate": 2.0299955473899876e-05, "loss": 0.2028, "step": 14906, "teacher_loss": 0.18245339393615723 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.3512749671936035, "learning_rate": 2.029783066216353e-05, "loss": 0.296, "step": 14907, "teacher_loss": 0.28990548849105835 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.21630430221557617, "learning_rate": 2.0295705728963808e-05, "loss": 0.2158, "step": 14908, "teacher_loss": 0.2157471776008606 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.5027220249176025, "learning_rate": 2.029358067434944e-05, "loss": 0.2659, "step": 14909, "teacher_loss": 0.2395501434803009 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.179561585187912, "learning_rate": 2.0291455498369128e-05, "loss": 0.2769, "step": 14910, "teacher_loss": 0.28771156072616577 }, { "compression_loss": 0.0, "epoch": 2.69, "label_loss": 0.18068565428256989, "learning_rate": 2.028933020107161e-05, "loss": 0.1871, "step": 14911, "teacher_loss": 0.18781127035617828 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4077897071838379, "learning_rate": 2.028720478250561e-05, "loss": 0.2073, "step": 14912, "teacher_loss": 0.18507859110832214 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.46194833517074585, "learning_rate": 2.028507924271985e-05, "loss": 0.2616, "step": 14913, "teacher_loss": 0.23930887877941132 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4916226863861084, "learning_rate": 2.028295358176308e-05, "loss": 0.2695, "step": 14914, "teacher_loss": 0.24482116103172302 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.36132073402404785, "learning_rate": 2.0280827799684013e-05, "loss": 0.2553, "step": 14915, "teacher_loss": 0.2434784471988678 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.40071791410446167, "learning_rate": 2.0278701896531404e-05, "loss": 0.2575, "step": 14916, "teacher_loss": 0.2416183203458786 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5685251355171204, "learning_rate": 2.027657587235398e-05, "loss": 0.2723, "step": 14917, "teacher_loss": 0.23935860395431519 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.2976388931274414, "learning_rate": 2.0274449727200497e-05, "loss": 0.209, "step": 14918, "teacher_loss": 0.19914615154266357 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4672843813896179, "learning_rate": 2.0272323461119694e-05, "loss": 0.2154, "step": 14919, "teacher_loss": 0.1874154955148697 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5073993802070618, "learning_rate": 2.0270197074160323e-05, "loss": 0.4834, "step": 14920, "teacher_loss": 0.48074042797088623 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 1.1281288862228394, "learning_rate": 2.0268070566371133e-05, "loss": 0.6246, "step": 14921, "teacher_loss": 0.5686434507369995 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4203755855560303, "learning_rate": 2.0265943937800875e-05, "loss": 0.2661, "step": 14922, "teacher_loss": 0.24896439909934998 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5933854579925537, "learning_rate": 2.0263817188498313e-05, "loss": 0.2773, "step": 14923, "teacher_loss": 0.24218492209911346 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4876479506492615, "learning_rate": 2.0261690318512207e-05, "loss": 0.3052, "step": 14924, "teacher_loss": 0.28497496247291565 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.677702009677887, "learning_rate": 2.025956332789132e-05, "loss": 0.2422, "step": 14925, "teacher_loss": 0.19386549293994904 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.930736780166626, "learning_rate": 2.025743621668441e-05, "loss": 0.3899, "step": 14926, "teacher_loss": 0.3298322558403015 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6612527966499329, "learning_rate": 2.025530898494025e-05, "loss": 0.3012, "step": 14927, "teacher_loss": 0.2612287998199463 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.8193802237510681, "learning_rate": 2.025318163270761e-05, "loss": 0.5153, "step": 14928, "teacher_loss": 0.48155510425567627 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4310070872306824, "learning_rate": 2.025105416003527e-05, "loss": 0.2806, "step": 14929, "teacher_loss": 0.26393601298332214 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.25112998485565186, "learning_rate": 2.0248926566971996e-05, "loss": 0.2119, "step": 14930, "teacher_loss": 0.2074863612651825 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6088855266571045, "learning_rate": 2.024679885356658e-05, "loss": 0.2971, "step": 14931, "teacher_loss": 0.2624693512916565 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6737299561500549, "learning_rate": 2.0244671019867788e-05, "loss": 0.2164, "step": 14932, "teacher_loss": 0.1656334400177002 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.22340169548988342, "learning_rate": 2.024254306592442e-05, "loss": 0.1796, "step": 14933, "teacher_loss": 0.17475393414497375 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6424590349197388, "learning_rate": 2.024041499178526e-05, "loss": 0.2593, "step": 14934, "teacher_loss": 0.21674300730228424 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.2921537160873413, "learning_rate": 2.0238286797499085e-05, "loss": 0.2715, "step": 14935, "teacher_loss": 0.2692144513130188 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4621097147464752, "learning_rate": 2.0236158483114707e-05, "loss": 0.2368, "step": 14936, "teacher_loss": 0.21177539229393005 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.38373756408691406, "learning_rate": 2.0234030048680913e-05, "loss": 0.2315, "step": 14937, "teacher_loss": 0.21457837522029877 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.3856561779975891, "learning_rate": 2.0231901494246504e-05, "loss": 0.2405, "step": 14938, "teacher_loss": 0.22442519664764404 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.43048378825187683, "learning_rate": 2.022977281986028e-05, "loss": 0.2571, "step": 14939, "teacher_loss": 0.23779019713401794 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5011731386184692, "learning_rate": 2.022764402557104e-05, "loss": 0.3095, "step": 14940, "teacher_loss": 0.2881562113761902 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5723336338996887, "learning_rate": 2.02255151114276e-05, "loss": 0.4479, "step": 14941, "teacher_loss": 0.434123694896698 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4294903874397278, "learning_rate": 2.0223386077478766e-05, "loss": 0.2913, "step": 14942, "teacher_loss": 0.2759302854537964 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6003793478012085, "learning_rate": 2.0221256923773345e-05, "loss": 0.2718, "step": 14943, "teacher_loss": 0.23534628748893738 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.12675902247428894, "learning_rate": 2.0219127650360162e-05, "loss": 0.1277, "step": 14944, "teacher_loss": 0.12781323492527008 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.45469433069229126, "learning_rate": 2.0216998257288024e-05, "loss": 0.291, "step": 14945, "teacher_loss": 0.27282413840293884 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.3445495367050171, "learning_rate": 2.0214868744605764e-05, "loss": 0.1886, "step": 14946, "teacher_loss": 0.17132286727428436 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.707584023475647, "learning_rate": 2.0212739112362194e-05, "loss": 0.4402, "step": 14947, "teacher_loss": 0.4104752540588379 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.42357051372528076, "learning_rate": 2.0210609360606145e-05, "loss": 0.2966, "step": 14948, "teacher_loss": 0.28252023458480835 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 1.0093250274658203, "learning_rate": 2.0208479489386445e-05, "loss": 0.2453, "step": 14949, "teacher_loss": 0.16037200391292572 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6762949228286743, "learning_rate": 2.020634949875193e-05, "loss": 0.4994, "step": 14950, "teacher_loss": 0.47974202036857605 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5987178683280945, "learning_rate": 2.020421938875143e-05, "loss": 0.2842, "step": 14951, "teacher_loss": 0.2492598593235016 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6745460033416748, "learning_rate": 2.0202089159433777e-05, "loss": 0.3873, "step": 14952, "teacher_loss": 0.35540324449539185 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.2650189697742462, "learning_rate": 2.0199958810847823e-05, "loss": 0.2489, "step": 14953, "teacher_loss": 0.24714966118335724 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.5582663416862488, "learning_rate": 2.01978283430424e-05, "loss": 0.2583, "step": 14954, "teacher_loss": 0.22492723166942596 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.1415107250213623, "learning_rate": 2.019569775606636e-05, "loss": 0.1824, "step": 14955, "teacher_loss": 0.18693727254867554 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.3144680857658386, "learning_rate": 2.0193567049968543e-05, "loss": 0.248, "step": 14956, "teacher_loss": 0.24056866765022278 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4739103317260742, "learning_rate": 2.0191436224797807e-05, "loss": 0.2721, "step": 14957, "teacher_loss": 0.24962449073791504 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.13006094098091125, "learning_rate": 2.0189305280603005e-05, "loss": 0.2387, "step": 14958, "teacher_loss": 0.2507938742637634 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.6888920068740845, "learning_rate": 2.018717421743299e-05, "loss": 0.3612, "step": 14959, "teacher_loss": 0.3247529864311218 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.2874410152435303, "learning_rate": 2.0185043035336617e-05, "loss": 0.2244, "step": 14960, "teacher_loss": 0.21735477447509766 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.11321654915809631, "learning_rate": 2.018291173436276e-05, "loss": 0.1598, "step": 14961, "teacher_loss": 0.16503086686134338 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.423062801361084, "learning_rate": 2.0180780314560278e-05, "loss": 0.2577, "step": 14962, "teacher_loss": 0.2392856776714325 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.2879185378551483, "learning_rate": 2.0178648775978028e-05, "loss": 0.1797, "step": 14963, "teacher_loss": 0.16769476234912872 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4349994361400604, "learning_rate": 2.0176517118664893e-05, "loss": 0.1966, "step": 14964, "teacher_loss": 0.170067697763443 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.8133188486099243, "learning_rate": 2.0174385342669737e-05, "loss": 0.6554, "step": 14965, "teacher_loss": 0.6378690004348755 }, { "compression_loss": 0.0, "epoch": 2.7, "label_loss": 0.4016261100769043, "learning_rate": 2.0172253448041443e-05, "loss": 0.2626, "step": 14966, "teacher_loss": 0.24720095098018646 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.309848427772522, "learning_rate": 2.0170121434828883e-05, "loss": 0.232, "step": 14967, "teacher_loss": 0.22334083914756775 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.3638608157634735, "learning_rate": 2.0167989303080935e-05, "loss": 0.2184, "step": 14968, "teacher_loss": 0.20221257209777832 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.70854252576828, "learning_rate": 2.0165857052846496e-05, "loss": 0.4609, "step": 14969, "teacher_loss": 0.4333451986312866 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.537553608417511, "learning_rate": 2.0163724684174435e-05, "loss": 0.3886, "step": 14970, "teacher_loss": 0.37209880352020264 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7901999950408936, "learning_rate": 2.0161592197113652e-05, "loss": 0.3719, "step": 14971, "teacher_loss": 0.3253900408744812 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.4182116985321045, "learning_rate": 2.0159459591713036e-05, "loss": 0.3722, "step": 14972, "teacher_loss": 0.3670896887779236 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.4978400468826294, "learning_rate": 2.0157326868021474e-05, "loss": 0.2628, "step": 14973, "teacher_loss": 0.23664109408855438 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.30138349533081055, "learning_rate": 2.0155194026087883e-05, "loss": 0.202, "step": 14974, "teacher_loss": 0.19093114137649536 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.2574589252471924, "learning_rate": 2.0153061065961134e-05, "loss": 0.2109, "step": 14975, "teacher_loss": 0.20575420558452606 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.22126173973083496, "learning_rate": 2.0150927987690152e-05, "loss": 0.1673, "step": 14976, "teacher_loss": 0.1612926423549652 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.6109637022018433, "learning_rate": 2.0148794791323834e-05, "loss": 0.3405, "step": 14977, "teacher_loss": 0.3104754388332367 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.6727228164672852, "learning_rate": 2.0146661476911084e-05, "loss": 0.3203, "step": 14978, "teacher_loss": 0.28110796213150024 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.815883994102478, "learning_rate": 2.014452804450082e-05, "loss": 0.3878, "step": 14979, "teacher_loss": 0.3402690291404724 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.636858344078064, "learning_rate": 2.0142394494141958e-05, "loss": 0.2753, "step": 14980, "teacher_loss": 0.23514223098754883 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.3174043893814087, "learning_rate": 2.01402608258834e-05, "loss": 0.2627, "step": 14981, "teacher_loss": 0.25658273696899414 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5861826539039612, "learning_rate": 2.0138127039774075e-05, "loss": 0.2581, "step": 14982, "teacher_loss": 0.22161316871643066 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7548277378082275, "learning_rate": 2.01359931358629e-05, "loss": 0.4608, "step": 14983, "teacher_loss": 0.42816829681396484 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.8381767272949219, "learning_rate": 2.0133859114198803e-05, "loss": 0.435, "step": 14984, "teacher_loss": 0.3902357220649719 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.45744454860687256, "learning_rate": 2.0131724974830708e-05, "loss": 0.2484, "step": 14985, "teacher_loss": 0.2251608669757843 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7651112079620361, "learning_rate": 2.0129590717807545e-05, "loss": 0.3111, "step": 14986, "teacher_loss": 0.26064351201057434 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.2947438359260559, "learning_rate": 2.0127456343178248e-05, "loss": 0.2453, "step": 14987, "teacher_loss": 0.23976153135299683 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.34521710872650146, "learning_rate": 2.0125321850991748e-05, "loss": 0.1885, "step": 14988, "teacher_loss": 0.17104095220565796 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.35951903462409973, "learning_rate": 2.0123187241296988e-05, "loss": 0.2669, "step": 14989, "teacher_loss": 0.2566111981868744 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.3678209185600281, "learning_rate": 2.01210525141429e-05, "loss": 0.2, "step": 14990, "teacher_loss": 0.18132254481315613 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7792915105819702, "learning_rate": 2.011891766957843e-05, "loss": 0.2716, "step": 14991, "teacher_loss": 0.2152017056941986 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7213821411132812, "learning_rate": 2.011678270765253e-05, "loss": 0.3212, "step": 14992, "teacher_loss": 0.2767312824726105 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.42048174142837524, "learning_rate": 2.011464762841414e-05, "loss": 0.295, "step": 14993, "teacher_loss": 0.28108155727386475 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5271192789077759, "learning_rate": 2.011251243191222e-05, "loss": 0.2411, "step": 14994, "teacher_loss": 0.20933997631072998 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.33664506673812866, "learning_rate": 2.0110377118195713e-05, "loss": 0.2295, "step": 14995, "teacher_loss": 0.21758291125297546 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.39082062244415283, "learning_rate": 2.010824168731358e-05, "loss": 0.315, "step": 14996, "teacher_loss": 0.3065846860408783 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.30067944526672363, "learning_rate": 2.0106106139314782e-05, "loss": 0.2156, "step": 14997, "teacher_loss": 0.20610946416854858 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.8333945274353027, "learning_rate": 2.010397047424828e-05, "loss": 0.4313, "step": 14998, "teacher_loss": 0.3866270184516907 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5261723399162292, "learning_rate": 2.010183469216304e-05, "loss": 0.3401, "step": 14999, "teacher_loss": 0.31944113969802856 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.15280258655548096, "learning_rate": 2.009969879310802e-05, "loss": 0.2276, "step": 15000, "teacher_loss": 0.23591607809066772 }, { "epoch": 2.71, "eval_exact_match": 79.71617786187322, "eval_f1": 87.17503057374732, "step": 15000 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.3536294996738434, "learning_rate": 2.0097562777132202e-05, "loss": 0.221, "step": 15001, "teacher_loss": 0.20631879568099976 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.30649125576019287, "learning_rate": 2.009542664428455e-05, "loss": 0.2487, "step": 15002, "teacher_loss": 0.24228887259960175 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.4666973352432251, "learning_rate": 2.0093290394614038e-05, "loss": 0.25, "step": 15003, "teacher_loss": 0.22590233385562897 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5467059016227722, "learning_rate": 2.0091154028169655e-05, "loss": 0.2859, "step": 15004, "teacher_loss": 0.2568756937980652 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.1568412482738495, "learning_rate": 2.0089017545000372e-05, "loss": 0.2014, "step": 15005, "teacher_loss": 0.20640623569488525 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7227605581283569, "learning_rate": 2.008688094515517e-05, "loss": 0.3014, "step": 15006, "teacher_loss": 0.25458258390426636 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.4704834222793579, "learning_rate": 2.0084744228683047e-05, "loss": 0.2567, "step": 15007, "teacher_loss": 0.23297539353370667 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.20007620751857758, "learning_rate": 2.0082607395632978e-05, "loss": 0.1581, "step": 15008, "teacher_loss": 0.15343214571475983 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.9830831289291382, "learning_rate": 2.008047044605396e-05, "loss": 0.3883, "step": 15009, "teacher_loss": 0.3222392201423645 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.2133117914199829, "learning_rate": 2.007833337999499e-05, "loss": 0.1469, "step": 15010, "teacher_loss": 0.13955624401569366 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.19274112582206726, "learning_rate": 2.0076196197505056e-05, "loss": 0.2294, "step": 15011, "teacher_loss": 0.23343610763549805 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.43140918016433716, "learning_rate": 2.0074058898633164e-05, "loss": 0.2622, "step": 15012, "teacher_loss": 0.24334970116615295 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.24975115060806274, "learning_rate": 2.007192148342832e-05, "loss": 0.204, "step": 15013, "teacher_loss": 0.19893011450767517 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.19218358397483826, "learning_rate": 2.006978395193952e-05, "loss": 0.1982, "step": 15014, "teacher_loss": 0.19886913895606995 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.8757625222206116, "learning_rate": 2.006764630421577e-05, "loss": 0.3259, "step": 15015, "teacher_loss": 0.26476386189460754 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.2087867259979248, "learning_rate": 2.006550854030609e-05, "loss": 0.1731, "step": 15016, "teacher_loss": 0.16912755370140076 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5303830504417419, "learning_rate": 2.006337066025948e-05, "loss": 0.219, "step": 15017, "teacher_loss": 0.18441784381866455 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.469784140586853, "learning_rate": 2.0061232664124966e-05, "loss": 0.3161, "step": 15018, "teacher_loss": 0.29902392625808716 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.17396605014801025, "learning_rate": 2.005909455195156e-05, "loss": 0.237, "step": 15019, "teacher_loss": 0.2440508008003235 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.5754035711288452, "learning_rate": 2.0056956323788282e-05, "loss": 0.2102, "step": 15020, "teacher_loss": 0.16965632140636444 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.7735744714736938, "learning_rate": 2.005481797968416e-05, "loss": 0.6237, "step": 15021, "teacher_loss": 0.6070912480354309 }, { "compression_loss": 0.0, "epoch": 2.71, "label_loss": 0.31068155169487, "learning_rate": 2.0052679519688216e-05, "loss": 0.1982, "step": 15022, "teacher_loss": 0.18568764626979828 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.16337335109710693, "learning_rate": 2.0050540943849477e-05, "loss": 0.2176, "step": 15023, "teacher_loss": 0.2235870063304901 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.2505873441696167, "learning_rate": 2.004840225221698e-05, "loss": 0.1784, "step": 15024, "teacher_loss": 0.17039605975151062 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4637059271335602, "learning_rate": 2.0046263444839752e-05, "loss": 0.2269, "step": 15025, "teacher_loss": 0.20060932636260986 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.796784520149231, "learning_rate": 2.0044124521766836e-05, "loss": 0.3761, "step": 15026, "teacher_loss": 0.3293894827365875 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.8234351277351379, "learning_rate": 2.0041985483047265e-05, "loss": 0.4887, "step": 15027, "teacher_loss": 0.45147186517715454 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.7973535656929016, "learning_rate": 2.0039846328730082e-05, "loss": 0.3512, "step": 15028, "teacher_loss": 0.30161386728286743 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.6982119679450989, "learning_rate": 2.0037707058864343e-05, "loss": 0.2315, "step": 15029, "teacher_loss": 0.17969286441802979 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.471706748008728, "learning_rate": 2.0035567673499073e-05, "loss": 0.2346, "step": 15030, "teacher_loss": 0.20824161171913147 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.39602822065353394, "learning_rate": 2.0033428172683333e-05, "loss": 0.2095, "step": 15031, "teacher_loss": 0.18878960609436035 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.9143403768539429, "learning_rate": 2.003128855646619e-05, "loss": 0.5763, "step": 15032, "teacher_loss": 0.538723349571228 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.1988438367843628, "learning_rate": 2.0029148824896672e-05, "loss": 0.1773, "step": 15033, "teacher_loss": 0.17487892508506775 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4166041910648346, "learning_rate": 2.0027008978023853e-05, "loss": 0.2585, "step": 15034, "teacher_loss": 0.2409186214208603 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.5560095906257629, "learning_rate": 2.0024869015896793e-05, "loss": 0.3295, "step": 15035, "teacher_loss": 0.3043414354324341 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4813695549964905, "learning_rate": 2.0022728938564548e-05, "loss": 0.2326, "step": 15036, "teacher_loss": 0.20496976375579834 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.47685176134109497, "learning_rate": 2.002058874607619e-05, "loss": 0.2391, "step": 15037, "teacher_loss": 0.21265339851379395 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.7392712831497192, "learning_rate": 2.0018448438480784e-05, "loss": 0.2811, "step": 15038, "teacher_loss": 0.2301851511001587 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.28946763277053833, "learning_rate": 2.0016308015827402e-05, "loss": 0.1964, "step": 15039, "teacher_loss": 0.18610183894634247 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.3137316107749939, "learning_rate": 2.0014167478165117e-05, "loss": 0.2067, "step": 15040, "teacher_loss": 0.19480668008327484 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4612748324871063, "learning_rate": 2.0012026825543002e-05, "loss": 0.2401, "step": 15041, "teacher_loss": 0.215494304895401 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.3548908531665802, "learning_rate": 2.0009886058010137e-05, "loss": 0.2915, "step": 15042, "teacher_loss": 0.284482479095459 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.22971507906913757, "learning_rate": 2.000774517561561e-05, "loss": 0.2425, "step": 15043, "teacher_loss": 0.2439051866531372 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4746737778186798, "learning_rate": 2.0005604178408498e-05, "loss": 0.2731, "step": 15044, "teacher_loss": 0.2507379651069641 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4142010807991028, "learning_rate": 2.000346306643789e-05, "loss": 0.1577, "step": 15045, "teacher_loss": 0.12919777631759644 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.26875972747802734, "learning_rate": 2.0001321839752877e-05, "loss": 0.1826, "step": 15046, "teacher_loss": 0.17305982112884521 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.5402956008911133, "learning_rate": 1.9999180498402547e-05, "loss": 0.3649, "step": 15047, "teacher_loss": 0.34537264704704285 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4461023807525635, "learning_rate": 1.9997039042435997e-05, "loss": 0.2905, "step": 15048, "teacher_loss": 0.2732193171977997 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.2971504330635071, "learning_rate": 1.9994897471902325e-05, "loss": 0.1938, "step": 15049, "teacher_loss": 0.18228499591350555 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.6897265911102295, "learning_rate": 1.9992755786850626e-05, "loss": 0.3493, "step": 15050, "teacher_loss": 0.3114684224128723 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.5948957800865173, "learning_rate": 1.999061398733001e-05, "loss": 0.4352, "step": 15051, "teacher_loss": 0.41743582487106323 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.36130496859550476, "learning_rate": 1.9988472073389578e-05, "loss": 0.2496, "step": 15052, "teacher_loss": 0.23721513152122498 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4595414698123932, "learning_rate": 1.998633004507843e-05, "loss": 0.2672, "step": 15053, "teacher_loss": 0.2458733767271042 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.42380625009536743, "learning_rate": 1.9984187902445696e-05, "loss": 0.2295, "step": 15054, "teacher_loss": 0.20793843269348145 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.44624564051628113, "learning_rate": 1.9982045645540464e-05, "loss": 0.442, "step": 15055, "teacher_loss": 0.44155222177505493 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.34407877922058105, "learning_rate": 1.9979903274411864e-05, "loss": 0.2251, "step": 15056, "teacher_loss": 0.21184967458248138 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.2622450292110443, "learning_rate": 1.9977760789109024e-05, "loss": 0.2703, "step": 15057, "teacher_loss": 0.2712153196334839 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.24984706938266754, "learning_rate": 1.9975618189681043e-05, "loss": 0.2316, "step": 15058, "teacher_loss": 0.22957536578178406 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4520396590232849, "learning_rate": 1.9973475476177057e-05, "loss": 0.2484, "step": 15059, "teacher_loss": 0.22573867440223694 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.375400185585022, "learning_rate": 1.997133264864619e-05, "loss": 0.3091, "step": 15060, "teacher_loss": 0.30178260803222656 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 1.1193232536315918, "learning_rate": 1.996918970713757e-05, "loss": 0.6195, "step": 15061, "teacher_loss": 0.5639264583587646 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.5159069299697876, "learning_rate": 1.9967046651700332e-05, "loss": 0.2446, "step": 15062, "teacher_loss": 0.2144462913274765 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.6716480255126953, "learning_rate": 1.99649034823836e-05, "loss": 0.5184, "step": 15063, "teacher_loss": 0.5013296008110046 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4155537486076355, "learning_rate": 1.996276019923652e-05, "loss": 0.2657, "step": 15064, "teacher_loss": 0.2490987777709961 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.2730403542518616, "learning_rate": 1.996061680230823e-05, "loss": 0.2524, "step": 15065, "teacher_loss": 0.2500574290752411 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.6909595131874084, "learning_rate": 1.9958473291647865e-05, "loss": 0.2662, "step": 15066, "teacher_loss": 0.21902316808700562 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.30844852328300476, "learning_rate": 1.9956329667304577e-05, "loss": 0.2243, "step": 15067, "teacher_loss": 0.21495559811592102 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.16114582121372223, "learning_rate": 1.995418592932751e-05, "loss": 0.1739, "step": 15068, "teacher_loss": 0.1752689778804779 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.4944458603858948, "learning_rate": 1.995204207776581e-05, "loss": 0.283, "step": 15069, "teacher_loss": 0.2594802975654602 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.8666689991950989, "learning_rate": 1.9949898112668634e-05, "loss": 0.3587, "step": 15070, "teacher_loss": 0.30222347378730774 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.38930201530456543, "learning_rate": 1.9947754034085134e-05, "loss": 0.3235, "step": 15071, "teacher_loss": 0.3161659836769104 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.48068174719810486, "learning_rate": 1.9945609842064468e-05, "loss": 0.2449, "step": 15072, "teacher_loss": 0.21873216331005096 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.22470299899578094, "learning_rate": 1.9943465536655793e-05, "loss": 0.1702, "step": 15073, "teacher_loss": 0.16413387656211853 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.22504305839538574, "learning_rate": 1.994132111790828e-05, "loss": 0.2852, "step": 15074, "teacher_loss": 0.2918716073036194 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.25561243295669556, "learning_rate": 1.9939176585871085e-05, "loss": 0.209, "step": 15075, "teacher_loss": 0.20380191504955292 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.7776265144348145, "learning_rate": 1.993703194059338e-05, "loss": 0.2858, "step": 15076, "teacher_loss": 0.23111248016357422 }, { "compression_loss": 0.0, "epoch": 2.72, "label_loss": 0.41156893968582153, "learning_rate": 1.9934887182124337e-05, "loss": 0.3208, "step": 15077, "teacher_loss": 0.3106657564640045 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5161766409873962, "learning_rate": 1.993274231051313e-05, "loss": 0.2744, "step": 15078, "teacher_loss": 0.2475668489933014 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.9737175703048706, "learning_rate": 1.993059732580892e-05, "loss": 0.5216, "step": 15079, "teacher_loss": 0.4713541269302368 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5958664417266846, "learning_rate": 1.9928452228060903e-05, "loss": 0.2761, "step": 15080, "teacher_loss": 0.24053451418876648 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.680786669254303, "learning_rate": 1.992630701731825e-05, "loss": 0.2853, "step": 15081, "teacher_loss": 0.24131283164024353 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.24362145364284515, "learning_rate": 1.992416169363015e-05, "loss": 0.2423, "step": 15082, "teacher_loss": 0.24220535159111023 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.8904927372932434, "learning_rate": 1.9922016257045782e-05, "loss": 0.3639, "step": 15083, "teacher_loss": 0.3054329454898834 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.1477115899324417, "learning_rate": 1.991987070761434e-05, "loss": 0.1593, "step": 15084, "teacher_loss": 0.16060245037078857 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.42675545811653137, "learning_rate": 1.9917725045385017e-05, "loss": 0.3022, "step": 15085, "teacher_loss": 0.28839045763015747 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5468289852142334, "learning_rate": 1.9915579270406993e-05, "loss": 0.2665, "step": 15086, "teacher_loss": 0.23534198105335236 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.8861464262008667, "learning_rate": 1.9913433382729488e-05, "loss": 0.4564, "step": 15087, "teacher_loss": 0.40859711170196533 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.18965277075767517, "learning_rate": 1.9911287382401675e-05, "loss": 0.1957, "step": 15088, "teacher_loss": 0.1964040994644165 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4445428252220154, "learning_rate": 1.9909141269472772e-05, "loss": 0.4737, "step": 15089, "teacher_loss": 0.47697240114212036 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.3543934226036072, "learning_rate": 1.9906995043991978e-05, "loss": 0.2033, "step": 15090, "teacher_loss": 0.18648485839366913 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.2961617112159729, "learning_rate": 1.9904848706008498e-05, "loss": 0.1925, "step": 15091, "teacher_loss": 0.18095944821834564 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.35567817091941833, "learning_rate": 1.990270225557155e-05, "loss": 0.2438, "step": 15092, "teacher_loss": 0.2313770353794098 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4605990946292877, "learning_rate": 1.9900555692730325e-05, "loss": 0.339, "step": 15093, "teacher_loss": 0.32544130086898804 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5304573178291321, "learning_rate": 1.989840901753406e-05, "loss": 0.2668, "step": 15094, "teacher_loss": 0.23751601576805115 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.935044527053833, "learning_rate": 1.989626223003196e-05, "loss": 0.3426, "step": 15095, "teacher_loss": 0.2767980694770813 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.24674253165721893, "learning_rate": 1.9894115330273245e-05, "loss": 0.2247, "step": 15096, "teacher_loss": 0.22226402163505554 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.544734001159668, "learning_rate": 1.9891968318307143e-05, "loss": 0.2859, "step": 15097, "teacher_loss": 0.25711536407470703 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.18568967282772064, "learning_rate": 1.988982119418287e-05, "loss": 0.2077, "step": 15098, "teacher_loss": 0.210148423910141 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.3483494520187378, "learning_rate": 1.988767395794966e-05, "loss": 0.3527, "step": 15099, "teacher_loss": 0.3531569540500641 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.33492332696914673, "learning_rate": 1.988552660965674e-05, "loss": 0.3481, "step": 15100, "teacher_loss": 0.3495703339576721 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.30936503410339355, "learning_rate": 1.988337914935334e-05, "loss": 0.2308, "step": 15101, "teacher_loss": 0.22202084958553314 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5264618992805481, "learning_rate": 1.98812315770887e-05, "loss": 0.2171, "step": 15102, "teacher_loss": 0.18275868892669678 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4199342429637909, "learning_rate": 1.9879083892912054e-05, "loss": 0.2189, "step": 15103, "teacher_loss": 0.19651193916797638 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.31059807538986206, "learning_rate": 1.987693609687264e-05, "loss": 0.1869, "step": 15104, "teacher_loss": 0.17317906022071838 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4697466492652893, "learning_rate": 1.9874788189019707e-05, "loss": 0.2632, "step": 15105, "teacher_loss": 0.24027252197265625 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.17916375398635864, "learning_rate": 1.9872640169402496e-05, "loss": 0.2371, "step": 15106, "teacher_loss": 0.24357305467128754 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.298399955034256, "learning_rate": 1.9870492038070255e-05, "loss": 0.2646, "step": 15107, "teacher_loss": 0.2608868479728699 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.2749473452568054, "learning_rate": 1.9868343795072228e-05, "loss": 0.1871, "step": 15108, "teacher_loss": 0.17735815048217773 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.46526551246643066, "learning_rate": 1.986619544045768e-05, "loss": 0.2129, "step": 15109, "teacher_loss": 0.1848325878381729 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5566456317901611, "learning_rate": 1.986404697427586e-05, "loss": 0.293, "step": 15110, "teacher_loss": 0.2636886239051819 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.7540261745452881, "learning_rate": 1.9861898396576023e-05, "loss": 0.3267, "step": 15111, "teacher_loss": 0.2791852056980133 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.5506866574287415, "learning_rate": 1.9859749707407436e-05, "loss": 0.2305, "step": 15112, "teacher_loss": 0.1949162483215332 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.2481362521648407, "learning_rate": 1.9857600906819356e-05, "loss": 0.1977, "step": 15113, "teacher_loss": 0.19209900498390198 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.11785888671875, "learning_rate": 1.985545199486105e-05, "loss": 0.1877, "step": 15114, "teacher_loss": 0.1954491138458252 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.18139125406742096, "learning_rate": 1.985330297158179e-05, "loss": 0.2199, "step": 15115, "teacher_loss": 0.22421470284461975 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4692199230194092, "learning_rate": 1.985115383703084e-05, "loss": 0.255, "step": 15116, "teacher_loss": 0.23124787211418152 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.21738798916339874, "learning_rate": 1.9849004591257488e-05, "loss": 0.22, "step": 15117, "teacher_loss": 0.22025524079799652 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.2938863933086395, "learning_rate": 1.9846855234310985e-05, "loss": 0.1928, "step": 15118, "teacher_loss": 0.18157872557640076 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.3548092544078827, "learning_rate": 1.984470576624063e-05, "loss": 0.2173, "step": 15119, "teacher_loss": 0.2020546793937683 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.33035704493522644, "learning_rate": 1.9842556187095695e-05, "loss": 0.3313, "step": 15120, "teacher_loss": 0.33141976594924927 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.3157961368560791, "learning_rate": 1.9840406496925465e-05, "loss": 0.2067, "step": 15121, "teacher_loss": 0.19453680515289307 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.6413888335227966, "learning_rate": 1.983825669577923e-05, "loss": 0.2912, "step": 15122, "teacher_loss": 0.2522585093975067 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.708191990852356, "learning_rate": 1.983610678370627e-05, "loss": 0.7878, "step": 15123, "teacher_loss": 0.7966430187225342 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.8005844354629517, "learning_rate": 1.9833956760755882e-05, "loss": 0.3203, "step": 15124, "teacher_loss": 0.2668999433517456 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.26291677355766296, "learning_rate": 1.983180662697736e-05, "loss": 0.2303, "step": 15125, "teacher_loss": 0.2266272008419037 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.25841909646987915, "learning_rate": 1.9829656382419997e-05, "loss": 0.3125, "step": 15126, "teacher_loss": 0.3185466527938843 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.28554433584213257, "learning_rate": 1.9827506027133094e-05, "loss": 0.202, "step": 15127, "teacher_loss": 0.19270142912864685 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.7003921270370483, "learning_rate": 1.9825355561165953e-05, "loss": 0.2623, "step": 15128, "teacher_loss": 0.21357497572898865 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.1778913140296936, "learning_rate": 1.9823204984567873e-05, "loss": 0.1686, "step": 15129, "teacher_loss": 0.16754662990570068 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.7207590937614441, "learning_rate": 1.982105429738817e-05, "loss": 0.2455, "step": 15130, "teacher_loss": 0.1927299201488495 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.4375700354576111, "learning_rate": 1.981890349967614e-05, "loss": 0.2488, "step": 15131, "teacher_loss": 0.2278408408164978 }, { "compression_loss": 0.0, "epoch": 2.73, "label_loss": 0.21476247906684875, "learning_rate": 1.98167525914811e-05, "loss": 0.1755, "step": 15132, "teacher_loss": 0.1711852252483368 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.19516701996326447, "learning_rate": 1.9814601572852368e-05, "loss": 0.2367, "step": 15133, "teacher_loss": 0.24130795896053314 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.44253799319267273, "learning_rate": 1.981245044383925e-05, "loss": 0.1928, "step": 15134, "teacher_loss": 0.16509635746479034 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.23452308773994446, "learning_rate": 1.981029920449108e-05, "loss": 0.2802, "step": 15135, "teacher_loss": 0.28531378507614136 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.29700279235839844, "learning_rate": 1.980814785485717e-05, "loss": 0.2097, "step": 15136, "teacher_loss": 0.19998842477798462 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.7240496873855591, "learning_rate": 1.9805996394986844e-05, "loss": 0.4021, "step": 15137, "teacher_loss": 0.366362065076828 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4322538375854492, "learning_rate": 1.9803844824929425e-05, "loss": 0.2576, "step": 15138, "teacher_loss": 0.2382260411977768 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.13878756761550903, "learning_rate": 1.9801693144734256e-05, "loss": 0.1624, "step": 15139, "teacher_loss": 0.16496823728084564 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4488065242767334, "learning_rate": 1.9799541354450652e-05, "loss": 0.2146, "step": 15140, "teacher_loss": 0.18863120675086975 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.17453351616859436, "learning_rate": 1.9797389454127957e-05, "loss": 0.1358, "step": 15141, "teacher_loss": 0.13149572908878326 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.48861896991729736, "learning_rate": 1.9795237443815507e-05, "loss": 0.475, "step": 15142, "teacher_loss": 0.47346892952919006 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5806810855865479, "learning_rate": 1.9793085323562633e-05, "loss": 0.2578, "step": 15143, "teacher_loss": 0.221955806016922 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.18329587578773499, "learning_rate": 1.9790933093418692e-05, "loss": 0.1693, "step": 15144, "teacher_loss": 0.1677565574645996 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.8105214834213257, "learning_rate": 1.9788780753433016e-05, "loss": 0.368, "step": 15145, "teacher_loss": 0.3188292384147644 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.6586999893188477, "learning_rate": 1.978662830365495e-05, "loss": 0.2157, "step": 15146, "teacher_loss": 0.16644065082073212 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4170007109642029, "learning_rate": 1.9784475744133855e-05, "loss": 0.3969, "step": 15147, "teacher_loss": 0.39466798305511475 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.6390199661254883, "learning_rate": 1.9782323074919066e-05, "loss": 0.3034, "step": 15148, "teacher_loss": 0.2661534547805786 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.32657966017723083, "learning_rate": 1.978017029605996e-05, "loss": 0.2315, "step": 15149, "teacher_loss": 0.22088859975337982 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4098842740058899, "learning_rate": 1.977801740760587e-05, "loss": 0.2898, "step": 15150, "teacher_loss": 0.2765064239501953 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4801176190376282, "learning_rate": 1.977586440960617e-05, "loss": 0.3448, "step": 15151, "teacher_loss": 0.3297881484031677 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5493592023849487, "learning_rate": 1.977371130211022e-05, "loss": 0.2631, "step": 15152, "teacher_loss": 0.23129115998744965 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4405379891395569, "learning_rate": 1.977155808516738e-05, "loss": 0.2348, "step": 15153, "teacher_loss": 0.21195551753044128 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4689837098121643, "learning_rate": 1.9769404758827017e-05, "loss": 0.2075, "step": 15154, "teacher_loss": 0.1784050464630127 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.6527721285820007, "learning_rate": 1.9767251323138508e-05, "loss": 0.4971, "step": 15155, "teacher_loss": 0.4797801375389099 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.23711979389190674, "learning_rate": 1.976509777815121e-05, "loss": 0.212, "step": 15156, "teacher_loss": 0.20919150114059448 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 1.0806182622909546, "learning_rate": 1.9762944123914517e-05, "loss": 0.7386, "step": 15157, "teacher_loss": 0.7006382346153259 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 1.246617078781128, "learning_rate": 1.9760790360477786e-05, "loss": 0.3176, "step": 15158, "teacher_loss": 0.21440258622169495 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.17018583416938782, "learning_rate": 1.9758636487890408e-05, "loss": 0.2284, "step": 15159, "teacher_loss": 0.2348358929157257 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4834911823272705, "learning_rate": 1.975648250620177e-05, "loss": 0.2456, "step": 15160, "teacher_loss": 0.21916499733924866 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.7042248845100403, "learning_rate": 1.9754328415461243e-05, "loss": 0.5092, "step": 15161, "teacher_loss": 0.4874890446662903 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4873436987400055, "learning_rate": 1.9752174215718217e-05, "loss": 0.3298, "step": 15162, "teacher_loss": 0.3122524917125702 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.18083898723125458, "learning_rate": 1.975001990702209e-05, "loss": 0.2131, "step": 15163, "teacher_loss": 0.21665626764297485 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5595321655273438, "learning_rate": 1.9747865489422242e-05, "loss": 0.2676, "step": 15164, "teacher_loss": 0.2351897805929184 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5304072499275208, "learning_rate": 1.9745710962968075e-05, "loss": 0.281, "step": 15165, "teacher_loss": 0.2532690465450287 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.23622167110443115, "learning_rate": 1.974355632770899e-05, "loss": 0.2966, "step": 15166, "teacher_loss": 0.30329370498657227 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5728350877761841, "learning_rate": 1.974140158369437e-05, "loss": 0.2289, "step": 15167, "teacher_loss": 0.19067008793354034 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.37634527683258057, "learning_rate": 1.9739246730973633e-05, "loss": 0.1893, "step": 15168, "teacher_loss": 0.1685633361339569 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.30013829469680786, "learning_rate": 1.973709176959618e-05, "loss": 0.2164, "step": 15169, "teacher_loss": 0.20713499188423157 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 1.0726590156555176, "learning_rate": 1.973493669961141e-05, "loss": 0.2688, "step": 15170, "teacher_loss": 0.17949554324150085 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.43128281831741333, "learning_rate": 1.973278152106874e-05, "loss": 0.1854, "step": 15171, "teacher_loss": 0.15809732675552368 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.2755378484725952, "learning_rate": 1.973062623401758e-05, "loss": 0.2203, "step": 15172, "teacher_loss": 0.21418413519859314 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.3010275959968567, "learning_rate": 1.972847083850734e-05, "loss": 0.1941, "step": 15173, "teacher_loss": 0.18224990367889404 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5239337086677551, "learning_rate": 1.9726315334587446e-05, "loss": 0.2558, "step": 15174, "teacher_loss": 0.22604581713676453 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4318421185016632, "learning_rate": 1.9724159722307306e-05, "loss": 0.2336, "step": 15175, "teacher_loss": 0.21155688166618347 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.2940051555633545, "learning_rate": 1.972200400171635e-05, "loss": 0.1698, "step": 15176, "teacher_loss": 0.15604063868522644 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5416123867034912, "learning_rate": 1.9719848172864004e-05, "loss": 0.3919, "step": 15177, "teacher_loss": 0.37521782517433167 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5701338648796082, "learning_rate": 1.9717692235799682e-05, "loss": 0.2458, "step": 15178, "teacher_loss": 0.20977017283439636 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.36374589800834656, "learning_rate": 1.971553619057282e-05, "loss": 0.2067, "step": 15179, "teacher_loss": 0.18928195536136627 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.4188700318336487, "learning_rate": 1.9713380037232863e-05, "loss": 0.264, "step": 15180, "teacher_loss": 0.24675922095775604 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.24901972711086273, "learning_rate": 1.9711223775829225e-05, "loss": 0.2156, "step": 15181, "teacher_loss": 0.21192272007465363 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.22387628257274628, "learning_rate": 1.9709067406411352e-05, "loss": 0.158, "step": 15182, "teacher_loss": 0.15069857239723206 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5138344764709473, "learning_rate": 1.9706910929028684e-05, "loss": 0.35, "step": 15183, "teacher_loss": 0.33182626962661743 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5507846474647522, "learning_rate": 1.9704754343730655e-05, "loss": 0.2894, "step": 15184, "teacher_loss": 0.2603911757469177 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.609535813331604, "learning_rate": 1.9702597650566723e-05, "loss": 0.2835, "step": 15185, "teacher_loss": 0.2472611963748932 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.34522104263305664, "learning_rate": 1.9700440849586316e-05, "loss": 0.1808, "step": 15186, "teacher_loss": 0.1625167727470398 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.5362224578857422, "learning_rate": 1.9698283940838896e-05, "loss": 0.4922, "step": 15187, "teacher_loss": 0.48734956979751587 }, { "compression_loss": 0.0, "epoch": 2.74, "label_loss": 0.9018310904502869, "learning_rate": 1.9696126924373917e-05, "loss": 0.417, "step": 15188, "teacher_loss": 0.3631435036659241 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.20408844947814941, "learning_rate": 1.9693969800240825e-05, "loss": 0.198, "step": 15189, "teacher_loss": 0.19735214114189148 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.29109904170036316, "learning_rate": 1.9691812568489076e-05, "loss": 0.2257, "step": 15190, "teacher_loss": 0.21845673024654388 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.4749983251094818, "learning_rate": 1.9689655229168134e-05, "loss": 0.2314, "step": 15191, "teacher_loss": 0.20435748994350433 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.11566022038459778, "learning_rate": 1.9687497782327456e-05, "loss": 0.2273, "step": 15192, "teacher_loss": 0.23967286944389343 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.487579882144928, "learning_rate": 1.968534022801651e-05, "loss": 0.2777, "step": 15193, "teacher_loss": 0.25442981719970703 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.21503601968288422, "learning_rate": 1.9683182566284753e-05, "loss": 0.2347, "step": 15194, "teacher_loss": 0.23686090111732483 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.17470958828926086, "learning_rate": 1.968102479718167e-05, "loss": 0.1602, "step": 15195, "teacher_loss": 0.1586049348115921 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3236986994743347, "learning_rate": 1.967886692075672e-05, "loss": 0.1934, "step": 15196, "teacher_loss": 0.17886868119239807 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.4406489431858063, "learning_rate": 1.967670893705938e-05, "loss": 0.2463, "step": 15197, "teacher_loss": 0.22465577721595764 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6793450117111206, "learning_rate": 1.967455084613912e-05, "loss": 0.2646, "step": 15198, "teacher_loss": 0.21854379773139954 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2680261731147766, "learning_rate": 1.967239264804543e-05, "loss": 0.2216, "step": 15199, "teacher_loss": 0.21641956269741058 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.23984336853027344, "learning_rate": 1.9670234342827783e-05, "loss": 0.2353, "step": 15200, "teacher_loss": 0.23474860191345215 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.09922236204147339, "learning_rate": 1.966807593053566e-05, "loss": 0.1285, "step": 15201, "teacher_loss": 0.13175831735134125 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6094710826873779, "learning_rate": 1.9665917411218562e-05, "loss": 0.2467, "step": 15202, "teacher_loss": 0.2063438594341278 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.28626570105552673, "learning_rate": 1.966375878492596e-05, "loss": 0.2251, "step": 15203, "teacher_loss": 0.21829816699028015 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.47271013259887695, "learning_rate": 1.9661600051707355e-05, "loss": 0.5874, "step": 15204, "teacher_loss": 0.6001725196838379 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 1.128417730331421, "learning_rate": 1.9659441211612234e-05, "loss": 0.2597, "step": 15205, "teacher_loss": 0.16312867403030396 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6140072345733643, "learning_rate": 1.9657282264690095e-05, "loss": 0.2533, "step": 15206, "teacher_loss": 0.21319587528705597 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2668343782424927, "learning_rate": 1.965512321099044e-05, "loss": 0.2536, "step": 15207, "teacher_loss": 0.25213319063186646 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.7780238389968872, "learning_rate": 1.9652964050562766e-05, "loss": 0.2169, "step": 15208, "teacher_loss": 0.1545523703098297 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.7898622155189514, "learning_rate": 1.9650804783456575e-05, "loss": 0.569, "step": 15209, "teacher_loss": 0.5444784164428711 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.7480657696723938, "learning_rate": 1.964864540972138e-05, "loss": 0.3207, "step": 15210, "teacher_loss": 0.27317607402801514 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.426583468914032, "learning_rate": 1.9646485929406676e-05, "loss": 0.4055, "step": 15211, "teacher_loss": 0.40312451124191284 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2943565845489502, "learning_rate": 1.9644326342561983e-05, "loss": 0.1782, "step": 15212, "teacher_loss": 0.16524159908294678 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.4286937117576599, "learning_rate": 1.964216664923681e-05, "loss": 0.1984, "step": 15213, "teacher_loss": 0.17279568314552307 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6582702398300171, "learning_rate": 1.964000684948067e-05, "loss": 0.2786, "step": 15214, "teacher_loss": 0.2364538609981537 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.18538734316825867, "learning_rate": 1.9637846943343093e-05, "loss": 0.1418, "step": 15215, "teacher_loss": 0.13701054453849792 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.44110599160194397, "learning_rate": 1.9635686930873587e-05, "loss": 0.3109, "step": 15216, "teacher_loss": 0.29638004302978516 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6912287473678589, "learning_rate": 1.963352681212168e-05, "loss": 0.545, "step": 15217, "teacher_loss": 0.5287714004516602 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.23366637527942657, "learning_rate": 1.9631366587136893e-05, "loss": 0.1803, "step": 15218, "teacher_loss": 0.17433884739875793 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3281964063644409, "learning_rate": 1.9629206255968754e-05, "loss": 0.202, "step": 15219, "teacher_loss": 0.18794460594654083 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.4137471318244934, "learning_rate": 1.9627045818666796e-05, "loss": 0.2501, "step": 15220, "teacher_loss": 0.23189963400363922 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.7862658500671387, "learning_rate": 1.9624885275280553e-05, "loss": 0.3132, "step": 15221, "teacher_loss": 0.26060980558395386 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2272559106349945, "learning_rate": 1.9622724625859553e-05, "loss": 0.2102, "step": 15222, "teacher_loss": 0.2083524465560913 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.1550198644399643, "learning_rate": 1.962056387045334e-05, "loss": 0.1537, "step": 15223, "teacher_loss": 0.1535821259021759 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.4116232395172119, "learning_rate": 1.961840300911145e-05, "loss": 0.2618, "step": 15224, "teacher_loss": 0.2451116293668747 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2854640483856201, "learning_rate": 1.961624204188342e-05, "loss": 0.2055, "step": 15225, "teacher_loss": 0.196599543094635 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2560485005378723, "learning_rate": 1.9614080968818807e-05, "loss": 0.3901, "step": 15226, "teacher_loss": 0.4049775302410126 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3621958792209625, "learning_rate": 1.9611919789967148e-05, "loss": 0.1969, "step": 15227, "teacher_loss": 0.17855460941791534 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.33230215311050415, "learning_rate": 1.9609758505377995e-05, "loss": 0.3265, "step": 15228, "teacher_loss": 0.3258967399597168 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3078997731208801, "learning_rate": 1.9607597115100905e-05, "loss": 0.3191, "step": 15229, "teacher_loss": 0.32038360834121704 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2900291085243225, "learning_rate": 1.9605435619185424e-05, "loss": 0.2159, "step": 15230, "teacher_loss": 0.20765212178230286 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3844910264015198, "learning_rate": 1.960327401768111e-05, "loss": 0.1709, "step": 15231, "teacher_loss": 0.14715400338172913 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3461059331893921, "learning_rate": 1.9601112310637526e-05, "loss": 0.2043, "step": 15232, "teacher_loss": 0.1885887086391449 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.31589266657829285, "learning_rate": 1.959895049810423e-05, "loss": 0.25, "step": 15233, "teacher_loss": 0.24262520670890808 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.41189220547676086, "learning_rate": 1.959678858013079e-05, "loss": 0.2762, "step": 15234, "teacher_loss": 0.261086642742157 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.2599945664405823, "learning_rate": 1.9594626556766772e-05, "loss": 0.2345, "step": 15235, "teacher_loss": 0.23168711364269257 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.3744836449623108, "learning_rate": 1.9592464428061734e-05, "loss": 0.2579, "step": 15236, "teacher_loss": 0.24489115178585052 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.6489717364311218, "learning_rate": 1.9590302194065267e-05, "loss": 0.2525, "step": 15237, "teacher_loss": 0.20842532813549042 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 1.0973368883132935, "learning_rate": 1.9588139854826927e-05, "loss": 0.342, "step": 15238, "teacher_loss": 0.2580951750278473 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.25282567739486694, "learning_rate": 1.9585977410396295e-05, "loss": 0.2795, "step": 15239, "teacher_loss": 0.282443106174469 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.28659236431121826, "learning_rate": 1.9583814860822954e-05, "loss": 0.2107, "step": 15240, "teacher_loss": 0.20226648449897766 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.7333790063858032, "learning_rate": 1.9581652206156477e-05, "loss": 0.2816, "step": 15241, "teacher_loss": 0.2314022332429886 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.45125460624694824, "learning_rate": 1.9579489446446456e-05, "loss": 0.3062, "step": 15242, "teacher_loss": 0.290036678314209 }, { "compression_loss": 0.0, "epoch": 2.75, "label_loss": 0.07484418898820877, "learning_rate": 1.957732658174247e-05, "loss": 0.1293, "step": 15243, "teacher_loss": 0.13537269830703735 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4858418107032776, "learning_rate": 1.9575163612094103e-05, "loss": 0.2672, "step": 15244, "teacher_loss": 0.2429550290107727 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4726579785346985, "learning_rate": 1.957300053755096e-05, "loss": 0.2457, "step": 15245, "teacher_loss": 0.2204376459121704 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4978002905845642, "learning_rate": 1.957083735816262e-05, "loss": 0.2308, "step": 15246, "teacher_loss": 0.20113354921340942 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.6288567781448364, "learning_rate": 1.9568674073978685e-05, "loss": 0.2816, "step": 15247, "teacher_loss": 0.24298366904258728 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3875372111797333, "learning_rate": 1.9566510685048752e-05, "loss": 0.2473, "step": 15248, "teacher_loss": 0.2317335158586502 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4088730812072754, "learning_rate": 1.956434719142242e-05, "loss": 0.2391, "step": 15249, "teacher_loss": 0.22022981941699982 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.40942302346229553, "learning_rate": 1.956218359314929e-05, "loss": 0.2978, "step": 15250, "teacher_loss": 0.28534507751464844 }, { "epoch": 2.76, "eval_exact_match": 79.7445600756859, "eval_f1": 87.1903482811981, "step": 15250 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.41600728034973145, "learning_rate": 1.956001989027897e-05, "loss": 0.3106, "step": 15251, "teacher_loss": 0.29887473583221436 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3719387650489807, "learning_rate": 1.9557856082861067e-05, "loss": 0.2447, "step": 15252, "teacher_loss": 0.23055724799633026 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.38669300079345703, "learning_rate": 1.9555692170945185e-05, "loss": 0.1926, "step": 15253, "teacher_loss": 0.17103198170661926 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.8626641035079956, "learning_rate": 1.9553528154580946e-05, "loss": 0.3235, "step": 15254, "teacher_loss": 0.26356542110443115 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3669757843017578, "learning_rate": 1.9551364033817955e-05, "loss": 0.2944, "step": 15255, "teacher_loss": 0.2863014042377472 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.1520523577928543, "learning_rate": 1.9549199808705834e-05, "loss": 0.1871, "step": 15256, "teacher_loss": 0.19100482761859894 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4765346944332123, "learning_rate": 1.9547035479294196e-05, "loss": 0.3719, "step": 15257, "teacher_loss": 0.3602708578109741 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5200376510620117, "learning_rate": 1.9544871045632667e-05, "loss": 0.3026, "step": 15258, "teacher_loss": 0.27845266461372375 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5695130228996277, "learning_rate": 1.954270650777088e-05, "loss": 0.2594, "step": 15259, "teacher_loss": 0.22493885457515717 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.41902047395706177, "learning_rate": 1.9540541865758446e-05, "loss": 0.2287, "step": 15260, "teacher_loss": 0.20757697522640228 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.32884740829467773, "learning_rate": 1.9538377119645e-05, "loss": 0.2366, "step": 15261, "teacher_loss": 0.2263159155845642 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.17576825618743896, "learning_rate": 1.9536212269480175e-05, "loss": 0.3022, "step": 15262, "teacher_loss": 0.31626319885253906 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3241882920265198, "learning_rate": 1.95340473153136e-05, "loss": 0.2243, "step": 15263, "teacher_loss": 0.21318133175373077 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.67814040184021, "learning_rate": 1.9531882257194916e-05, "loss": 0.3581, "step": 15264, "teacher_loss": 0.3225933611392975 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.8148816227912903, "learning_rate": 1.9529717095173764e-05, "loss": 0.5407, "step": 15265, "teacher_loss": 0.5102474689483643 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5089905858039856, "learning_rate": 1.9527551829299772e-05, "loss": 0.4243, "step": 15266, "teacher_loss": 0.4148995280265808 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4893893003463745, "learning_rate": 1.9525386459622594e-05, "loss": 0.236, "step": 15267, "teacher_loss": 0.2078860104084015 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.287700891494751, "learning_rate": 1.9523220986191873e-05, "loss": 0.2523, "step": 15268, "teacher_loss": 0.2484011948108673 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.8317673206329346, "learning_rate": 1.9521055409057254e-05, "loss": 0.2652, "step": 15269, "teacher_loss": 0.2022302746772766 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.2522299289703369, "learning_rate": 1.9518889728268398e-05, "loss": 0.1665, "step": 15270, "teacher_loss": 0.1569472700357437 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.25777074694633484, "learning_rate": 1.9516723943874938e-05, "loss": 0.1945, "step": 15271, "teacher_loss": 0.18752457201480865 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3481416702270508, "learning_rate": 1.9514558055926546e-05, "loss": 0.2466, "step": 15272, "teacher_loss": 0.2353469431400299 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5196369886398315, "learning_rate": 1.951239206447287e-05, "loss": 0.3199, "step": 15273, "teacher_loss": 0.29775428771972656 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4133542776107788, "learning_rate": 1.951022596956357e-05, "loss": 0.2576, "step": 15274, "teacher_loss": 0.2402483969926834 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.36540108919143677, "learning_rate": 1.9508059771248315e-05, "loss": 0.4227, "step": 15275, "teacher_loss": 0.42908233404159546 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4989544749259949, "learning_rate": 1.9505893469576767e-05, "loss": 0.2677, "step": 15276, "teacher_loss": 0.2420121282339096 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3012455105781555, "learning_rate": 1.9503727064598583e-05, "loss": 0.3607, "step": 15277, "teacher_loss": 0.367296427488327 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 1.009514331817627, "learning_rate": 1.950156055636345e-05, "loss": 0.4326, "step": 15278, "teacher_loss": 0.36853593587875366 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.2650626301765442, "learning_rate": 1.9499393944921027e-05, "loss": 0.2065, "step": 15279, "teacher_loss": 0.20004716515541077 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.31511303782463074, "learning_rate": 1.9497227230320992e-05, "loss": 0.1859, "step": 15280, "teacher_loss": 0.1715787649154663 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.45767858624458313, "learning_rate": 1.9495060412613018e-05, "loss": 0.2633, "step": 15281, "teacher_loss": 0.24173401296138763 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4147833585739136, "learning_rate": 1.9492893491846783e-05, "loss": 0.2652, "step": 15282, "teacher_loss": 0.2485392987728119 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.660057783126831, "learning_rate": 1.9490726468071973e-05, "loss": 0.3465, "step": 15283, "teacher_loss": 0.3116726279258728 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3907315135002136, "learning_rate": 1.9488559341338265e-05, "loss": 0.1918, "step": 15284, "teacher_loss": 0.16965213418006897 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5510426759719849, "learning_rate": 1.9486392111695357e-05, "loss": 0.2491, "step": 15285, "teacher_loss": 0.2155945748090744 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.9693059921264648, "learning_rate": 1.9484224779192924e-05, "loss": 0.3176, "step": 15286, "teacher_loss": 0.24523687362670898 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3047083020210266, "learning_rate": 1.9482057343880662e-05, "loss": 0.3054, "step": 15287, "teacher_loss": 0.3055236041545868 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3794265389442444, "learning_rate": 1.9479889805808263e-05, "loss": 0.2015, "step": 15288, "teacher_loss": 0.1817849725484848 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.3205123543739319, "learning_rate": 1.9477722165025422e-05, "loss": 0.1689, "step": 15289, "teacher_loss": 0.1520829051733017 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.41750961542129517, "learning_rate": 1.9475554421581835e-05, "loss": 0.2558, "step": 15290, "teacher_loss": 0.23785004019737244 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5345646142959595, "learning_rate": 1.9473386575527203e-05, "loss": 0.2643, "step": 15291, "teacher_loss": 0.23427341878414154 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.8752279281616211, "learning_rate": 1.947121862691123e-05, "loss": 0.7775, "step": 15292, "teacher_loss": 0.76664799451828 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.4988873600959778, "learning_rate": 1.9469050575783622e-05, "loss": 0.3353, "step": 15293, "teacher_loss": 0.3171077370643616 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.9151477217674255, "learning_rate": 1.9466882422194078e-05, "loss": 0.4427, "step": 15294, "teacher_loss": 0.3902362585067749 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.2877733111381531, "learning_rate": 1.9464714166192318e-05, "loss": 0.1664, "step": 15295, "teacher_loss": 0.1529541164636612 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.2663087248802185, "learning_rate": 1.9462545807828046e-05, "loss": 0.1695, "step": 15296, "teacher_loss": 0.15874534845352173 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.5477350950241089, "learning_rate": 1.946037734715098e-05, "loss": 0.3764, "step": 15297, "teacher_loss": 0.3573461174964905 }, { "compression_loss": 0.0, "epoch": 2.76, "label_loss": 0.6456408500671387, "learning_rate": 1.945820878421083e-05, "loss": 0.255, "step": 15298, "teacher_loss": 0.21157687902450562 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.9340426921844482, "learning_rate": 1.945604011905732e-05, "loss": 0.4878, "step": 15299, "teacher_loss": 0.4381910562515259 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.20558130741119385, "learning_rate": 1.9453871351740173e-05, "loss": 0.1843, "step": 15300, "teacher_loss": 0.18190276622772217 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6753048896789551, "learning_rate": 1.9451702482309106e-05, "loss": 0.2425, "step": 15301, "teacher_loss": 0.19444096088409424 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.36088013648986816, "learning_rate": 1.944953351081385e-05, "loss": 0.2754, "step": 15302, "teacher_loss": 0.2658649682998657 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 1.4103915691375732, "learning_rate": 1.944736443730413e-05, "loss": 0.811, "step": 15303, "teacher_loss": 0.7443736791610718 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.31517595052719116, "learning_rate": 1.9445195261829676e-05, "loss": 0.181, "step": 15304, "teacher_loss": 0.1661023050546646 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.47994256019592285, "learning_rate": 1.9443025984440223e-05, "loss": 0.2559, "step": 15305, "teacher_loss": 0.23095327615737915 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.2898304760456085, "learning_rate": 1.944085660518551e-05, "loss": 0.2117, "step": 15306, "teacher_loss": 0.20302514731884003 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.33229899406433105, "learning_rate": 1.9438687124115262e-05, "loss": 0.2829, "step": 15307, "teacher_loss": 0.27739930152893066 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.46569719910621643, "learning_rate": 1.943651754127923e-05, "loss": 0.5578, "step": 15308, "teacher_loss": 0.5680338740348816 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.2508891820907593, "learning_rate": 1.943434785672715e-05, "loss": 0.2547, "step": 15309, "teacher_loss": 0.2550843358039856 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.4334692358970642, "learning_rate": 1.943217807050877e-05, "loss": 0.2678, "step": 15310, "teacher_loss": 0.24943788349628448 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.8599886894226074, "learning_rate": 1.9430008182673836e-05, "loss": 0.5846, "step": 15311, "teacher_loss": 0.5539662837982178 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6390784978866577, "learning_rate": 1.9427838193272096e-05, "loss": 0.2868, "step": 15312, "teacher_loss": 0.24764171242713928 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.20647691190242767, "learning_rate": 1.94256681023533e-05, "loss": 0.2086, "step": 15313, "teacher_loss": 0.20881029963493347 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.13374051451683044, "learning_rate": 1.9423497909967207e-05, "loss": 0.1766, "step": 15314, "teacher_loss": 0.18137669563293457 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.4113527834415436, "learning_rate": 1.9421327616163564e-05, "loss": 0.2406, "step": 15315, "teacher_loss": 0.2216111123561859 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.5625593662261963, "learning_rate": 1.9419157220992134e-05, "loss": 0.3561, "step": 15316, "teacher_loss": 0.3331114649772644 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.69444739818573, "learning_rate": 1.9416986724502685e-05, "loss": 0.2253, "step": 15317, "teacher_loss": 0.17316468060016632 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.34339022636413574, "learning_rate": 1.9414816126744968e-05, "loss": 0.2026, "step": 15318, "teacher_loss": 0.18690776824951172 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.38562166690826416, "learning_rate": 1.941264542776876e-05, "loss": 0.2599, "step": 15319, "teacher_loss": 0.24588152766227722 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.5221768617630005, "learning_rate": 1.9410474627623813e-05, "loss": 0.2317, "step": 15320, "teacher_loss": 0.19938988983631134 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.5356217622756958, "learning_rate": 1.940830372635991e-05, "loss": 0.2818, "step": 15321, "teacher_loss": 0.2535601854324341 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.32742053270339966, "learning_rate": 1.940613272402682e-05, "loss": 0.217, "step": 15322, "teacher_loss": 0.20475628972053528 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.38767537474632263, "learning_rate": 1.9403961620674318e-05, "loss": 0.2005, "step": 15323, "teacher_loss": 0.17973214387893677 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.45740729570388794, "learning_rate": 1.9401790416352175e-05, "loss": 0.2768, "step": 15324, "teacher_loss": 0.2567288279533386 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.22176772356033325, "learning_rate": 1.9399619111110182e-05, "loss": 0.1659, "step": 15325, "teacher_loss": 0.15968795120716095 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.11045975238084793, "learning_rate": 1.9397447704998106e-05, "loss": 0.1375, "step": 15326, "teacher_loss": 0.14054521918296814 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.33058393001556396, "learning_rate": 1.939527619806574e-05, "loss": 0.2196, "step": 15327, "teacher_loss": 0.2073148936033249 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.33219119906425476, "learning_rate": 1.9393104590362875e-05, "loss": 0.2704, "step": 15328, "teacher_loss": 0.26353660225868225 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.22832022607326508, "learning_rate": 1.939093288193929e-05, "loss": 0.2118, "step": 15329, "teacher_loss": 0.2099297046661377 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.4364140033721924, "learning_rate": 1.9388761072844776e-05, "loss": 0.2729, "step": 15330, "teacher_loss": 0.2547116279602051 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.4045739769935608, "learning_rate": 1.938658916312913e-05, "loss": 0.2633, "step": 15331, "teacher_loss": 0.24755240976810455 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.510159969329834, "learning_rate": 1.9384417152842142e-05, "loss": 0.285, "step": 15332, "teacher_loss": 0.25997763872146606 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6237562894821167, "learning_rate": 1.9382245042033626e-05, "loss": 0.3098, "step": 15333, "teacher_loss": 0.2749118506908417 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.119927778840065, "learning_rate": 1.9380072830753358e-05, "loss": 0.0954, "step": 15334, "teacher_loss": 0.09265954792499542 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.18138110637664795, "learning_rate": 1.9377900519051158e-05, "loss": 0.1797, "step": 15335, "teacher_loss": 0.17950746417045593 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.45671266317367554, "learning_rate": 1.9375728106976824e-05, "loss": 0.2856, "step": 15336, "teacher_loss": 0.2665832042694092 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.7529361248016357, "learning_rate": 1.9373555594580158e-05, "loss": 0.3408, "step": 15337, "teacher_loss": 0.294966459274292 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.3107438385486603, "learning_rate": 1.937138298191098e-05, "loss": 0.2642, "step": 15338, "teacher_loss": 0.2589777112007141 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.18152591586112976, "learning_rate": 1.9369210269019095e-05, "loss": 0.1591, "step": 15339, "teacher_loss": 0.15660011768341064 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.37157773971557617, "learning_rate": 1.936703745595432e-05, "loss": 0.3271, "step": 15340, "teacher_loss": 0.3221741318702698 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.37285345792770386, "learning_rate": 1.936486454276647e-05, "loss": 0.2483, "step": 15341, "teacher_loss": 0.23447629809379578 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.20179642736911774, "learning_rate": 1.936269152950536e-05, "loss": 0.1496, "step": 15342, "teacher_loss": 0.14383332431316376 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6244723796844482, "learning_rate": 1.936051841622081e-05, "loss": 0.2874, "step": 15343, "teacher_loss": 0.2499159276485443 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6533231735229492, "learning_rate": 1.935834520296265e-05, "loss": 0.4383, "step": 15344, "teacher_loss": 0.4144238829612732 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.3007589280605316, "learning_rate": 1.93561718897807e-05, "loss": 0.1902, "step": 15345, "teacher_loss": 0.17791253328323364 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.41964584589004517, "learning_rate": 1.9353998476724796e-05, "loss": 0.1869, "step": 15346, "teacher_loss": 0.16106031835079193 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.5858582258224487, "learning_rate": 1.9351824963844753e-05, "loss": 0.2894, "step": 15347, "teacher_loss": 0.25647345185279846 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.3030139207839966, "learning_rate": 1.9349651351190415e-05, "loss": 0.237, "step": 15348, "teacher_loss": 0.2297019064426422 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.48419642448425293, "learning_rate": 1.934747763881161e-05, "loss": 0.2392, "step": 15349, "teacher_loss": 0.21197223663330078 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.35033953189849854, "learning_rate": 1.9345303826758178e-05, "loss": 0.3075, "step": 15350, "teacher_loss": 0.30274152755737305 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.6534970998764038, "learning_rate": 1.9343129915079956e-05, "loss": 0.2609, "step": 15351, "teacher_loss": 0.21730080246925354 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.40793681144714355, "learning_rate": 1.9340955903826788e-05, "loss": 0.2294, "step": 15352, "teacher_loss": 0.20960572361946106 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.26840487122535706, "learning_rate": 1.933878179304852e-05, "loss": 0.2003, "step": 15353, "teacher_loss": 0.19271568953990936 }, { "compression_loss": 0.0, "epoch": 2.77, "label_loss": 0.3471326231956482, "learning_rate": 1.9336607582794985e-05, "loss": 0.2656, "step": 15354, "teacher_loss": 0.25653257966041565 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.20372895896434784, "learning_rate": 1.9334433273116046e-05, "loss": 0.1942, "step": 15355, "teacher_loss": 0.19309498369693756 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3926127552986145, "learning_rate": 1.933225886406154e-05, "loss": 0.1904, "step": 15356, "teacher_loss": 0.16796863079071045 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.712735652923584, "learning_rate": 1.9330084355681335e-05, "loss": 0.4207, "step": 15357, "teacher_loss": 0.38825154304504395 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.41693758964538574, "learning_rate": 1.9327909748025278e-05, "loss": 0.2528, "step": 15358, "teacher_loss": 0.23458613455295563 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.8446159362792969, "learning_rate": 1.9325735041143222e-05, "loss": 0.3504, "step": 15359, "teacher_loss": 0.2955010235309601 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5601364374160767, "learning_rate": 1.9323560235085033e-05, "loss": 0.3274, "step": 15360, "teacher_loss": 0.3015804588794708 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5117583274841309, "learning_rate": 1.9321385329900573e-05, "loss": 0.3487, "step": 15361, "teacher_loss": 0.3305990695953369 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.44725680351257324, "learning_rate": 1.9319210325639696e-05, "loss": 0.1836, "step": 15362, "teacher_loss": 0.1543216109275818 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.38868963718414307, "learning_rate": 1.9317035222352285e-05, "loss": 0.2469, "step": 15363, "teacher_loss": 0.2311720997095108 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5383819341659546, "learning_rate": 1.9314860020088194e-05, "loss": 0.2819, "step": 15364, "teacher_loss": 0.25344693660736084 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.8332241177558899, "learning_rate": 1.93126847188973e-05, "loss": 0.3981, "step": 15365, "teacher_loss": 0.3497142195701599 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.24943889677524567, "learning_rate": 1.931050931882948e-05, "loss": 0.2349, "step": 15366, "teacher_loss": 0.23329699039459229 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3653255105018616, "learning_rate": 1.9308333819934598e-05, "loss": 0.2133, "step": 15367, "teacher_loss": 0.1963883936405182 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.8866627216339111, "learning_rate": 1.930615822226254e-05, "loss": 0.4229, "step": 15368, "teacher_loss": 0.37142395973205566 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.7773796319961548, "learning_rate": 1.9303982525863188e-05, "loss": 0.3685, "step": 15369, "teacher_loss": 0.32303106784820557 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.446260005235672, "learning_rate": 1.930180673078642e-05, "loss": 0.3041, "step": 15370, "teacher_loss": 0.2882963716983795 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.33706748485565186, "learning_rate": 1.929963083708212e-05, "loss": 0.3445, "step": 15371, "teacher_loss": 0.3453790545463562 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.4106626510620117, "learning_rate": 1.9297454844800172e-05, "loss": 0.2056, "step": 15372, "teacher_loss": 0.1828521341085434 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3490701913833618, "learning_rate": 1.9295278753990475e-05, "loss": 0.2546, "step": 15373, "teacher_loss": 0.2441122978925705 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5082021355628967, "learning_rate": 1.9293102564702912e-05, "loss": 0.2587, "step": 15374, "teacher_loss": 0.23097503185272217 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.39774590730667114, "learning_rate": 1.9290926276987373e-05, "loss": 0.2017, "step": 15375, "teacher_loss": 0.17990842461585999 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.35292062163352966, "learning_rate": 1.9288749890893768e-05, "loss": 0.2353, "step": 15376, "teacher_loss": 0.22222284972667694 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.20118297636508942, "learning_rate": 1.928657340647198e-05, "loss": 0.2515, "step": 15377, "teacher_loss": 0.25704899430274963 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5447788238525391, "learning_rate": 1.9284396823771922e-05, "loss": 0.2812, "step": 15378, "teacher_loss": 0.251919150352478 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5870913863182068, "learning_rate": 1.928222014284348e-05, "loss": 0.4063, "step": 15379, "teacher_loss": 0.38619673252105713 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.7765418887138367, "learning_rate": 1.928004336373658e-05, "loss": 0.3454, "step": 15380, "teacher_loss": 0.2975092828273773 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3485333025455475, "learning_rate": 1.9277866486501113e-05, "loss": 0.2375, "step": 15381, "teacher_loss": 0.22516947984695435 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.38803336024284363, "learning_rate": 1.9275689511186995e-05, "loss": 0.2028, "step": 15382, "teacher_loss": 0.18218934535980225 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3977811932563782, "learning_rate": 1.9273512437844135e-05, "loss": 0.2817, "step": 15383, "teacher_loss": 0.26877161860466003 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.6130324602127075, "learning_rate": 1.9271335266522443e-05, "loss": 0.2424, "step": 15384, "teacher_loss": 0.20116689801216125 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5464110374450684, "learning_rate": 1.9269157997271846e-05, "loss": 0.3315, "step": 15385, "teacher_loss": 0.30760204792022705 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.41381633281707764, "learning_rate": 1.9266980630142252e-05, "loss": 0.243, "step": 15386, "teacher_loss": 0.2240362912416458 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.2740339934825897, "learning_rate": 1.9264803165183585e-05, "loss": 0.3827, "step": 15387, "teacher_loss": 0.39475834369659424 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.21634894609451294, "learning_rate": 1.9262625602445773e-05, "loss": 0.2197, "step": 15388, "teacher_loss": 0.2201223373413086 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.19091299176216125, "learning_rate": 1.926044794197873e-05, "loss": 0.2231, "step": 15389, "teacher_loss": 0.22667405009269714 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5715618133544922, "learning_rate": 1.925827018383239e-05, "loss": 0.2813, "step": 15390, "teacher_loss": 0.24906259775161743 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.15555621683597565, "learning_rate": 1.9256092328056685e-05, "loss": 0.2894, "step": 15391, "teacher_loss": 0.304284930229187 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.8207415342330933, "learning_rate": 1.925391437470154e-05, "loss": 0.5705, "step": 15392, "teacher_loss": 0.5427036285400391 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.142722025513649, "learning_rate": 1.9251736323816897e-05, "loss": 0.1684, "step": 15393, "teacher_loss": 0.1712525635957718 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3283693790435791, "learning_rate": 1.924955817545268e-05, "loss": 0.2054, "step": 15394, "teacher_loss": 0.19175118207931519 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.20660507678985596, "learning_rate": 1.924737992965884e-05, "loss": 0.1957, "step": 15395, "teacher_loss": 0.19450122117996216 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.26444393396377563, "learning_rate": 1.924520158648531e-05, "loss": 0.2174, "step": 15396, "teacher_loss": 0.2121778130531311 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.22841104865074158, "learning_rate": 1.9243023145982037e-05, "loss": 0.2033, "step": 15397, "teacher_loss": 0.20048648118972778 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.29124128818511963, "learning_rate": 1.9240844608198964e-05, "loss": 0.1725, "step": 15398, "teacher_loss": 0.15928146243095398 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.7308986186981201, "learning_rate": 1.9238665973186037e-05, "loss": 0.299, "step": 15399, "teacher_loss": 0.2510484457015991 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.20163044333457947, "learning_rate": 1.923648724099321e-05, "loss": 0.1943, "step": 15400, "teacher_loss": 0.19347965717315674 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.5834843516349792, "learning_rate": 1.9234308411670435e-05, "loss": 0.2558, "step": 15401, "teacher_loss": 0.21933594346046448 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.62132328748703, "learning_rate": 1.923212948526766e-05, "loss": 0.3951, "step": 15402, "teacher_loss": 0.369960218667984 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.3575960397720337, "learning_rate": 1.9229950461834845e-05, "loss": 0.2526, "step": 15403, "teacher_loss": 0.24092787504196167 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.4675140380859375, "learning_rate": 1.9227771341421945e-05, "loss": 0.2513, "step": 15404, "teacher_loss": 0.2272489070892334 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.32540085911750793, "learning_rate": 1.9225592124078925e-05, "loss": 0.2055, "step": 15405, "teacher_loss": 0.19222715497016907 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.7232991456985474, "learning_rate": 1.922341280985575e-05, "loss": 0.4933, "step": 15406, "teacher_loss": 0.4677307605743408 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.27725711464881897, "learning_rate": 1.922123339880238e-05, "loss": 0.204, "step": 15407, "teacher_loss": 0.19587118923664093 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.6356396675109863, "learning_rate": 1.9219053890968782e-05, "loss": 0.4599, "step": 15408, "teacher_loss": 0.44042283296585083 }, { "compression_loss": 0.0, "epoch": 2.78, "label_loss": 0.21995791792869568, "learning_rate": 1.9216874286404927e-05, "loss": 0.1643, "step": 15409, "teacher_loss": 0.15806464850902557 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.419097363948822, "learning_rate": 1.921469458516079e-05, "loss": 0.3189, "step": 15410, "teacher_loss": 0.3078019618988037 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.37716197967529297, "learning_rate": 1.921251478728634e-05, "loss": 0.3801, "step": 15411, "teacher_loss": 0.38041630387306213 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.6402702927589417, "learning_rate": 1.9210334892831562e-05, "loss": 0.3405, "step": 15412, "teacher_loss": 0.30720949172973633 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3596067428588867, "learning_rate": 1.9208154901846422e-05, "loss": 0.2027, "step": 15413, "teacher_loss": 0.18524807691574097 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.6070290803909302, "learning_rate": 1.9205974814380906e-05, "loss": 0.374, "step": 15414, "teacher_loss": 0.348120778799057 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3950555920600891, "learning_rate": 1.9203794630485004e-05, "loss": 0.2118, "step": 15415, "teacher_loss": 0.19141331315040588 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4933011531829834, "learning_rate": 1.920161435020869e-05, "loss": 0.3456, "step": 15416, "teacher_loss": 0.32919132709503174 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5695645809173584, "learning_rate": 1.9199433973601955e-05, "loss": 0.2451, "step": 15417, "teacher_loss": 0.20904628932476044 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.21928316354751587, "learning_rate": 1.9197253500714797e-05, "loss": 0.2482, "step": 15418, "teacher_loss": 0.2514092028141022 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.7365667819976807, "learning_rate": 1.919507293159719e-05, "loss": 0.4647, "step": 15419, "teacher_loss": 0.4345453381538391 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5829306244850159, "learning_rate": 1.9192892266299144e-05, "loss": 0.2626, "step": 15420, "teacher_loss": 0.2270369678735733 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.6354122757911682, "learning_rate": 1.919071150487065e-05, "loss": 0.28, "step": 15421, "teacher_loss": 0.24051980674266815 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.2125316858291626, "learning_rate": 1.9188530647361704e-05, "loss": 0.2635, "step": 15422, "teacher_loss": 0.2692033052444458 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4536246657371521, "learning_rate": 1.9186349693822312e-05, "loss": 0.2328, "step": 15423, "teacher_loss": 0.20824947953224182 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.6706361174583435, "learning_rate": 1.9184168644302466e-05, "loss": 0.25, "step": 15424, "teacher_loss": 0.20325596630573273 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5530021786689758, "learning_rate": 1.9181987498852175e-05, "loss": 0.3416, "step": 15425, "teacher_loss": 0.318141371011734 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.15645934641361237, "learning_rate": 1.917980625752146e-05, "loss": 0.2224, "step": 15426, "teacher_loss": 0.22971150279045105 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.7199164628982544, "learning_rate": 1.917762492036031e-05, "loss": 0.4066, "step": 15427, "teacher_loss": 0.37179577350616455 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4001297950744629, "learning_rate": 1.9175443487418754e-05, "loss": 0.1977, "step": 15428, "teacher_loss": 0.17515522241592407 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.9426354169845581, "learning_rate": 1.9173261958746793e-05, "loss": 0.3102, "step": 15429, "teacher_loss": 0.23995313048362732 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5005701184272766, "learning_rate": 1.9171080334394444e-05, "loss": 0.2994, "step": 15430, "teacher_loss": 0.2769980728626251 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.21406710147857666, "learning_rate": 1.9168898614411733e-05, "loss": 0.2847, "step": 15431, "teacher_loss": 0.29255950450897217 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4387319087982178, "learning_rate": 1.9166716798848676e-05, "loss": 0.2175, "step": 15432, "teacher_loss": 0.19287964701652527 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4194740355014801, "learning_rate": 1.9164534887755292e-05, "loss": 0.2878, "step": 15433, "teacher_loss": 0.2731245756149292 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3970203399658203, "learning_rate": 1.916235288118161e-05, "loss": 0.2829, "step": 15434, "teacher_loss": 0.2701733112335205 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.7173104882240295, "learning_rate": 1.9160170779177657e-05, "loss": 0.4083, "step": 15435, "teacher_loss": 0.3739841878414154 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5822244882583618, "learning_rate": 1.9157988581793463e-05, "loss": 0.3525, "step": 15436, "teacher_loss": 0.32701003551483154 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.9780691862106323, "learning_rate": 1.9155806289079053e-05, "loss": 0.3841, "step": 15437, "teacher_loss": 0.31810736656188965 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5977356433868408, "learning_rate": 1.915362390108447e-05, "loss": 0.3182, "step": 15438, "teacher_loss": 0.28715863823890686 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.48312854766845703, "learning_rate": 1.9151441417859733e-05, "loss": 0.2482, "step": 15439, "teacher_loss": 0.22208517789840698 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4813908338546753, "learning_rate": 1.91492588394549e-05, "loss": 0.3248, "step": 15440, "teacher_loss": 0.30738508701324463 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.14062903821468353, "learning_rate": 1.914707616592e-05, "loss": 0.1747, "step": 15441, "teacher_loss": 0.17854070663452148 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.18599435687065125, "learning_rate": 1.9144893397305077e-05, "loss": 0.2603, "step": 15442, "teacher_loss": 0.2685200572013855 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.38677364587783813, "learning_rate": 1.914271053366018e-05, "loss": 0.2403, "step": 15443, "teacher_loss": 0.22402793169021606 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.4892314076423645, "learning_rate": 1.914052757503534e-05, "loss": 0.2413, "step": 15444, "teacher_loss": 0.2137695997953415 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3936445116996765, "learning_rate": 1.9138344521480628e-05, "loss": 0.1686, "step": 15445, "teacher_loss": 0.14355415105819702 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.35831132531166077, "learning_rate": 1.913616137304608e-05, "loss": 0.2937, "step": 15446, "teacher_loss": 0.2865021824836731 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.9481849074363708, "learning_rate": 1.9133978129781748e-05, "loss": 0.351, "step": 15447, "teacher_loss": 0.28465744853019714 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.48017382621765137, "learning_rate": 1.9131794791737704e-05, "loss": 0.2008, "step": 15448, "teacher_loss": 0.16977854073047638 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.16602830588817596, "learning_rate": 1.9129611358963978e-05, "loss": 0.1897, "step": 15449, "teacher_loss": 0.1923007369041443 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.45903316140174866, "learning_rate": 1.912742783151065e-05, "loss": 0.2485, "step": 15450, "teacher_loss": 0.2251119613647461 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5154883861541748, "learning_rate": 1.912524420942778e-05, "loss": 0.3216, "step": 15451, "teacher_loss": 0.300012469291687 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.548113226890564, "learning_rate": 1.9123060492765422e-05, "loss": 0.4535, "step": 15452, "teacher_loss": 0.442943274974823 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3259221911430359, "learning_rate": 1.9120876681573656e-05, "loss": 0.2872, "step": 15453, "teacher_loss": 0.28293758630752563 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3390854001045227, "learning_rate": 1.911869277590254e-05, "loss": 0.3488, "step": 15454, "teacher_loss": 0.3499288260936737 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5569580793380737, "learning_rate": 1.9116508775802143e-05, "loss": 0.2861, "step": 15455, "teacher_loss": 0.2560235857963562 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.371101438999176, "learning_rate": 1.911432468132255e-05, "loss": 0.3239, "step": 15456, "teacher_loss": 0.31864434480667114 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.7027199268341064, "learning_rate": 1.911214049251382e-05, "loss": 0.2799, "step": 15457, "teacher_loss": 0.23296645283699036 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5315957069396973, "learning_rate": 1.910995620942604e-05, "loss": 0.2437, "step": 15458, "teacher_loss": 0.21166233718395233 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.7411613464355469, "learning_rate": 1.9107771832109288e-05, "loss": 0.697, "step": 15459, "teacher_loss": 0.6921473741531372 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.5061233043670654, "learning_rate": 1.9105587360613642e-05, "loss": 0.2554, "step": 15460, "teacher_loss": 0.2275104820728302 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.8083809614181519, "learning_rate": 1.9103402794989186e-05, "loss": 0.3693, "step": 15461, "teacher_loss": 0.32046806812286377 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.3506333529949188, "learning_rate": 1.9101218135286007e-05, "loss": 0.1816, "step": 15462, "teacher_loss": 0.16281311213970184 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.45168614387512207, "learning_rate": 1.9099033381554192e-05, "loss": 0.2991, "step": 15463, "teacher_loss": 0.28219398856163025 }, { "compression_loss": 0.0, "epoch": 2.79, "label_loss": 0.6518122553825378, "learning_rate": 1.9096848533843834e-05, "loss": 0.2654, "step": 15464, "teacher_loss": 0.22241343557834625 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.20811881124973297, "learning_rate": 1.9094663592205017e-05, "loss": 0.208, "step": 15465, "teacher_loss": 0.20794963836669922 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.3877553343772888, "learning_rate": 1.9092478556687848e-05, "loss": 0.315, "step": 15466, "teacher_loss": 0.30690741539001465 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.3082616329193115, "learning_rate": 1.9090293427342406e-05, "loss": 0.2151, "step": 15467, "teacher_loss": 0.20477703213691711 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.45852968096733093, "learning_rate": 1.908810820421881e-05, "loss": 0.386, "step": 15468, "teacher_loss": 0.3779807686805725 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.411659836769104, "learning_rate": 1.9085922887367138e-05, "loss": 0.2071, "step": 15469, "teacher_loss": 0.18435998260974884 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.46716147661209106, "learning_rate": 1.9083737476837512e-05, "loss": 0.281, "step": 15470, "teacher_loss": 0.2603006064891815 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5763517022132874, "learning_rate": 1.9081551972680025e-05, "loss": 0.3504, "step": 15471, "teacher_loss": 0.32532578706741333 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.22218668460845947, "learning_rate": 1.907936637494479e-05, "loss": 0.1967, "step": 15472, "teacher_loss": 0.19387857615947723 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.24990811944007874, "learning_rate": 1.9077180683681914e-05, "loss": 0.286, "step": 15473, "teacher_loss": 0.2900213301181793 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.43082424998283386, "learning_rate": 1.907499489894151e-05, "loss": 0.2383, "step": 15474, "teacher_loss": 0.21685364842414856 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5592328906059265, "learning_rate": 1.9072809020773696e-05, "loss": 0.2134, "step": 15475, "teacher_loss": 0.1749754548072815 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.7293300032615662, "learning_rate": 1.9070623049228575e-05, "loss": 0.2904, "step": 15476, "teacher_loss": 0.24163228273391724 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.6213291883468628, "learning_rate": 1.9068436984356274e-05, "loss": 0.2533, "step": 15477, "teacher_loss": 0.21244600415229797 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5540156960487366, "learning_rate": 1.906625082620691e-05, "loss": 0.2158, "step": 15478, "teacher_loss": 0.1782737523317337 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.1648986041545868, "learning_rate": 1.9064064574830606e-05, "loss": 0.2153, "step": 15479, "teacher_loss": 0.22089609503746033 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.23908287286758423, "learning_rate": 1.9061878230277485e-05, "loss": 0.2692, "step": 15480, "teacher_loss": 0.272597074508667 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 1.263882040977478, "learning_rate": 1.905969179259768e-05, "loss": 0.3981, "step": 15481, "teacher_loss": 0.30194544792175293 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.1613868921995163, "learning_rate": 1.9057505261841305e-05, "loss": 0.212, "step": 15482, "teacher_loss": 0.21761903166770935 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.20352189242839813, "learning_rate": 1.9055318638058504e-05, "loss": 0.2041, "step": 15483, "teacher_loss": 0.20414231717586517 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.2667786180973053, "learning_rate": 1.9053131921299404e-05, "loss": 0.1904, "step": 15484, "teacher_loss": 0.18195980787277222 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 1.0054373741149902, "learning_rate": 1.9050945111614142e-05, "loss": 0.2998, "step": 15485, "teacher_loss": 0.2213945835828781 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.17990493774414062, "learning_rate": 1.9048758209052856e-05, "loss": 0.2183, "step": 15486, "teacher_loss": 0.22258487343788147 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.681825578212738, "learning_rate": 1.904657121366568e-05, "loss": 0.348, "step": 15487, "teacher_loss": 0.31090688705444336 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.578462541103363, "learning_rate": 1.904438412550276e-05, "loss": 0.2308, "step": 15488, "teacher_loss": 0.19216594099998474 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.44593191146850586, "learning_rate": 1.9042196944614234e-05, "loss": 0.2323, "step": 15489, "teacher_loss": 0.2085266411304474 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.42436695098876953, "learning_rate": 1.9040009671050253e-05, "loss": 0.3933, "step": 15490, "teacher_loss": 0.389850914478302 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.2601102888584137, "learning_rate": 1.9037822304860967e-05, "loss": 0.2496, "step": 15491, "teacher_loss": 0.24843838810920715 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.37709468603134155, "learning_rate": 1.903563484609652e-05, "loss": 0.2585, "step": 15492, "teacher_loss": 0.24529266357421875 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.7521003484725952, "learning_rate": 1.903344729480706e-05, "loss": 0.4214, "step": 15493, "teacher_loss": 0.3846304416656494 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.525193452835083, "learning_rate": 1.903125965104275e-05, "loss": 0.366, "step": 15494, "teacher_loss": 0.3483607769012451 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.232259601354599, "learning_rate": 1.902907191485374e-05, "loss": 0.207, "step": 15495, "teacher_loss": 0.2042347490787506 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.46593573689460754, "learning_rate": 1.9026884086290196e-05, "loss": 0.1987, "step": 15496, "teacher_loss": 0.1689864844083786 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.47424525022506714, "learning_rate": 1.9024696165402272e-05, "loss": 0.3479, "step": 15497, "teacher_loss": 0.3339027762413025 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.1579851508140564, "learning_rate": 1.902250815224013e-05, "loss": 0.2085, "step": 15498, "teacher_loss": 0.2141035944223404 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.3872631788253784, "learning_rate": 1.9020320046853935e-05, "loss": 0.283, "step": 15499, "teacher_loss": 0.2714151442050934 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.6938329935073853, "learning_rate": 1.9018131849293856e-05, "loss": 0.2964, "step": 15500, "teacher_loss": 0.2521893084049225 }, { "epoch": 2.8, "eval_exact_match": 80.08514664143803, "eval_f1": 87.3458180711934, "step": 15500 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.32541730999946594, "learning_rate": 1.9015943559610063e-05, "loss": 0.334, "step": 15501, "teacher_loss": 0.33500736951828003 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5618366003036499, "learning_rate": 1.901375517785272e-05, "loss": 0.294, "step": 15502, "teacher_loss": 0.2642078995704651 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.7149922251701355, "learning_rate": 1.9011566704072007e-05, "loss": 0.372, "step": 15503, "teacher_loss": 0.3338811993598938 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 1.0027284622192383, "learning_rate": 1.90093781383181e-05, "loss": 0.6309, "step": 15504, "teacher_loss": 0.5895869731903076 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.3709099590778351, "learning_rate": 1.9007189480641168e-05, "loss": 0.303, "step": 15505, "teacher_loss": 0.2954355776309967 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.7155470252037048, "learning_rate": 1.90050007310914e-05, "loss": 0.3456, "step": 15506, "teacher_loss": 0.3044409453868866 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.28512662649154663, "learning_rate": 1.9002811889718966e-05, "loss": 0.2266, "step": 15507, "teacher_loss": 0.22005261480808258 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.2743754982948303, "learning_rate": 1.9000622956574063e-05, "loss": 0.2676, "step": 15508, "teacher_loss": 0.266897976398468 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.36258500814437866, "learning_rate": 1.8998433931706868e-05, "loss": 0.213, "step": 15509, "teacher_loss": 0.19641447067260742 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 1.0479073524475098, "learning_rate": 1.8996244815167568e-05, "loss": 1.0109, "step": 15510, "teacher_loss": 1.0067338943481445 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.7324337363243103, "learning_rate": 1.8994055607006363e-05, "loss": 0.2663, "step": 15511, "teacher_loss": 0.21455280482769012 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.2905508279800415, "learning_rate": 1.899186630727343e-05, "loss": 0.1903, "step": 15512, "teacher_loss": 0.17915646731853485 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.4828190207481384, "learning_rate": 1.8989676916018976e-05, "loss": 0.218, "step": 15513, "teacher_loss": 0.18862539529800415 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.2640182077884674, "learning_rate": 1.898748743329319e-05, "loss": 0.1846, "step": 15514, "teacher_loss": 0.17574471235275269 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.17918409407138824, "learning_rate": 1.898529785914627e-05, "loss": 0.3063, "step": 15515, "teacher_loss": 0.32046833634376526 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.29043838381767273, "learning_rate": 1.8983108193628425e-05, "loss": 0.2327, "step": 15516, "teacher_loss": 0.22626343369483948 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.44323647022247314, "learning_rate": 1.898091843678984e-05, "loss": 0.2247, "step": 15517, "teacher_loss": 0.2003874033689499 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5753565430641174, "learning_rate": 1.8978728588680744e-05, "loss": 0.2813, "step": 15518, "teacher_loss": 0.24860483407974243 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.5129090547561646, "learning_rate": 1.897653864935132e-05, "loss": 0.2844, "step": 15519, "teacher_loss": 0.2589606046676636 }, { "compression_loss": 0.0, "epoch": 2.8, "label_loss": 0.3188718557357788, "learning_rate": 1.897434861885179e-05, "loss": 0.2179, "step": 15520, "teacher_loss": 0.2066279500722885 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.35984236001968384, "learning_rate": 1.8972158497232365e-05, "loss": 0.2603, "step": 15521, "teacher_loss": 0.24920672178268433 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.264047235250473, "learning_rate": 1.896996828454325e-05, "loss": 0.1933, "step": 15522, "teacher_loss": 0.18547002971172333 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4257519543170929, "learning_rate": 1.8967777980834668e-05, "loss": 0.2409, "step": 15523, "teacher_loss": 0.22037647664546967 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.16302679479122162, "learning_rate": 1.8965587586156833e-05, "loss": 0.1563, "step": 15524, "teacher_loss": 0.15558823943138123 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4739953279495239, "learning_rate": 1.8963397100559965e-05, "loss": 0.3629, "step": 15525, "teacher_loss": 0.35060369968414307 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.33425238728523254, "learning_rate": 1.8961206524094284e-05, "loss": 0.3309, "step": 15526, "teacher_loss": 0.33055824041366577 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.3556947112083435, "learning_rate": 1.895901585681001e-05, "loss": 0.1528, "step": 15527, "teacher_loss": 0.13030007481575012 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.22201834619045258, "learning_rate": 1.8956825098757377e-05, "loss": 0.2478, "step": 15528, "teacher_loss": 0.2506236135959625 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2079494446516037, "learning_rate": 1.8954634249986602e-05, "loss": 0.184, "step": 15529, "teacher_loss": 0.18128818273544312 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2944962680339813, "learning_rate": 1.8952443310547926e-05, "loss": 0.2676, "step": 15530, "teacher_loss": 0.2645842432975769 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.1734279841184616, "learning_rate": 1.895025228049157e-05, "loss": 0.2391, "step": 15531, "teacher_loss": 0.24639226496219635 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.6183395981788635, "learning_rate": 1.8948061159867774e-05, "loss": 0.3579, "step": 15532, "teacher_loss": 0.32891008257865906 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.507132887840271, "learning_rate": 1.8945869948726774e-05, "loss": 0.2778, "step": 15533, "teacher_loss": 0.2522915005683899 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.6699087619781494, "learning_rate": 1.894367864711881e-05, "loss": 0.3231, "step": 15534, "teacher_loss": 0.2845231294631958 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.41367071866989136, "learning_rate": 1.8941487255094112e-05, "loss": 0.2485, "step": 15535, "teacher_loss": 0.23012655973434448 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.7437030076980591, "learning_rate": 1.8939295772702933e-05, "loss": 0.2718, "step": 15536, "teacher_loss": 0.21932527422904968 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.37080633640289307, "learning_rate": 1.893710419999551e-05, "loss": 0.2116, "step": 15537, "teacher_loss": 0.19390274584293365 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.42633703351020813, "learning_rate": 1.8934912537022094e-05, "loss": 0.1711, "step": 15538, "teacher_loss": 0.14274394512176514 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.37351471185684204, "learning_rate": 1.8932720783832926e-05, "loss": 0.2574, "step": 15539, "teacher_loss": 0.24454814195632935 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2345258593559265, "learning_rate": 1.8930528940478263e-05, "loss": 0.1896, "step": 15540, "teacher_loss": 0.18465213477611542 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.6827257871627808, "learning_rate": 1.892833700700836e-05, "loss": 0.2493, "step": 15541, "teacher_loss": 0.20110656321048737 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4447067379951477, "learning_rate": 1.8926144983473464e-05, "loss": 0.2693, "step": 15542, "teacher_loss": 0.24985337257385254 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.21103844046592712, "learning_rate": 1.8923952869923836e-05, "loss": 0.2143, "step": 15543, "teacher_loss": 0.21465227007865906 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2783176898956299, "learning_rate": 1.8921760666409734e-05, "loss": 0.2716, "step": 15544, "teacher_loss": 0.2708452343940735 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.8818731307983398, "learning_rate": 1.891956837298141e-05, "loss": 0.8136, "step": 15545, "teacher_loss": 0.8059999942779541 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4194653630256653, "learning_rate": 1.8917375989689146e-05, "loss": 0.2238, "step": 15546, "teacher_loss": 0.20201988518238068 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.9790559411048889, "learning_rate": 1.8915183516583194e-05, "loss": 0.5023, "step": 15547, "teacher_loss": 0.44937628507614136 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2915409803390503, "learning_rate": 1.8912990953713812e-05, "loss": 0.195, "step": 15548, "teacher_loss": 0.18431204557418823 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.42722997069358826, "learning_rate": 1.891079830113129e-05, "loss": 0.3306, "step": 15549, "teacher_loss": 0.319845050573349 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.42971712350845337, "learning_rate": 1.8908605558885882e-05, "loss": 0.2558, "step": 15550, "teacher_loss": 0.2364306002855301 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4738118350505829, "learning_rate": 1.8906412727027873e-05, "loss": 0.2865, "step": 15551, "teacher_loss": 0.2657358646392822 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.34051600098609924, "learning_rate": 1.8904219805607527e-05, "loss": 0.1948, "step": 15552, "teacher_loss": 0.17857897281646729 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.38116997480392456, "learning_rate": 1.8902026794675124e-05, "loss": 0.2724, "step": 15553, "teacher_loss": 0.2602729797363281 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.5266185402870178, "learning_rate": 1.8899833694280952e-05, "loss": 0.2993, "step": 15554, "teacher_loss": 0.27399206161499023 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.3713909685611725, "learning_rate": 1.8897640504475283e-05, "loss": 0.2725, "step": 15555, "teacher_loss": 0.26149219274520874 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4412344694137573, "learning_rate": 1.8895447225308403e-05, "loss": 0.2516, "step": 15556, "teacher_loss": 0.2305634617805481 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.6124913692474365, "learning_rate": 1.8893253856830597e-05, "loss": 0.409, "step": 15557, "teacher_loss": 0.3864133059978485 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 1.1241494417190552, "learning_rate": 1.8891060399092153e-05, "loss": 0.2867, "step": 15558, "teacher_loss": 0.193625807762146 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.42997950315475464, "learning_rate": 1.8888866852143363e-05, "loss": 0.3795, "step": 15559, "teacher_loss": 0.37392929196357727 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.39414238929748535, "learning_rate": 1.8886673216034513e-05, "loss": 0.1887, "step": 15560, "teacher_loss": 0.16592717170715332 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.43040138483047485, "learning_rate": 1.88844794908159e-05, "loss": 0.2094, "step": 15561, "teacher_loss": 0.18483954668045044 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.3976280689239502, "learning_rate": 1.888228567653781e-05, "loss": 0.2363, "step": 15562, "teacher_loss": 0.21836315095424652 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.38169950246810913, "learning_rate": 1.8880091773250558e-05, "loss": 0.2478, "step": 15563, "teacher_loss": 0.23293665051460266 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.3617252707481384, "learning_rate": 1.8877897781004435e-05, "loss": 0.2197, "step": 15564, "teacher_loss": 0.2039305865764618 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 1.1804354190826416, "learning_rate": 1.887570369984974e-05, "loss": 0.2984, "step": 15565, "teacher_loss": 0.20037254691123962 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.5848848819732666, "learning_rate": 1.887350952983678e-05, "loss": 0.2582, "step": 15566, "teacher_loss": 0.2219366729259491 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.4347054660320282, "learning_rate": 1.8871315271015857e-05, "loss": 0.228, "step": 15567, "teacher_loss": 0.20500747859477997 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.419778048992157, "learning_rate": 1.8869120923437284e-05, "loss": 0.2792, "step": 15568, "teacher_loss": 0.2636294960975647 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.2278396487236023, "learning_rate": 1.8866926487151374e-05, "loss": 0.1876, "step": 15569, "teacher_loss": 0.18314608931541443 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.8811348676681519, "learning_rate": 1.8864731962208422e-05, "loss": 0.3872, "step": 15570, "teacher_loss": 0.3323003649711609 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.5627462267875671, "learning_rate": 1.8862537348658764e-05, "loss": 0.3403, "step": 15571, "teacher_loss": 0.31559625267982483 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.5714628100395203, "learning_rate": 1.8860342646552698e-05, "loss": 0.2617, "step": 15572, "teacher_loss": 0.22725656628608704 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.3889523148536682, "learning_rate": 1.8858147855940546e-05, "loss": 0.2418, "step": 15573, "teacher_loss": 0.22547374665737152 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.44961076974868774, "learning_rate": 1.885595297687264e-05, "loss": 0.2337, "step": 15574, "teacher_loss": 0.20969700813293457 }, { "compression_loss": 0.0, "epoch": 2.81, "label_loss": 0.5013306736946106, "learning_rate": 1.8853758009399288e-05, "loss": 0.3811, "step": 15575, "teacher_loss": 0.3677855134010315 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.47343164682388306, "learning_rate": 1.8851562953570824e-05, "loss": 0.2834, "step": 15576, "teacher_loss": 0.2622416615486145 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.6585253477096558, "learning_rate": 1.8849367809437562e-05, "loss": 0.5735, "step": 15577, "teacher_loss": 0.5640000104904175 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.2611042857170105, "learning_rate": 1.8847172577049837e-05, "loss": 0.1916, "step": 15578, "teacher_loss": 0.18387891352176666 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.36949077248573303, "learning_rate": 1.8844977256457985e-05, "loss": 0.252, "step": 15579, "teacher_loss": 0.23896434903144836 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.5243006944656372, "learning_rate": 1.884278184771233e-05, "loss": 0.2923, "step": 15580, "teacher_loss": 0.2665550112724304 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.4477646052837372, "learning_rate": 1.8840586350863207e-05, "loss": 0.2392, "step": 15581, "teacher_loss": 0.21598154306411743 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.17496567964553833, "learning_rate": 1.8838390765960956e-05, "loss": 0.1921, "step": 15582, "teacher_loss": 0.19396603107452393 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.8789926171302795, "learning_rate": 1.883619509305591e-05, "loss": 0.4427, "step": 15583, "teacher_loss": 0.3941769003868103 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.2837710976600647, "learning_rate": 1.8833999332198418e-05, "loss": 0.2505, "step": 15584, "teacher_loss": 0.24682967364788055 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 1.0828001499176025, "learning_rate": 1.883180348343881e-05, "loss": 0.3935, "step": 15585, "teacher_loss": 0.3168558180332184 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.34016817808151245, "learning_rate": 1.8829607546827438e-05, "loss": 0.2511, "step": 15586, "teacher_loss": 0.24124349653720856 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.45018357038497925, "learning_rate": 1.8827411522414647e-05, "loss": 0.3647, "step": 15587, "teacher_loss": 0.35515522956848145 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.2050551474094391, "learning_rate": 1.8825215410250784e-05, "loss": 0.2142, "step": 15588, "teacher_loss": 0.21523958444595337 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.28036320209503174, "learning_rate": 1.8823019210386204e-05, "loss": 0.1926, "step": 15589, "teacher_loss": 0.1828005313873291 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.4579232335090637, "learning_rate": 1.8820822922871254e-05, "loss": 0.2495, "step": 15590, "teacher_loss": 0.22639469802379608 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.20725765824317932, "learning_rate": 1.881862654775629e-05, "loss": 0.1704, "step": 15591, "teacher_loss": 0.16634106636047363 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.33307963609695435, "learning_rate": 1.8816430085091663e-05, "loss": 0.2136, "step": 15592, "teacher_loss": 0.20032566785812378 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.32874059677124023, "learning_rate": 1.881423353492774e-05, "loss": 0.243, "step": 15593, "teacher_loss": 0.2335001826286316 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.9663118124008179, "learning_rate": 1.881203689731488e-05, "loss": 0.3227, "step": 15594, "teacher_loss": 0.2512326240539551 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.4488135874271393, "learning_rate": 1.8809840172303435e-05, "loss": 0.2905, "step": 15595, "teacher_loss": 0.272901713848114 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.46793121099472046, "learning_rate": 1.8807643359943788e-05, "loss": 0.3281, "step": 15596, "teacher_loss": 0.31253546476364136 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.5057793855667114, "learning_rate": 1.880544646028629e-05, "loss": 0.4708, "step": 15597, "teacher_loss": 0.4669633209705353 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.45457014441490173, "learning_rate": 1.880324947338131e-05, "loss": 0.2302, "step": 15598, "teacher_loss": 0.20530733466148376 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.37621429562568665, "learning_rate": 1.880105239927923e-05, "loss": 0.233, "step": 15599, "teacher_loss": 0.21705016493797302 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.7735012173652649, "learning_rate": 1.879885523803041e-05, "loss": 0.2609, "step": 15600, "teacher_loss": 0.20392854511737823 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.4213787913322449, "learning_rate": 1.879665798968523e-05, "loss": 0.222, "step": 15601, "teacher_loss": 0.1998279094696045 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.2106642723083496, "learning_rate": 1.8794460654294064e-05, "loss": 0.2121, "step": 15602, "teacher_loss": 0.21228326857089996 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.9285902976989746, "learning_rate": 1.8792263231907292e-05, "loss": 0.3901, "step": 15603, "teacher_loss": 0.3303107023239136 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.3709350824356079, "learning_rate": 1.87900657225753e-05, "loss": 0.2995, "step": 15604, "teacher_loss": 0.29150816798210144 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.24274086952209473, "learning_rate": 1.8787868126348455e-05, "loss": 0.2068, "step": 15605, "teacher_loss": 0.20277884602546692 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.3534519672393799, "learning_rate": 1.8785670443277156e-05, "loss": 0.2386, "step": 15606, "teacher_loss": 0.22580553591251373 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.4775151014328003, "learning_rate": 1.878347267341178e-05, "loss": 0.2302, "step": 15607, "teacher_loss": 0.20275001227855682 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.44628381729125977, "learning_rate": 1.8781274816802722e-05, "loss": 0.2749, "step": 15608, "teacher_loss": 0.25584763288497925 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.7155665159225464, "learning_rate": 1.877907687350037e-05, "loss": 0.2676, "step": 15609, "teacher_loss": 0.21778368949890137 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.9141194224357605, "learning_rate": 1.8776878843555114e-05, "loss": 0.39, "step": 15610, "teacher_loss": 0.3317718505859375 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.5735787153244019, "learning_rate": 1.877468072701735e-05, "loss": 0.5461, "step": 15611, "teacher_loss": 0.5430393218994141 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.28447479009628296, "learning_rate": 1.8772482523937475e-05, "loss": 0.1939, "step": 15612, "teacher_loss": 0.18388940393924713 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.6083656549453735, "learning_rate": 1.8770284234365883e-05, "loss": 0.2487, "step": 15613, "teacher_loss": 0.20877036452293396 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.6459749937057495, "learning_rate": 1.876808585835298e-05, "loss": 0.2735, "step": 15614, "teacher_loss": 0.23210833966732025 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.37882575392723083, "learning_rate": 1.876588739594916e-05, "loss": 0.2116, "step": 15615, "teacher_loss": 0.19300782680511475 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.6242225170135498, "learning_rate": 1.8763688847204843e-05, "loss": 0.3597, "step": 15616, "teacher_loss": 0.3303227126598358 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.48707103729248047, "learning_rate": 1.8761490212170416e-05, "loss": 0.334, "step": 15617, "teacher_loss": 0.31697624921798706 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.48537909984588623, "learning_rate": 1.87592914908963e-05, "loss": 0.2359, "step": 15618, "teacher_loss": 0.20818506181240082 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.5380996465682983, "learning_rate": 1.8757092683432903e-05, "loss": 0.2435, "step": 15619, "teacher_loss": 0.21077196300029755 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.3553329110145569, "learning_rate": 1.875489378983063e-05, "loss": 0.2457, "step": 15620, "teacher_loss": 0.2334851622581482 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.618273138999939, "learning_rate": 1.8752694810139903e-05, "loss": 0.2574, "step": 15621, "teacher_loss": 0.21730799973011017 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.19615758955478668, "learning_rate": 1.8750495744411137e-05, "loss": 0.1547, "step": 15622, "teacher_loss": 0.15014046430587769 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.29901596903800964, "learning_rate": 1.8748296592694744e-05, "loss": 0.3083, "step": 15623, "teacher_loss": 0.3092902898788452 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.34863829612731934, "learning_rate": 1.874609735504115e-05, "loss": 0.2112, "step": 15624, "teacher_loss": 0.19590693712234497 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.2578679323196411, "learning_rate": 1.8743898031500772e-05, "loss": 0.4657, "step": 15625, "teacher_loss": 0.48884499073028564 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.6031060218811035, "learning_rate": 1.874169862212404e-05, "loss": 0.219, "step": 15626, "teacher_loss": 0.17628945410251617 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.39561349153518677, "learning_rate": 1.8739499126961382e-05, "loss": 0.4644, "step": 15627, "teacher_loss": 0.4720189571380615 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 1.214064598083496, "learning_rate": 1.8737299546063213e-05, "loss": 0.4031, "step": 15628, "teacher_loss": 0.3129780888557434 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.606117844581604, "learning_rate": 1.8735099879479974e-05, "loss": 0.2636, "step": 15629, "teacher_loss": 0.22556297481060028 }, { "compression_loss": 0.0, "epoch": 2.82, "label_loss": 0.5360339879989624, "learning_rate": 1.8732900127262094e-05, "loss": 0.3415, "step": 15630, "teacher_loss": 0.31982964277267456 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.33991146087646484, "learning_rate": 1.8730700289460005e-05, "loss": 0.24, "step": 15631, "teacher_loss": 0.2288684844970703 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.26857253909111023, "learning_rate": 1.8728500366124142e-05, "loss": 0.2008, "step": 15632, "teacher_loss": 0.19331955909729004 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.504621148109436, "learning_rate": 1.8726300357304942e-05, "loss": 0.2489, "step": 15633, "teacher_loss": 0.2204873263835907 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5409475564956665, "learning_rate": 1.8724100263052854e-05, "loss": 0.273, "step": 15634, "teacher_loss": 0.2432001382112503 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.3308134973049164, "learning_rate": 1.8721900083418306e-05, "loss": 0.1748, "step": 15635, "teacher_loss": 0.15751197934150696 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.41927462816238403, "learning_rate": 1.871969981845175e-05, "loss": 0.3609, "step": 15636, "teacher_loss": 0.35441386699676514 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.7724589109420776, "learning_rate": 1.8717499468203627e-05, "loss": 0.3027, "step": 15637, "teacher_loss": 0.2504867911338806 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4513605833053589, "learning_rate": 1.871529903272439e-05, "loss": 0.23, "step": 15638, "teacher_loss": 0.20538701117038727 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.42484816908836365, "learning_rate": 1.8713098512064485e-05, "loss": 0.3451, "step": 15639, "teacher_loss": 0.33622509241104126 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5539860129356384, "learning_rate": 1.871089790627436e-05, "loss": 0.1974, "step": 15640, "teacher_loss": 0.15773595869541168 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.43911492824554443, "learning_rate": 1.8708697215404478e-05, "loss": 0.2656, "step": 15641, "teacher_loss": 0.24634799361228943 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.9357492327690125, "learning_rate": 1.8706496439505285e-05, "loss": 0.7289, "step": 15642, "teacher_loss": 0.7059305310249329 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.3149608373641968, "learning_rate": 1.870429557862724e-05, "loss": 0.245, "step": 15643, "teacher_loss": 0.23724766075611115 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5427775382995605, "learning_rate": 1.8702094632820804e-05, "loss": 0.249, "step": 15644, "teacher_loss": 0.2163078486919403 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 1.2592142820358276, "learning_rate": 1.8699893602136438e-05, "loss": 0.4716, "step": 15645, "teacher_loss": 0.38407737016677856 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5874372720718384, "learning_rate": 1.8697692486624606e-05, "loss": 0.2909, "step": 15646, "teacher_loss": 0.2579842805862427 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.40630921721458435, "learning_rate": 1.869549128633577e-05, "loss": 0.3103, "step": 15647, "teacher_loss": 0.2996810972690582 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.32114318013191223, "learning_rate": 1.8693290001320398e-05, "loss": 0.2274, "step": 15648, "teacher_loss": 0.21702814102172852 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.49709194898605347, "learning_rate": 1.8691088631628964e-05, "loss": 0.2864, "step": 15649, "teacher_loss": 0.2629718780517578 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4703463315963745, "learning_rate": 1.8688887177311925e-05, "loss": 0.2049, "step": 15650, "teacher_loss": 0.17542661726474762 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5739625692367554, "learning_rate": 1.868668563841977e-05, "loss": 0.3398, "step": 15651, "teacher_loss": 0.31378665566444397 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.26552414894104004, "learning_rate": 1.8684484015002966e-05, "loss": 0.1714, "step": 15652, "teacher_loss": 0.16098767518997192 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.32081177830696106, "learning_rate": 1.8682282307111988e-05, "loss": 0.2379, "step": 15653, "teacher_loss": 0.22868916392326355 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.23396648466587067, "learning_rate": 1.868008051479732e-05, "loss": 0.2617, "step": 15654, "teacher_loss": 0.26478099822998047 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5613775253295898, "learning_rate": 1.8677878638109434e-05, "loss": 0.6418, "step": 15655, "teacher_loss": 0.6507104635238647 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5904250741004944, "learning_rate": 1.867567667709882e-05, "loss": 0.2996, "step": 15656, "teacher_loss": 0.2672576308250427 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5111725330352783, "learning_rate": 1.8673474631815962e-05, "loss": 0.2568, "step": 15657, "teacher_loss": 0.22850503027439117 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.9034008979797363, "learning_rate": 1.8671272502311343e-05, "loss": 0.3059, "step": 15658, "teacher_loss": 0.23954731225967407 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.33696508407592773, "learning_rate": 1.8669070288635454e-05, "loss": 0.2637, "step": 15659, "teacher_loss": 0.25555580854415894 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.35835087299346924, "learning_rate": 1.866686799083878e-05, "loss": 0.1645, "step": 15660, "teacher_loss": 0.14300982654094696 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.30030542612075806, "learning_rate": 1.866466560897182e-05, "loss": 0.1432, "step": 15661, "teacher_loss": 0.12575635313987732 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4981169104576111, "learning_rate": 1.8662463143085063e-05, "loss": 0.3893, "step": 15662, "teacher_loss": 0.3771844804286957 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4201228618621826, "learning_rate": 1.8660260593229007e-05, "loss": 0.2382, "step": 15663, "teacher_loss": 0.2179337441921234 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.6173253655433655, "learning_rate": 1.8658057959454154e-05, "loss": 0.2886, "step": 15664, "teacher_loss": 0.25206518173217773 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.2200109362602234, "learning_rate": 1.8655855241810995e-05, "loss": 0.1687, "step": 15665, "teacher_loss": 0.16304980218410492 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.2391153872013092, "learning_rate": 1.8653652440350036e-05, "loss": 0.222, "step": 15666, "teacher_loss": 0.2200511395931244 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4334554970264435, "learning_rate": 1.8651449555121785e-05, "loss": 0.2428, "step": 15667, "teacher_loss": 0.22165828943252563 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5943639278411865, "learning_rate": 1.8649246586176737e-05, "loss": 0.4046, "step": 15668, "teacher_loss": 0.3835371136665344 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4538796842098236, "learning_rate": 1.8647043533565407e-05, "loss": 0.1859, "step": 15669, "teacher_loss": 0.15614989399909973 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5664341449737549, "learning_rate": 1.8644840397338305e-05, "loss": 0.2139, "step": 15670, "teacher_loss": 0.17472794651985168 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.3884039521217346, "learning_rate": 1.8642637177545937e-05, "loss": 0.2022, "step": 15671, "teacher_loss": 0.1815069019794464 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.9344276189804077, "learning_rate": 1.8640433874238828e-05, "loss": 0.3869, "step": 15672, "teacher_loss": 0.3260282576084137 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.3322821259498596, "learning_rate": 1.8638230487467477e-05, "loss": 0.3727, "step": 15673, "teacher_loss": 0.37714940309524536 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5182181000709534, "learning_rate": 1.8636027017282413e-05, "loss": 0.3984, "step": 15674, "teacher_loss": 0.38508763909339905 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5167944431304932, "learning_rate": 1.863382346373415e-05, "loss": 0.3163, "step": 15675, "teacher_loss": 0.29404598474502563 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.32424888014793396, "learning_rate": 1.863161982687321e-05, "loss": 0.2529, "step": 15676, "teacher_loss": 0.24494808912277222 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.2875154912471771, "learning_rate": 1.8629416106750114e-05, "loss": 0.2082, "step": 15677, "teacher_loss": 0.19933509826660156 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.25766521692276, "learning_rate": 1.8627212303415387e-05, "loss": 0.1554, "step": 15678, "teacher_loss": 0.144064798951149 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.4190814793109894, "learning_rate": 1.862500841691956e-05, "loss": 0.2516, "step": 15679, "teacher_loss": 0.2330310195684433 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.7138117551803589, "learning_rate": 1.8622804447313158e-05, "loss": 0.3107, "step": 15680, "teacher_loss": 0.2659236192703247 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.2788236439228058, "learning_rate": 1.862060039464671e-05, "loss": 0.2288, "step": 15681, "teacher_loss": 0.2232353836297989 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.5316205024719238, "learning_rate": 1.8618396258970756e-05, "loss": 0.352, "step": 15682, "teacher_loss": 0.33199450373649597 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.26798853278160095, "learning_rate": 1.861619204033582e-05, "loss": 0.2999, "step": 15683, "teacher_loss": 0.30348458886146545 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.6284560561180115, "learning_rate": 1.861398773879244e-05, "loss": 0.296, "step": 15684, "teacher_loss": 0.2590172290802002 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.3720332086086273, "learning_rate": 1.8611783354391155e-05, "loss": 0.1858, "step": 15685, "teacher_loss": 0.16506797075271606 }, { "compression_loss": 0.0, "epoch": 2.83, "label_loss": 0.6247307062149048, "learning_rate": 1.860957888718251e-05, "loss": 0.3464, "step": 15686, "teacher_loss": 0.315435528755188 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5692695379257202, "learning_rate": 1.8607374337217047e-05, "loss": 0.2624, "step": 15687, "teacher_loss": 0.2282578945159912 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.6733032464981079, "learning_rate": 1.86051697045453e-05, "loss": 0.2681, "step": 15688, "teacher_loss": 0.22306840121746063 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 1.0777983665466309, "learning_rate": 1.860296498921782e-05, "loss": 0.3977, "step": 15689, "teacher_loss": 0.322085976600647 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.7687364220619202, "learning_rate": 1.8600760191285156e-05, "loss": 0.3301, "step": 15690, "teacher_loss": 0.2813347578048706 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4539843201637268, "learning_rate": 1.859855531079786e-05, "loss": 0.3156, "step": 15691, "teacher_loss": 0.30026572942733765 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5300315022468567, "learning_rate": 1.859635034780648e-05, "loss": 0.2781, "step": 15692, "teacher_loss": 0.2501027584075928 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.36437398195266724, "learning_rate": 1.8594145302361565e-05, "loss": 0.2249, "step": 15693, "teacher_loss": 0.20937681198120117 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.704693615436554, "learning_rate": 1.859194017451368e-05, "loss": 0.3094, "step": 15694, "teacher_loss": 0.26544734835624695 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5063294172286987, "learning_rate": 1.858973496431337e-05, "loss": 0.2748, "step": 15695, "teacher_loss": 0.24907800555229187 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.2795817255973816, "learning_rate": 1.8587529671811196e-05, "loss": 0.1987, "step": 15696, "teacher_loss": 0.1897178739309311 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.23716753721237183, "learning_rate": 1.8585324297057733e-05, "loss": 0.3051, "step": 15697, "teacher_loss": 0.31267133355140686 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.2384091019630432, "learning_rate": 1.8583118840103527e-05, "loss": 0.1933, "step": 15698, "teacher_loss": 0.18823449313640594 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 1.171275019645691, "learning_rate": 1.858091330099915e-05, "loss": 0.4972, "step": 15699, "teacher_loss": 0.4222884774208069 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.7586273550987244, "learning_rate": 1.8578707679795167e-05, "loss": 0.3173, "step": 15700, "teacher_loss": 0.2682155966758728 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.9817002415657043, "learning_rate": 1.8576501976542147e-05, "loss": 0.2851, "step": 15701, "teacher_loss": 0.20771317183971405 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.22894296050071716, "learning_rate": 1.8574296191290656e-05, "loss": 0.2268, "step": 15702, "teacher_loss": 0.22658196091651917 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.7663373351097107, "learning_rate": 1.8572090324091273e-05, "loss": 0.328, "step": 15703, "teacher_loss": 0.2793191075325012 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.7877432107925415, "learning_rate": 1.856988437499457e-05, "loss": 0.3482, "step": 15704, "teacher_loss": 0.29936063289642334 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.527571976184845, "learning_rate": 1.856767834405112e-05, "loss": 0.3368, "step": 15705, "teacher_loss": 0.31564217805862427 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5882084369659424, "learning_rate": 1.85654722313115e-05, "loss": 0.2295, "step": 15706, "teacher_loss": 0.18961066007614136 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.8490410447120667, "learning_rate": 1.8563266036826295e-05, "loss": 0.3169, "step": 15707, "teacher_loss": 0.2577856183052063 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.2687148153781891, "learning_rate": 1.8561059760646082e-05, "loss": 0.1715, "step": 15708, "teacher_loss": 0.16073903441429138 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.3655679225921631, "learning_rate": 1.8558853402821444e-05, "loss": 0.2495, "step": 15709, "teacher_loss": 0.23658691346645355 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.26435890793800354, "learning_rate": 1.8556646963402965e-05, "loss": 0.1666, "step": 15710, "teacher_loss": 0.15578149259090424 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.9528557062149048, "learning_rate": 1.8554440442441242e-05, "loss": 0.3636, "step": 15711, "teacher_loss": 0.29816627502441406 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5803512334823608, "learning_rate": 1.8552233839986848e-05, "loss": 0.2559, "step": 15712, "teacher_loss": 0.21987971663475037 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.2601315379142761, "learning_rate": 1.8550027156090385e-05, "loss": 0.1929, "step": 15713, "teacher_loss": 0.18540364503860474 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.1583322286605835, "learning_rate": 1.854782039080245e-05, "loss": 0.1959, "step": 15714, "teacher_loss": 0.20003946125507355 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5300459265708923, "learning_rate": 1.8545613544173623e-05, "loss": 0.2989, "step": 15715, "teacher_loss": 0.2732434868812561 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.49268850684165955, "learning_rate": 1.854340661625451e-05, "loss": 0.2754, "step": 15716, "teacher_loss": 0.2512151598930359 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.3916330635547638, "learning_rate": 1.8541199607095705e-05, "loss": 0.2226, "step": 15717, "teacher_loss": 0.20382243394851685 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.34222325682640076, "learning_rate": 1.853899251674781e-05, "loss": 0.1875, "step": 15718, "teacher_loss": 0.1703435331583023 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.6559106707572937, "learning_rate": 1.8536785345261428e-05, "loss": 0.1871, "step": 15719, "teacher_loss": 0.13496747612953186 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.807537853717804, "learning_rate": 1.8534578092687163e-05, "loss": 0.7528, "step": 15720, "teacher_loss": 0.7467369437217712 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.222446471452713, "learning_rate": 1.8532370759075616e-05, "loss": 0.2122, "step": 15721, "teacher_loss": 0.21110032498836517 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.44959917664527893, "learning_rate": 1.8530163344477406e-05, "loss": 0.2349, "step": 15722, "teacher_loss": 0.21101459860801697 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.7312188148498535, "learning_rate": 1.8527955848943125e-05, "loss": 0.6522, "step": 15723, "teacher_loss": 0.6433976292610168 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.3048573136329651, "learning_rate": 1.8525748272523397e-05, "loss": 0.2784, "step": 15724, "teacher_loss": 0.27547207474708557 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.419810026884079, "learning_rate": 1.852354061526884e-05, "loss": 0.2546, "step": 15725, "teacher_loss": 0.23624543845653534 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4434985816478729, "learning_rate": 1.8521332877230047e-05, "loss": 0.3345, "step": 15726, "teacher_loss": 0.3223349452018738 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4418366551399231, "learning_rate": 1.851912505845766e-05, "loss": 0.2966, "step": 15727, "teacher_loss": 0.28050196170806885 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.846725344657898, "learning_rate": 1.851691715900228e-05, "loss": 0.3162, "step": 15728, "teacher_loss": 0.2572518587112427 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5746957063674927, "learning_rate": 1.8514709178914533e-05, "loss": 0.3229, "step": 15729, "teacher_loss": 0.2949431538581848 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4985443949699402, "learning_rate": 1.8512501118245046e-05, "loss": 0.4783, "step": 15730, "teacher_loss": 0.4760809540748596 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.40349799394607544, "learning_rate": 1.8510292977044434e-05, "loss": 0.241, "step": 15731, "teacher_loss": 0.22298294305801392 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4171718955039978, "learning_rate": 1.8508084755363335e-05, "loss": 0.301, "step": 15732, "teacher_loss": 0.2881176471710205 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4362102746963501, "learning_rate": 1.8505876453252368e-05, "loss": 0.324, "step": 15733, "teacher_loss": 0.311529278755188 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.10716117918491364, "learning_rate": 1.8503668070762165e-05, "loss": 0.2117, "step": 15734, "teacher_loss": 0.2233637124300003 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5705432891845703, "learning_rate": 1.850145960794336e-05, "loss": 0.1998, "step": 15735, "teacher_loss": 0.1586064100265503 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.3731476962566376, "learning_rate": 1.8499251064846576e-05, "loss": 0.267, "step": 15736, "teacher_loss": 0.2551867365837097 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4048183560371399, "learning_rate": 1.8497042441522464e-05, "loss": 0.3202, "step": 15737, "teacher_loss": 0.31074345111846924 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.5419814586639404, "learning_rate": 1.8494833738021655e-05, "loss": 0.3041, "step": 15738, "teacher_loss": 0.2776755094528198 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.34955430030822754, "learning_rate": 1.8492624954394782e-05, "loss": 0.1913, "step": 15739, "teacher_loss": 0.1737421751022339 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.38567274808883667, "learning_rate": 1.849041609069249e-05, "loss": 0.2998, "step": 15740, "teacher_loss": 0.29028838872909546 }, { "compression_loss": 0.0, "epoch": 2.84, "label_loss": 0.4437521696090698, "learning_rate": 1.8488207146965423e-05, "loss": 0.2128, "step": 15741, "teacher_loss": 0.18712544441223145 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.3764735162258148, "learning_rate": 1.8485998123264222e-05, "loss": 0.413, "step": 15742, "teacher_loss": 0.41708898544311523 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.2732475996017456, "learning_rate": 1.8483789019639537e-05, "loss": 0.2142, "step": 15743, "teacher_loss": 0.20763376355171204 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.7141246795654297, "learning_rate": 1.8481579836142016e-05, "loss": 0.3103, "step": 15744, "teacher_loss": 0.26540425419807434 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5449739694595337, "learning_rate": 1.847937057282231e-05, "loss": 0.3066, "step": 15745, "teacher_loss": 0.28010839223861694 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6905507445335388, "learning_rate": 1.8477161229731066e-05, "loss": 0.2589, "step": 15746, "teacher_loss": 0.21091032028198242 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 1.1155308485031128, "learning_rate": 1.847495180691894e-05, "loss": 0.4076, "step": 15747, "teacher_loss": 0.3289552927017212 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.22949522733688354, "learning_rate": 1.8472742304436586e-05, "loss": 0.214, "step": 15748, "teacher_loss": 0.21227796375751495 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5418639183044434, "learning_rate": 1.8470532722334664e-05, "loss": 0.22, "step": 15749, "teacher_loss": 0.1842201054096222 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6393435597419739, "learning_rate": 1.8468323060663832e-05, "loss": 0.2975, "step": 15750, "teacher_loss": 0.2595379948616028 }, { "epoch": 2.85, "eval_exact_match": 79.75402081362347, "eval_f1": 87.38767630119374, "step": 15750 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.2720993757247925, "learning_rate": 1.8466113319474747e-05, "loss": 0.1668, "step": 15751, "teacher_loss": 0.1550801694393158 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6986119747161865, "learning_rate": 1.8463903498818088e-05, "loss": 0.3543, "step": 15752, "teacher_loss": 0.31603682041168213 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.425250381231308, "learning_rate": 1.846169359874449e-05, "loss": 0.2327, "step": 15753, "teacher_loss": 0.21135637164115906 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6598292589187622, "learning_rate": 1.8459483619304648e-05, "loss": 0.2813, "step": 15754, "teacher_loss": 0.23922353982925415 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.22085297107696533, "learning_rate": 1.8457273560549216e-05, "loss": 0.1534, "step": 15755, "teacher_loss": 0.14585380256175995 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5688523054122925, "learning_rate": 1.8455063422528865e-05, "loss": 0.2156, "step": 15756, "teacher_loss": 0.17639166116714478 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.8762666583061218, "learning_rate": 1.8452853205294273e-05, "loss": 0.391, "step": 15757, "teacher_loss": 0.33703452348709106 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.297713041305542, "learning_rate": 1.8450642908896104e-05, "loss": 0.2478, "step": 15758, "teacher_loss": 0.24224433302879333 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5520319938659668, "learning_rate": 1.844843253338504e-05, "loss": 0.2644, "step": 15759, "teacher_loss": 0.23243862390518188 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.40462255477905273, "learning_rate": 1.844622207881176e-05, "loss": 0.2567, "step": 15760, "teacher_loss": 0.24028655886650085 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.18966549634933472, "learning_rate": 1.8444011545226934e-05, "loss": 0.1812, "step": 15761, "teacher_loss": 0.18027850985527039 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.23252812027931213, "learning_rate": 1.8441800932681257e-05, "loss": 0.285, "step": 15762, "teacher_loss": 0.29081469774246216 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.30862972140312195, "learning_rate": 1.8439590241225396e-05, "loss": 0.1715, "step": 15763, "teacher_loss": 0.15621252357959747 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.29161760210990906, "learning_rate": 1.843737947091005e-05, "loss": 0.303, "step": 15764, "teacher_loss": 0.3042460083961487 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.8093780279159546, "learning_rate": 1.843516862178589e-05, "loss": 0.286, "step": 15765, "teacher_loss": 0.22782929241657257 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.2768884301185608, "learning_rate": 1.843295769390362e-05, "loss": 0.1797, "step": 15766, "teacher_loss": 0.1689174473285675 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.27470147609710693, "learning_rate": 1.8430746687313923e-05, "loss": 0.2324, "step": 15767, "teacher_loss": 0.2276635766029358 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5782203078269958, "learning_rate": 1.8428535602067486e-05, "loss": 0.38, "step": 15768, "teacher_loss": 0.35796087980270386 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.9145200252532959, "learning_rate": 1.8426324438215005e-05, "loss": 0.4915, "step": 15769, "teacher_loss": 0.44447994232177734 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.4878411889076233, "learning_rate": 1.842411319580718e-05, "loss": 0.2783, "step": 15770, "teacher_loss": 0.25507229566574097 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5145754218101501, "learning_rate": 1.842190187489471e-05, "loss": 0.2432, "step": 15771, "teacher_loss": 0.212994784116745 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.46705102920532227, "learning_rate": 1.8419690475528286e-05, "loss": 0.2684, "step": 15772, "teacher_loss": 0.24628250300884247 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6455156207084656, "learning_rate": 1.841747899775861e-05, "loss": 0.3441, "step": 15773, "teacher_loss": 0.3105820417404175 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.7152342200279236, "learning_rate": 1.8415267441636388e-05, "loss": 0.4826, "step": 15774, "teacher_loss": 0.4567483067512512 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.7802227735519409, "learning_rate": 1.8413055807212324e-05, "loss": 0.2188, "step": 15775, "teacher_loss": 0.1564652919769287 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.19174207746982574, "learning_rate": 1.8410844094537124e-05, "loss": 0.1793, "step": 15776, "teacher_loss": 0.1778654158115387 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.15918049216270447, "learning_rate": 1.8408632303661494e-05, "loss": 0.1691, "step": 15777, "teacher_loss": 0.17019785940647125 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.29411840438842773, "learning_rate": 1.8406420434636144e-05, "loss": 0.1796, "step": 15778, "teacher_loss": 0.16691213846206665 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.41873395442962646, "learning_rate": 1.8404208487511786e-05, "loss": 0.1891, "step": 15779, "teacher_loss": 0.1635519564151764 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.3061790466308594, "learning_rate": 1.8401996462339138e-05, "loss": 0.2715, "step": 15780, "teacher_loss": 0.26762598752975464 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.38915109634399414, "learning_rate": 1.8399784359168904e-05, "loss": 0.2281, "step": 15781, "teacher_loss": 0.21018287539482117 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.3290069103240967, "learning_rate": 1.839757217805182e-05, "loss": 0.1862, "step": 15782, "teacher_loss": 0.1703346222639084 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.3589189350605011, "learning_rate": 1.839535991903858e-05, "loss": 0.246, "step": 15783, "teacher_loss": 0.233504980802536 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5270801782608032, "learning_rate": 1.839314758217992e-05, "loss": 0.2206, "step": 15784, "teacher_loss": 0.1865430474281311 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.33380383253097534, "learning_rate": 1.839093516752656e-05, "loss": 0.201, "step": 15785, "teacher_loss": 0.18623086810112 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5628454685211182, "learning_rate": 1.838872267512922e-05, "loss": 0.3038, "step": 15786, "teacher_loss": 0.27499938011169434 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5338721871376038, "learning_rate": 1.8386510105038636e-05, "loss": 0.1982, "step": 15787, "teacher_loss": 0.16092006862163544 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.6219146847724915, "learning_rate": 1.8384297457305524e-05, "loss": 0.337, "step": 15788, "teacher_loss": 0.3053082227706909 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.4425116777420044, "learning_rate": 1.838208473198062e-05, "loss": 0.2227, "step": 15789, "teacher_loss": 0.198236882686615 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.7788664102554321, "learning_rate": 1.8379871929114652e-05, "loss": 0.3461, "step": 15790, "teacher_loss": 0.29796069860458374 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5995031595230103, "learning_rate": 1.8377659048758347e-05, "loss": 0.2554, "step": 15791, "teacher_loss": 0.21720127761363983 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.24850031733512878, "learning_rate": 1.8375446090962458e-05, "loss": 0.2696, "step": 15792, "teacher_loss": 0.27193427085876465 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.3667680621147156, "learning_rate": 1.8373233055777705e-05, "loss": 0.2201, "step": 15793, "teacher_loss": 0.20380929112434387 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.5167076587677002, "learning_rate": 1.837101994325483e-05, "loss": 0.2164, "step": 15794, "teacher_loss": 0.18301963806152344 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.7842326164245605, "learning_rate": 1.8368806753444578e-05, "loss": 0.3552, "step": 15795, "teacher_loss": 0.3075253963470459 }, { "compression_loss": 0.0, "epoch": 2.85, "label_loss": 0.610200822353363, "learning_rate": 1.8366593486397688e-05, "loss": 0.2828, "step": 15796, "teacher_loss": 0.24646055698394775 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.1728951632976532, "learning_rate": 1.8364380142164904e-05, "loss": 0.1396, "step": 15797, "teacher_loss": 0.13586542010307312 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.816399335861206, "learning_rate": 1.8362166720796966e-05, "loss": 0.2978, "step": 15798, "teacher_loss": 0.24016831815242767 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.2552238702774048, "learning_rate": 1.8359953222344626e-05, "loss": 0.1907, "step": 15799, "teacher_loss": 0.1835801601409912 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.7772147059440613, "learning_rate": 1.835773964685863e-05, "loss": 0.3823, "step": 15800, "teacher_loss": 0.3384694457054138 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.3603225946426392, "learning_rate": 1.8355525994389737e-05, "loss": 0.3377, "step": 15801, "teacher_loss": 0.22407333552837372 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.3928898572921753, "learning_rate": 1.835331226498869e-05, "loss": 0.2548, "step": 15802, "teacher_loss": 0.2394246608018875 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.33300483226776123, "learning_rate": 1.8351098458706246e-05, "loss": 0.2359, "step": 15803, "teacher_loss": 0.22513073682785034 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5865911245346069, "learning_rate": 1.834888457559316e-05, "loss": 0.2456, "step": 15804, "teacher_loss": 0.20774272084236145 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.48009568452835083, "learning_rate": 1.8346670615700195e-05, "loss": 0.3015, "step": 15805, "teacher_loss": 0.28160303831100464 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.0230073928833008, "learning_rate": 1.8344456579078103e-05, "loss": 0.3164, "step": 15806, "teacher_loss": 0.2378537356853485 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5411422252655029, "learning_rate": 1.8342242465777655e-05, "loss": 0.1899, "step": 15807, "teacher_loss": 0.15092778205871582 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.47372472286224365, "learning_rate": 1.8340028275849602e-05, "loss": 0.2638, "step": 15808, "teacher_loss": 0.24047425389289856 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.37042713165283203, "learning_rate": 1.8337814009344716e-05, "loss": 0.3391, "step": 15809, "teacher_loss": 0.33559519052505493 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.3425653874874115, "learning_rate": 1.8335599666313764e-05, "loss": 0.1931, "step": 15810, "teacher_loss": 0.17654749751091003 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.21195080876350403, "learning_rate": 1.8333385246807507e-05, "loss": 0.1911, "step": 15811, "teacher_loss": 0.18874311447143555 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.355308473110199, "learning_rate": 1.833117075087673e-05, "loss": 0.2373, "step": 15812, "teacher_loss": 0.2242126166820526 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.23206931352615356, "learning_rate": 1.8328956178572187e-05, "loss": 0.2131, "step": 15813, "teacher_loss": 0.21103033423423767 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.2173418998718262, "learning_rate": 1.8326741529944663e-05, "loss": 0.4846, "step": 15814, "teacher_loss": 0.40314981341362 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.4791605472564697, "learning_rate": 1.832452680504493e-05, "loss": 0.3586, "step": 15815, "teacher_loss": 0.34525907039642334 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.943343460559845, "learning_rate": 1.8322312003923757e-05, "loss": 0.4278, "step": 15816, "teacher_loss": 0.3705439567565918 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.9232819080352783, "learning_rate": 1.832009712663194e-05, "loss": 0.298, "step": 15817, "teacher_loss": 0.2284882366657257 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.41511350870132446, "learning_rate": 1.8317882173220244e-05, "loss": 0.2363, "step": 15818, "teacher_loss": 0.216459259390831 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.22387650609016418, "learning_rate": 1.831566714373946e-05, "loss": 0.2385, "step": 15819, "teacher_loss": 0.24014881253242493 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.389695405960083, "learning_rate": 1.8313452038240375e-05, "loss": 0.4148, "step": 15820, "teacher_loss": 0.4176251292228699 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.6074966788291931, "learning_rate": 1.831123685677376e-05, "loss": 0.4295, "step": 15821, "teacher_loss": 0.40977340936660767 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.0168144702911377, "learning_rate": 1.8309021599390415e-05, "loss": 0.4211, "step": 15822, "teacher_loss": 0.3549244999885559 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.16227078437805176, "learning_rate": 1.830680626614113e-05, "loss": 0.2178, "step": 15823, "teacher_loss": 0.22392481565475464 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5151613354682922, "learning_rate": 1.830459085707668e-05, "loss": 0.2983, "step": 15824, "teacher_loss": 0.27420979738235474 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.33182772994041443, "learning_rate": 1.8302375372247878e-05, "loss": 0.2378, "step": 15825, "teacher_loss": 0.22733959555625916 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.4617161452770233, "learning_rate": 1.830015981170551e-05, "loss": 0.3127, "step": 15826, "teacher_loss": 0.29609590768814087 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.2880086898803711, "learning_rate": 1.829794417550037e-05, "loss": 0.2567, "step": 15827, "teacher_loss": 0.2532292604446411 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.4655439257621765, "learning_rate": 1.829572846368326e-05, "loss": 0.4208, "step": 15828, "teacher_loss": 0.4157954454421997 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.2411046028137207, "learning_rate": 1.8293512676304973e-05, "loss": 0.2337, "step": 15829, "teacher_loss": 0.2328571379184723 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.32122257351875305, "learning_rate": 1.8291296813416315e-05, "loss": 0.2781, "step": 15830, "teacher_loss": 0.2732689380645752 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.7128485441207886, "learning_rate": 1.8289080875068094e-05, "loss": 0.2558, "step": 15831, "teacher_loss": 0.20498883724212646 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.4479658603668213, "learning_rate": 1.8286864861311105e-05, "loss": 0.3956, "step": 15832, "teacher_loss": 0.38976895809173584 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5889577269554138, "learning_rate": 1.8284648772196162e-05, "loss": 0.4092, "step": 15833, "teacher_loss": 0.3891795873641968 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.3624516725540161, "learning_rate": 1.828243260777407e-05, "loss": 0.3621, "step": 15834, "teacher_loss": 0.3620643615722656 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.4362243115901947, "learning_rate": 1.828021636809564e-05, "loss": 0.3267, "step": 15835, "teacher_loss": 0.31456223130226135 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.3359057307243347, "learning_rate": 1.8278000053211677e-05, "loss": 0.2108, "step": 15836, "teacher_loss": 0.19689424335956573 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.41603606939315796, "learning_rate": 1.8275783663173013e-05, "loss": 0.336, "step": 15837, "teacher_loss": 0.3270907998085022 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.7121304273605347, "learning_rate": 1.827356719803044e-05, "loss": 0.3189, "step": 15838, "teacher_loss": 0.2752155363559723 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.37949061393737793, "learning_rate": 1.8271350657834792e-05, "loss": 0.2746, "step": 15839, "teacher_loss": 0.2629939317703247 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5888032913208008, "learning_rate": 1.826913404263688e-05, "loss": 0.4566, "step": 15840, "teacher_loss": 0.441903293132782 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.6193324327468872, "learning_rate": 1.826691735248752e-05, "loss": 0.2668, "step": 15841, "teacher_loss": 0.22764629125595093 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.2719542980194092, "learning_rate": 1.8264700587437547e-05, "loss": 0.1629, "step": 15842, "teacher_loss": 0.15083414316177368 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.5249314308166504, "learning_rate": 1.8262483747537777e-05, "loss": 0.2956, "step": 15843, "teacher_loss": 0.27013659477233887 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.6424548625946045, "learning_rate": 1.8260266832839032e-05, "loss": 0.339, "step": 15844, "teacher_loss": 0.3052673935890198 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.3115832805633545, "learning_rate": 1.825804984339215e-05, "loss": 0.3466, "step": 15845, "teacher_loss": 0.3504369556903839 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.6472749710083008, "learning_rate": 1.8255832779247946e-05, "loss": 0.2736, "step": 15846, "teacher_loss": 0.23211097717285156 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.8693272471427917, "learning_rate": 1.8253615640457263e-05, "loss": 0.2758, "step": 15847, "teacher_loss": 0.20985053479671478 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.0800511837005615, "learning_rate": 1.8251398427070926e-05, "loss": 0.3452, "step": 15848, "teacher_loss": 0.2635941207408905 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.8767670392990112, "learning_rate": 1.824918113913977e-05, "loss": 0.34, "step": 15849, "teacher_loss": 0.28037768602371216 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.8337864279747009, "learning_rate": 1.824696377671464e-05, "loss": 0.3117, "step": 15850, "teacher_loss": 0.2536882162094116 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 1.2983940839767456, "learning_rate": 1.8244746339846353e-05, "loss": 0.5908, "step": 15851, "teacher_loss": 0.512146532535553 }, { "compression_loss": 0.0, "epoch": 2.86, "label_loss": 0.376639187335968, "learning_rate": 1.824252882858577e-05, "loss": 0.2354, "step": 15852, "teacher_loss": 0.2196727991104126 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.15303528308868408, "learning_rate": 1.824031124298372e-05, "loss": 0.1917, "step": 15853, "teacher_loss": 0.1960466206073761 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.41173696517944336, "learning_rate": 1.823809358309104e-05, "loss": 0.3155, "step": 15854, "teacher_loss": 0.3048304319381714 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.28301042318344116, "learning_rate": 1.8235875848958593e-05, "loss": 0.1679, "step": 15855, "teacher_loss": 0.15508979558944702 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.34113240242004395, "learning_rate": 1.8233658040637212e-05, "loss": 0.3067, "step": 15856, "teacher_loss": 0.3028240203857422 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.7279713749885559, "learning_rate": 1.8231440158177747e-05, "loss": 0.2719, "step": 15857, "teacher_loss": 0.2212262749671936 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.9390513896942139, "learning_rate": 1.8229222201631045e-05, "loss": 0.3198, "step": 15858, "teacher_loss": 0.250988245010376 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3111320734024048, "learning_rate": 1.822700417104796e-05, "loss": 0.1913, "step": 15859, "teacher_loss": 0.17796854674816132 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3001253008842468, "learning_rate": 1.822478606647935e-05, "loss": 0.2889, "step": 15860, "teacher_loss": 0.2876701056957245 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.21206986904144287, "learning_rate": 1.822256788797606e-05, "loss": 0.2026, "step": 15861, "teacher_loss": 0.20155739784240723 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3080791234970093, "learning_rate": 1.822034963558895e-05, "loss": 0.3215, "step": 15862, "teacher_loss": 0.32301846146583557 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.34815913438796997, "learning_rate": 1.8218131309368876e-05, "loss": 0.2234, "step": 15863, "teacher_loss": 0.20953653752803802 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.629808783531189, "learning_rate": 1.8215912909366704e-05, "loss": 0.3136, "step": 15864, "teacher_loss": 0.27846044301986694 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.33816325664520264, "learning_rate": 1.821369443563329e-05, "loss": 0.3147, "step": 15865, "teacher_loss": 0.3120826482772827 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.7259360551834106, "learning_rate": 1.8211475888219492e-05, "loss": 0.2269, "step": 15866, "teacher_loss": 0.17143885791301727 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.1640152633190155, "learning_rate": 1.8209257267176185e-05, "loss": 0.2365, "step": 15867, "teacher_loss": 0.24452602863311768 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.21413137018680573, "learning_rate": 1.8207038572554232e-05, "loss": 0.2584, "step": 15868, "teacher_loss": 0.26330065727233887 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.26071488857269287, "learning_rate": 1.8204819804404497e-05, "loss": 0.1597, "step": 15869, "teacher_loss": 0.1484990268945694 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3123800456523895, "learning_rate": 1.8202600962777856e-05, "loss": 0.2677, "step": 15870, "teacher_loss": 0.2627692222595215 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.8645068407058716, "learning_rate": 1.820038204772517e-05, "loss": 0.4702, "step": 15871, "teacher_loss": 0.4263474941253662 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.6720243692398071, "learning_rate": 1.8198163059297324e-05, "loss": 0.336, "step": 15872, "teacher_loss": 0.29860880970954895 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.13621313869953156, "learning_rate": 1.8195943997545187e-05, "loss": 0.1866, "step": 15873, "teacher_loss": 0.19216597080230713 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.33425289392471313, "learning_rate": 1.8193724862519634e-05, "loss": 0.2293, "step": 15874, "teacher_loss": 0.2176315188407898 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3725906014442444, "learning_rate": 1.819150565427155e-05, "loss": 0.2276, "step": 15875, "teacher_loss": 0.2114555984735489 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.1661052107810974, "learning_rate": 1.8189286372851805e-05, "loss": 0.1528, "step": 15876, "teacher_loss": 0.15127533674240112 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 1.0482935905456543, "learning_rate": 1.818706701831129e-05, "loss": 0.6401, "step": 15877, "teacher_loss": 0.5946966409683228 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.6520440578460693, "learning_rate": 1.818484759070088e-05, "loss": 0.3157, "step": 15878, "teacher_loss": 0.2783043086528778 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.7370253801345825, "learning_rate": 1.8182628090071462e-05, "loss": 0.3632, "step": 15879, "teacher_loss": 0.321697473526001 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3806958496570587, "learning_rate": 1.8180408516473933e-05, "loss": 0.2371, "step": 15880, "teacher_loss": 0.22116263210773468 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.18035593628883362, "learning_rate": 1.817818886995916e-05, "loss": 0.1591, "step": 15881, "teacher_loss": 0.15674445033073425 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3787170648574829, "learning_rate": 1.8175969150578052e-05, "loss": 0.3046, "step": 15882, "teacher_loss": 0.29638195037841797 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 1.3779008388519287, "learning_rate": 1.817374935838149e-05, "loss": 0.3741, "step": 15883, "teacher_loss": 0.26260918378829956 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.8832082748413086, "learning_rate": 1.8171529493420375e-05, "loss": 0.3496, "step": 15884, "teacher_loss": 0.29030150175094604 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.47625166177749634, "learning_rate": 1.816930955574559e-05, "loss": 0.2284, "step": 15885, "teacher_loss": 0.2009044885635376 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.19757282733917236, "learning_rate": 1.8167089545408046e-05, "loss": 0.1843, "step": 15886, "teacher_loss": 0.18281352519989014 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.4135333299636841, "learning_rate": 1.8164869462458635e-05, "loss": 0.3219, "step": 15887, "teacher_loss": 0.3117671012878418 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.22263991832733154, "learning_rate": 1.816264930694825e-05, "loss": 0.2269, "step": 15888, "teacher_loss": 0.22738727927207947 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.6423806548118591, "learning_rate": 1.81604290789278e-05, "loss": 0.4091, "step": 15889, "teacher_loss": 0.3831944763660431 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.31817013025283813, "learning_rate": 1.815820877844819e-05, "loss": 0.2287, "step": 15890, "teacher_loss": 0.2187390923500061 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.6518374681472778, "learning_rate": 1.815598840556032e-05, "loss": 0.3494, "step": 15891, "teacher_loss": 0.3157833516597748 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.24078303575515747, "learning_rate": 1.8153767960315095e-05, "loss": 0.2509, "step": 15892, "teacher_loss": 0.25207698345184326 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.6049313545227051, "learning_rate": 1.815154744276343e-05, "loss": 0.2603, "step": 15893, "teacher_loss": 0.2220241129398346 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3835017681121826, "learning_rate": 1.8149326852956232e-05, "loss": 0.2012, "step": 15894, "teacher_loss": 0.18099913001060486 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.4521665573120117, "learning_rate": 1.814710619094441e-05, "loss": 0.2787, "step": 15895, "teacher_loss": 0.2594741880893707 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.17418499290943146, "learning_rate": 1.8144885456778873e-05, "loss": 0.1879, "step": 15896, "teacher_loss": 0.18939995765686035 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3316029906272888, "learning_rate": 1.8142664650510546e-05, "loss": 0.2851, "step": 15897, "teacher_loss": 0.27993136644363403 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.7412432432174683, "learning_rate": 1.8140443772190344e-05, "loss": 0.2286, "step": 15898, "teacher_loss": 0.17165914177894592 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.23797626793384552, "learning_rate": 1.8138222821869176e-05, "loss": 0.1936, "step": 15899, "teacher_loss": 0.18867117166519165 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.7001538276672363, "learning_rate": 1.813600179959797e-05, "loss": 0.2643, "step": 15900, "teacher_loss": 0.21591657400131226 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.3734312951564789, "learning_rate": 1.8133780705427647e-05, "loss": 0.2557, "step": 15901, "teacher_loss": 0.24259501695632935 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.4090441167354584, "learning_rate": 1.8131559539409126e-05, "loss": 0.2703, "step": 15902, "teacher_loss": 0.25490033626556396 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.8420337438583374, "learning_rate": 1.8129338301593335e-05, "loss": 0.318, "step": 15903, "teacher_loss": 0.25973206758499146 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.5946215391159058, "learning_rate": 1.8127116992031197e-05, "loss": 0.2694, "step": 15904, "teacher_loss": 0.23325365781784058 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.36245644092559814, "learning_rate": 1.8124895610773645e-05, "loss": 0.2677, "step": 15905, "teacher_loss": 0.25722014904022217 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.9016590118408203, "learning_rate": 1.81226741578716e-05, "loss": 0.3018, "step": 15906, "teacher_loss": 0.23515459895133972 }, { "compression_loss": 0.0, "epoch": 2.87, "label_loss": 0.5252824425697327, "learning_rate": 1.8120452633376004e-05, "loss": 0.2709, "step": 15907, "teacher_loss": 0.24266907572746277 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.764914870262146, "learning_rate": 1.8118231037337785e-05, "loss": 0.4951, "step": 15908, "teacher_loss": 0.46513789892196655 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.41471171379089355, "learning_rate": 1.811600936980787e-05, "loss": 0.2832, "step": 15909, "teacher_loss": 0.2685871720314026 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.23086172342300415, "learning_rate": 1.811378763083721e-05, "loss": 0.2129, "step": 15910, "teacher_loss": 0.21089771389961243 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.27980712056159973, "learning_rate": 1.8111565820476732e-05, "loss": 0.1937, "step": 15911, "teacher_loss": 0.1840856969356537 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4259776473045349, "learning_rate": 1.8109343938777383e-05, "loss": 0.2256, "step": 15912, "teacher_loss": 0.2032942771911621 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5708742737770081, "learning_rate": 1.8107121985790092e-05, "loss": 0.2301, "step": 15913, "teacher_loss": 0.19223767518997192 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.31685516238212585, "learning_rate": 1.8104899961565816e-05, "loss": 0.2355, "step": 15914, "teacher_loss": 0.226412832736969 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4490571618080139, "learning_rate": 1.8102677866155488e-05, "loss": 0.1976, "step": 15915, "teacher_loss": 0.16968148946762085 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.3662424087524414, "learning_rate": 1.8100455699610065e-05, "loss": 0.209, "step": 15916, "teacher_loss": 0.19152499735355377 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.32432007789611816, "learning_rate": 1.809823346198048e-05, "loss": 0.2702, "step": 15917, "teacher_loss": 0.2642146348953247 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4476051330566406, "learning_rate": 1.8096011153317698e-05, "loss": 0.2318, "step": 15918, "teacher_loss": 0.20779427886009216 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.3224039077758789, "learning_rate": 1.809378877367266e-05, "loss": 0.2295, "step": 15919, "teacher_loss": 0.21917583048343658 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.2092219591140747, "learning_rate": 1.8091566323096325e-05, "loss": 0.1907, "step": 15920, "teacher_loss": 0.18868398666381836 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.36096876859664917, "learning_rate": 1.8089343801639632e-05, "loss": 0.236, "step": 15921, "teacher_loss": 0.22213931381702423 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4118530750274658, "learning_rate": 1.808712120935356e-05, "loss": 0.2229, "step": 15922, "teacher_loss": 0.20195114612579346 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.40382254123687744, "learning_rate": 1.808489854628905e-05, "loss": 0.2719, "step": 15923, "teacher_loss": 0.2572861909866333 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4486439824104309, "learning_rate": 1.8082675812497066e-05, "loss": 0.3284, "step": 15924, "teacher_loss": 0.3150879740715027 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.34547877311706543, "learning_rate": 1.8080453008028567e-05, "loss": 0.2311, "step": 15925, "teacher_loss": 0.2183745801448822 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.2674420475959778, "learning_rate": 1.8078230132934514e-05, "loss": 0.2298, "step": 15926, "teacher_loss": 0.22563272714614868 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.6948617100715637, "learning_rate": 1.8076007187265876e-05, "loss": 0.3094, "step": 15927, "teacher_loss": 0.2666250765323639 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.2115933895111084, "learning_rate": 1.8073784171073617e-05, "loss": 0.216, "step": 15928, "teacher_loss": 0.21645301580429077 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.29225659370422363, "learning_rate": 1.8071561084408695e-05, "loss": 0.2151, "step": 15929, "teacher_loss": 0.20654284954071045 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.43161699175834656, "learning_rate": 1.8069337927322097e-05, "loss": 0.3134, "step": 15930, "teacher_loss": 0.30021822452545166 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5122411847114563, "learning_rate": 1.8067114699864774e-05, "loss": 0.229, "step": 15931, "teacher_loss": 0.19747330248355865 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.49110111594200134, "learning_rate": 1.806489140208771e-05, "loss": 0.2171, "step": 15932, "teacher_loss": 0.18667101860046387 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5662893056869507, "learning_rate": 1.806266803404188e-05, "loss": 0.2721, "step": 15933, "teacher_loss": 0.2394472360610962 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.6437488794326782, "learning_rate": 1.8060444595778246e-05, "loss": 0.272, "step": 15934, "teacher_loss": 0.23064376413822174 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5459778904914856, "learning_rate": 1.8058221087347803e-05, "loss": 0.2756, "step": 15935, "teacher_loss": 0.24557873606681824 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.43965017795562744, "learning_rate": 1.805599750880151e-05, "loss": 0.325, "step": 15936, "teacher_loss": 0.3122938573360443 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.135902538895607, "learning_rate": 1.805377386019036e-05, "loss": 0.306, "step": 15937, "teacher_loss": 0.32492056488990784 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4700288772583008, "learning_rate": 1.805155014156533e-05, "loss": 0.3548, "step": 15938, "teacher_loss": 0.342024564743042 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5970551371574402, "learning_rate": 1.8049326352977404e-05, "loss": 0.2825, "step": 15939, "teacher_loss": 0.24754837155342102 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.3149511218070984, "learning_rate": 1.804710249447757e-05, "loss": 0.1341, "step": 15940, "teacher_loss": 0.11395937949419022 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5430590510368347, "learning_rate": 1.804487856611681e-05, "loss": 0.2289, "step": 15941, "teacher_loss": 0.1939672827720642 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.33586010336875916, "learning_rate": 1.804265456794611e-05, "loss": 0.2516, "step": 15942, "teacher_loss": 0.24219557642936707 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.17955368757247925, "learning_rate": 1.804043050001647e-05, "loss": 0.1948, "step": 15943, "teacher_loss": 0.1964605450630188 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.45494702458381653, "learning_rate": 1.8038206362378868e-05, "loss": 0.2111, "step": 15944, "teacher_loss": 0.18398284912109375 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.5782331228256226, "learning_rate": 1.8035982155084308e-05, "loss": 0.2974, "step": 15945, "teacher_loss": 0.26622846722602844 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.34627142548561096, "learning_rate": 1.8033757878183776e-05, "loss": 0.1794, "step": 15946, "teacher_loss": 0.16088120639324188 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.8087563514709473, "learning_rate": 1.8031533531728272e-05, "loss": 0.3477, "step": 15947, "teacher_loss": 0.29646411538124084 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.42827725410461426, "learning_rate": 1.8029309115768796e-05, "loss": 0.2148, "step": 15948, "teacher_loss": 0.19104623794555664 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.2689998745918274, "learning_rate": 1.8027084630356344e-05, "loss": 0.1968, "step": 15949, "teacher_loss": 0.18875108659267426 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.6298221945762634, "learning_rate": 1.8024860075541913e-05, "loss": 0.342, "step": 15950, "teacher_loss": 0.3099777400493622 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.8050061464309692, "learning_rate": 1.802263545137651e-05, "loss": 0.2598, "step": 15951, "teacher_loss": 0.199222594499588 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.926358699798584, "learning_rate": 1.802041075791114e-05, "loss": 0.3293, "step": 15952, "teacher_loss": 0.2629215717315674 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4693329632282257, "learning_rate": 1.801818599519681e-05, "loss": 0.2656, "step": 15953, "teacher_loss": 0.24299326539039612 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 1.0085959434509277, "learning_rate": 1.801596116328452e-05, "loss": 0.434, "step": 15954, "teacher_loss": 0.3701905608177185 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4580909311771393, "learning_rate": 1.8013736262225285e-05, "loss": 0.2974, "step": 15955, "teacher_loss": 0.27949029207229614 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.8941679000854492, "learning_rate": 1.801151129207011e-05, "loss": 0.3346, "step": 15956, "teacher_loss": 0.2723815441131592 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.32722964882850647, "learning_rate": 1.8009286252870013e-05, "loss": 0.2254, "step": 15957, "teacher_loss": 0.21410246193408966 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.2847384810447693, "learning_rate": 1.8007061144676005e-05, "loss": 0.1826, "step": 15958, "teacher_loss": 0.1712675541639328 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.4693653881549835, "learning_rate": 1.8004835967539098e-05, "loss": 0.235, "step": 15959, "teacher_loss": 0.20900863409042358 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 1.1480679512023926, "learning_rate": 1.8002610721510315e-05, "loss": 0.3061, "step": 15960, "teacher_loss": 0.2125958949327469 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.7316836714744568, "learning_rate": 1.8000385406640667e-05, "loss": 0.3188, "step": 15961, "teacher_loss": 0.2729114890098572 }, { "compression_loss": 0.0, "epoch": 2.88, "label_loss": 0.33481651544570923, "learning_rate": 1.7998160022981177e-05, "loss": 0.1959, "step": 15962, "teacher_loss": 0.18050611019134521 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3492404818534851, "learning_rate": 1.7995934570582872e-05, "loss": 0.297, "step": 15963, "teacher_loss": 0.29121989011764526 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.6515673995018005, "learning_rate": 1.7993709049496765e-05, "loss": 0.3738, "step": 15964, "teacher_loss": 0.342968225479126 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.24512922763824463, "learning_rate": 1.7991483459773887e-05, "loss": 0.2537, "step": 15965, "teacher_loss": 0.25464287400245667 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.24002496898174286, "learning_rate": 1.798925780146526e-05, "loss": 0.2675, "step": 15966, "teacher_loss": 0.2706074118614197 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.32068461179733276, "learning_rate": 1.798703207462191e-05, "loss": 0.2741, "step": 15967, "teacher_loss": 0.26891613006591797 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.38085508346557617, "learning_rate": 1.798480627929488e-05, "loss": 0.2742, "step": 15968, "teacher_loss": 0.26235491037368774 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3184136748313904, "learning_rate": 1.7982580415535182e-05, "loss": 0.3242, "step": 15969, "teacher_loss": 0.3248189687728882 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5267320871353149, "learning_rate": 1.7980354483393865e-05, "loss": 0.2546, "step": 15970, "teacher_loss": 0.22439610958099365 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5789691805839539, "learning_rate": 1.797812848292195e-05, "loss": 0.2764, "step": 15971, "teacher_loss": 0.24276217818260193 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.19721221923828125, "learning_rate": 1.797590241417048e-05, "loss": 0.224, "step": 15972, "teacher_loss": 0.22701910138130188 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 1.1301004886627197, "learning_rate": 1.7973676277190486e-05, "loss": 0.2947, "step": 15973, "teacher_loss": 0.20184342563152313 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.7524248957633972, "learning_rate": 1.7971450072033016e-05, "loss": 0.3497, "step": 15974, "teacher_loss": 0.30495405197143555 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3591300845146179, "learning_rate": 1.79692237987491e-05, "loss": 0.2108, "step": 15975, "teacher_loss": 0.19436979293823242 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.16149577498435974, "learning_rate": 1.796699745738979e-05, "loss": 0.2423, "step": 15976, "teacher_loss": 0.25131040811538696 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3153558671474457, "learning_rate": 1.7964771048006116e-05, "loss": 0.2446, "step": 15977, "teacher_loss": 0.2367090880870819 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.26709863543510437, "learning_rate": 1.796254457064914e-05, "loss": 0.2734, "step": 15978, "teacher_loss": 0.2741333246231079 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.7244864106178284, "learning_rate": 1.7960318025369894e-05, "loss": 0.286, "step": 15979, "teacher_loss": 0.23732250928878784 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.9245574474334717, "learning_rate": 1.795809141221943e-05, "loss": 0.3033, "step": 15980, "teacher_loss": 0.23424173891544342 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.46007609367370605, "learning_rate": 1.7955864731248802e-05, "loss": 0.2898, "step": 15981, "teacher_loss": 0.2708512544631958 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.15726551413536072, "learning_rate": 1.7953637982509055e-05, "loss": 0.175, "step": 15982, "teacher_loss": 0.1769482046365738 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.4426161050796509, "learning_rate": 1.7951411166051247e-05, "loss": 0.2501, "step": 15983, "teacher_loss": 0.22872647643089294 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.38759392499923706, "learning_rate": 1.7949184281926426e-05, "loss": 0.2208, "step": 15984, "teacher_loss": 0.20229625701904297 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 1.1463823318481445, "learning_rate": 1.7946957330185656e-05, "loss": 0.3437, "step": 15985, "teacher_loss": 0.2545274496078491 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.2256568968296051, "learning_rate": 1.7944730310879987e-05, "loss": 0.1881, "step": 15986, "teacher_loss": 0.18394413590431213 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.62260901927948, "learning_rate": 1.794250322406048e-05, "loss": 0.2296, "step": 15987, "teacher_loss": 0.18590421974658966 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.707524299621582, "learning_rate": 1.79402760697782e-05, "loss": 0.3909, "step": 15988, "teacher_loss": 0.3557557761669159 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.498782753944397, "learning_rate": 1.79380488480842e-05, "loss": 0.2433, "step": 15989, "teacher_loss": 0.21486930549144745 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.6074224710464478, "learning_rate": 1.7935821559029554e-05, "loss": 0.3893, "step": 15990, "teacher_loss": 0.36505138874053955 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.42969998717308044, "learning_rate": 1.793359420266532e-05, "loss": 0.2366, "step": 15991, "teacher_loss": 0.21510009467601776 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.82274329662323, "learning_rate": 1.793136677904256e-05, "loss": 0.4297, "step": 15992, "teacher_loss": 0.3860262334346771 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3003634512424469, "learning_rate": 1.7929139288212363e-05, "loss": 0.2321, "step": 15993, "teacher_loss": 0.22450459003448486 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.6007168292999268, "learning_rate": 1.7926911730225772e-05, "loss": 0.387, "step": 15994, "teacher_loss": 0.36320769786834717 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.40766245126724243, "learning_rate": 1.7924684105133873e-05, "loss": 0.2391, "step": 15995, "teacher_loss": 0.22042471170425415 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5066090822219849, "learning_rate": 1.792245641298774e-05, "loss": 0.2717, "step": 15996, "teacher_loss": 0.24564197659492493 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5403831601142883, "learning_rate": 1.7920228653838435e-05, "loss": 0.4249, "step": 15997, "teacher_loss": 0.4120757579803467 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.24504396319389343, "learning_rate": 1.7918000827737052e-05, "loss": 0.2568, "step": 15998, "teacher_loss": 0.25805264711380005 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.20005804300308228, "learning_rate": 1.7915772934734653e-05, "loss": 0.2389, "step": 15999, "teacher_loss": 0.24320833384990692 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.6726173162460327, "learning_rate": 1.7913544974882322e-05, "loss": 0.3026, "step": 16000, "teacher_loss": 0.26144686341285706 }, { "epoch": 2.89, "eval_exact_match": 79.8864711447493, "eval_f1": 87.51815242249732, "step": 16000 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.914963960647583, "learning_rate": 1.7911316948231144e-05, "loss": 0.5497, "step": 16001, "teacher_loss": 0.5090828537940979 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.2682403326034546, "learning_rate": 1.790908885483219e-05, "loss": 0.2327, "step": 16002, "teacher_loss": 0.22879105806350708 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.45626530051231384, "learning_rate": 1.7906860694736556e-05, "loss": 0.2172, "step": 16003, "teacher_loss": 0.19069020450115204 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.2516730725765228, "learning_rate": 1.7904632467995324e-05, "loss": 0.2615, "step": 16004, "teacher_loss": 0.26263898611068726 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.7904136180877686, "learning_rate": 1.7902404174659574e-05, "loss": 0.3381, "step": 16005, "teacher_loss": 0.2878707945346832 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.3381238281726837, "learning_rate": 1.79001758147804e-05, "loss": 0.3125, "step": 16006, "teacher_loss": 0.30966639518737793 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.20560550689697266, "learning_rate": 1.7897947388408885e-05, "loss": 0.1979, "step": 16007, "teacher_loss": 0.1970043033361435 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.7509417533874512, "learning_rate": 1.7895718895596126e-05, "loss": 0.3176, "step": 16008, "teacher_loss": 0.26940953731536865 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 1.0460160970687866, "learning_rate": 1.789349033639322e-05, "loss": 0.304, "step": 16009, "teacher_loss": 0.22155362367630005 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5968755483627319, "learning_rate": 1.789126171085125e-05, "loss": 0.3314, "step": 16010, "teacher_loss": 0.3018585443496704 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.44796669483184814, "learning_rate": 1.788903301902132e-05, "loss": 0.234, "step": 16011, "teacher_loss": 0.21017369627952576 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.18092384934425354, "learning_rate": 1.7886804260954528e-05, "loss": 0.1869, "step": 16012, "teacher_loss": 0.18758562207221985 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.15488232672214508, "learning_rate": 1.7884575436701964e-05, "loss": 0.1648, "step": 16013, "teacher_loss": 0.16591133177280426 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 1.0253793001174927, "learning_rate": 1.7882346546314735e-05, "loss": 0.5451, "step": 16014, "teacher_loss": 0.49170827865600586 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.4046880602836609, "learning_rate": 1.788011758984394e-05, "loss": 0.3106, "step": 16015, "teacher_loss": 0.3000958263874054 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.06968683004379272, "learning_rate": 1.7877888567340687e-05, "loss": 0.1745, "step": 16016, "teacher_loss": 0.1861688494682312 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.6674351692199707, "learning_rate": 1.7875659478856077e-05, "loss": 0.446, "step": 16017, "teacher_loss": 0.42139798402786255 }, { "compression_loss": 0.0, "epoch": 2.89, "label_loss": 0.5845698714256287, "learning_rate": 1.7873430324441218e-05, "loss": 0.2796, "step": 16018, "teacher_loss": 0.2457694709300995 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.43772268295288086, "learning_rate": 1.7871201104147212e-05, "loss": 0.2538, "step": 16019, "teacher_loss": 0.23335182666778564 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2942679524421692, "learning_rate": 1.7868971818025178e-05, "loss": 0.3938, "step": 16020, "teacher_loss": 0.40480470657348633 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.27934524416923523, "learning_rate": 1.7866742466126216e-05, "loss": 0.1647, "step": 16021, "teacher_loss": 0.1520131230354309 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 1.0127445459365845, "learning_rate": 1.7864513048501446e-05, "loss": 0.5147, "step": 16022, "teacher_loss": 0.4593789577484131 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2737566828727722, "learning_rate": 1.786228356520199e-05, "loss": 0.2658, "step": 16023, "teacher_loss": 0.26493918895721436 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.8956804871559143, "learning_rate": 1.7860054016278943e-05, "loss": 0.3444, "step": 16024, "teacher_loss": 0.2831432819366455 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.32857370376586914, "learning_rate": 1.785782440178343e-05, "loss": 0.2529, "step": 16025, "teacher_loss": 0.2444915771484375 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.437171995639801, "learning_rate": 1.785559472176658e-05, "loss": 0.2524, "step": 16026, "teacher_loss": 0.23187844455242157 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 1.0220277309417725, "learning_rate": 1.7853364976279498e-05, "loss": 0.2716, "step": 16027, "teacher_loss": 0.1882309764623642 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.3576350808143616, "learning_rate": 1.7851135165373322e-05, "loss": 0.1813, "step": 16028, "teacher_loss": 0.1616886556148529 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.45193690061569214, "learning_rate": 1.784890528909915e-05, "loss": 0.2437, "step": 16029, "teacher_loss": 0.22054174542427063 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5340882539749146, "learning_rate": 1.784667534750813e-05, "loss": 0.3684, "step": 16030, "teacher_loss": 0.3499506115913391 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5889751315116882, "learning_rate": 1.784444534065138e-05, "loss": 0.2103, "step": 16031, "teacher_loss": 0.16824448108673096 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2519285976886749, "learning_rate": 1.7842215268580024e-05, "loss": 0.1661, "step": 16032, "teacher_loss": 0.1565803438425064 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.17461583018302917, "learning_rate": 1.783998513134519e-05, "loss": 0.1815, "step": 16033, "teacher_loss": 0.18231001496315002 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.7595190405845642, "learning_rate": 1.7837754928998016e-05, "loss": 0.3799, "step": 16034, "teacher_loss": 0.33769965171813965 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.19194740056991577, "learning_rate": 1.783552466158963e-05, "loss": 0.1847, "step": 16035, "teacher_loss": 0.18393424153327942 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.497952401638031, "learning_rate": 1.7833294329171163e-05, "loss": 0.3125, "step": 16036, "teacher_loss": 0.29184794425964355 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.41504085063934326, "learning_rate": 1.783106393179375e-05, "loss": 0.3834, "step": 16037, "teacher_loss": 0.37989360094070435 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4221075773239136, "learning_rate": 1.782883346950853e-05, "loss": 0.2627, "step": 16038, "teacher_loss": 0.24500735104084015 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 1.0650999546051025, "learning_rate": 1.7826602942366643e-05, "loss": 0.2836, "step": 16039, "teacher_loss": 0.1967909038066864 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4444456398487091, "learning_rate": 1.7824372350419223e-05, "loss": 0.3626, "step": 16040, "teacher_loss": 0.35346710681915283 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5500121116638184, "learning_rate": 1.782214169371741e-05, "loss": 0.2662, "step": 16041, "teacher_loss": 0.23464587330818176 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.29292920231819153, "learning_rate": 1.7819910972312355e-05, "loss": 0.25, "step": 16042, "teacher_loss": 0.24519295990467072 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2686951160430908, "learning_rate": 1.7817680186255195e-05, "loss": 0.2027, "step": 16043, "teacher_loss": 0.19531291723251343 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.34421679377555847, "learning_rate": 1.781544933559707e-05, "loss": 0.3148, "step": 16044, "teacher_loss": 0.3114929795265198 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4632073938846588, "learning_rate": 1.781321842038914e-05, "loss": 0.2803, "step": 16045, "teacher_loss": 0.2600013017654419 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.7540364861488342, "learning_rate": 1.7810987440682545e-05, "loss": 0.2989, "step": 16046, "teacher_loss": 0.24829471111297607 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.43955904245376587, "learning_rate": 1.7808756396528433e-05, "loss": 0.2104, "step": 16047, "teacher_loss": 0.18499056994915009 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.34506499767303467, "learning_rate": 1.780652528797796e-05, "loss": 0.2615, "step": 16048, "teacher_loss": 0.25218769907951355 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5236640572547913, "learning_rate": 1.7804294115082272e-05, "loss": 0.2305, "step": 16049, "teacher_loss": 0.19791346788406372 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.7024568319320679, "learning_rate": 1.7802062877892532e-05, "loss": 0.253, "step": 16050, "teacher_loss": 0.20302355289459229 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.45694369077682495, "learning_rate": 1.779983157645989e-05, "loss": 0.2481, "step": 16051, "teacher_loss": 0.22492089867591858 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.815610408782959, "learning_rate": 1.7797600210835507e-05, "loss": 0.4499, "step": 16052, "teacher_loss": 0.4092215895652771 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.32092657685279846, "learning_rate": 1.7795368781070536e-05, "loss": 0.1856, "step": 16053, "teacher_loss": 0.17059555649757385 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.3048647344112396, "learning_rate": 1.779313728721614e-05, "loss": 0.1897, "step": 16054, "teacher_loss": 0.17687265574932098 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4967675805091858, "learning_rate": 1.7790905729323482e-05, "loss": 0.256, "step": 16055, "teacher_loss": 0.2292841374874115 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.19485768675804138, "learning_rate": 1.778867410744372e-05, "loss": 0.2138, "step": 16056, "teacher_loss": 0.2158549427986145 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5421093106269836, "learning_rate": 1.778644242162802e-05, "loss": 0.2159, "step": 16057, "teacher_loss": 0.17962437868118286 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.9199850559234619, "learning_rate": 1.778421067192756e-05, "loss": 0.4391, "step": 16058, "teacher_loss": 0.38563990592956543 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.18647682666778564, "learning_rate": 1.7781978858393485e-05, "loss": 0.1842, "step": 16059, "teacher_loss": 0.18389815092086792 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.5808572769165039, "learning_rate": 1.777974698107698e-05, "loss": 0.4089, "step": 16060, "teacher_loss": 0.38982248306274414 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2736385762691498, "learning_rate": 1.7777515040029213e-05, "loss": 0.1812, "step": 16061, "teacher_loss": 0.17098172008991241 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 1.1857458353042603, "learning_rate": 1.777528303530135e-05, "loss": 0.3164, "step": 16062, "teacher_loss": 0.21980780363082886 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.25786224007606506, "learning_rate": 1.777305096694457e-05, "loss": 0.3953, "step": 16063, "teacher_loss": 0.4106091856956482 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4659283757209778, "learning_rate": 1.7770818835010045e-05, "loss": 0.304, "step": 16064, "teacher_loss": 0.28598618507385254 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.3318653702735901, "learning_rate": 1.7768586639548947e-05, "loss": 0.2172, "step": 16065, "teacher_loss": 0.20446240901947021 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.6898326873779297, "learning_rate": 1.7766354380612463e-05, "loss": 0.2831, "step": 16066, "teacher_loss": 0.23785468935966492 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.7938921451568604, "learning_rate": 1.776412205825177e-05, "loss": 0.4072, "step": 16067, "teacher_loss": 0.3641902804374695 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 1.0556951761245728, "learning_rate": 1.7761889672518038e-05, "loss": 0.4154, "step": 16068, "teacher_loss": 0.3442896604537964 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.3701055645942688, "learning_rate": 1.775965722346246e-05, "loss": 0.2603, "step": 16069, "teacher_loss": 0.24807268381118774 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.477749764919281, "learning_rate": 1.7757424711136217e-05, "loss": 0.2776, "step": 16070, "teacher_loss": 0.2553953528404236 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.6850447058677673, "learning_rate": 1.775519213559049e-05, "loss": 0.477, "step": 16071, "teacher_loss": 0.45388537645339966 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.2957621216773987, "learning_rate": 1.775295949687647e-05, "loss": 0.224, "step": 16072, "teacher_loss": 0.21604913473129272 }, { "compression_loss": 0.0, "epoch": 2.9, "label_loss": 0.4405668377876282, "learning_rate": 1.7750726795045345e-05, "loss": 0.2589, "step": 16073, "teacher_loss": 0.23867294192314148 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3325265347957611, "learning_rate": 1.7748494030148296e-05, "loss": 0.292, "step": 16074, "teacher_loss": 0.28747135400772095 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3731164038181305, "learning_rate": 1.7746261202236522e-05, "loss": 0.3192, "step": 16075, "teacher_loss": 0.3132528066635132 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.8894391655921936, "learning_rate": 1.7744028311361212e-05, "loss": 0.3906, "step": 16076, "teacher_loss": 0.3351427912712097 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.22205477952957153, "learning_rate": 1.7741795357573556e-05, "loss": 0.1731, "step": 16077, "teacher_loss": 0.16768944263458252 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.42038851976394653, "learning_rate": 1.7739562340924762e-05, "loss": 0.2521, "step": 16078, "teacher_loss": 0.23342561721801758 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.568663477897644, "learning_rate": 1.773732926146601e-05, "loss": 0.3841, "step": 16079, "teacher_loss": 0.36354494094848633 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.8479280471801758, "learning_rate": 1.7735096119248505e-05, "loss": 0.3881, "step": 16080, "teacher_loss": 0.3370633125305176 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.36185094714164734, "learning_rate": 1.7732862914323447e-05, "loss": 0.292, "step": 16081, "teacher_loss": 0.28420397639274597 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.37595734000205994, "learning_rate": 1.7730629646742038e-05, "loss": 0.2051, "step": 16082, "teacher_loss": 0.18616746366024017 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.33760541677474976, "learning_rate": 1.772839631655548e-05, "loss": 0.2243, "step": 16083, "teacher_loss": 0.21172216534614563 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3132728636264801, "learning_rate": 1.772616292381497e-05, "loss": 0.233, "step": 16084, "teacher_loss": 0.2240440845489502 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5714585781097412, "learning_rate": 1.772392946857172e-05, "loss": 0.3657, "step": 16085, "teacher_loss": 0.34282103180885315 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.30047088861465454, "learning_rate": 1.7721695950876937e-05, "loss": 0.2521, "step": 16086, "teacher_loss": 0.246731236577034 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5033674240112305, "learning_rate": 1.7719462370781816e-05, "loss": 0.2638, "step": 16087, "teacher_loss": 0.23716311156749725 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.25135719776153564, "learning_rate": 1.7717228728337587e-05, "loss": 0.2415, "step": 16088, "teacher_loss": 0.24039265513420105 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.28981077671051025, "learning_rate": 1.7714995023595443e-05, "loss": 0.1837, "step": 16089, "teacher_loss": 0.17186924815177917 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.41580283641815186, "learning_rate": 1.7712761256606603e-05, "loss": 0.2221, "step": 16090, "teacher_loss": 0.20057857036590576 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.31485021114349365, "learning_rate": 1.7710527427422285e-05, "loss": 0.2584, "step": 16091, "teacher_loss": 0.252141535282135 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5772066712379456, "learning_rate": 1.7708293536093697e-05, "loss": 0.4929, "step": 16092, "teacher_loss": 0.4835689067840576 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.46490949392318726, "learning_rate": 1.7706059582672058e-05, "loss": 0.2385, "step": 16093, "teacher_loss": 0.21330301463603973 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.2244323343038559, "learning_rate": 1.770382556720859e-05, "loss": 0.2339, "step": 16094, "teacher_loss": 0.23494362831115723 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3616204261779785, "learning_rate": 1.7701591489754503e-05, "loss": 0.2179, "step": 16095, "teacher_loss": 0.20198285579681396 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.43935900926589966, "learning_rate": 1.7699357350361024e-05, "loss": 0.4883, "step": 16096, "teacher_loss": 0.4937725067138672 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.8279876708984375, "learning_rate": 1.7697123149079374e-05, "loss": 0.3442, "step": 16097, "teacher_loss": 0.29044121503829956 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5152071118354797, "learning_rate": 1.7694888885960782e-05, "loss": 0.3062, "step": 16098, "teacher_loss": 0.28297024965286255 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.10091327875852585, "learning_rate": 1.769265456105646e-05, "loss": 0.1812, "step": 16099, "teacher_loss": 0.19010859727859497 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.9394775032997131, "learning_rate": 1.7690420174417644e-05, "loss": 0.3604, "step": 16100, "teacher_loss": 0.29601871967315674 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.6475400924682617, "learning_rate": 1.7688185726095562e-05, "loss": 0.2933, "step": 16101, "teacher_loss": 0.2539290487766266 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.8079171180725098, "learning_rate": 1.7685951216141438e-05, "loss": 0.5045, "step": 16102, "teacher_loss": 0.47076326608657837 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.261150985956192, "learning_rate": 1.7683716644606508e-05, "loss": 0.1593, "step": 16103, "teacher_loss": 0.1479683816432953 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.2505064606666565, "learning_rate": 1.7681482011541997e-05, "loss": 0.2793, "step": 16104, "teacher_loss": 0.28248023986816406 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.6228090524673462, "learning_rate": 1.7679247316999153e-05, "loss": 0.4014, "step": 16105, "teacher_loss": 0.3767520487308502 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.6806678175926208, "learning_rate": 1.7677012561029193e-05, "loss": 0.2471, "step": 16106, "teacher_loss": 0.1989212930202484 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.47563856840133667, "learning_rate": 1.767477774368336e-05, "loss": 0.4794, "step": 16107, "teacher_loss": 0.4798167943954468 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.7107974290847778, "learning_rate": 1.7672542865012898e-05, "loss": 0.3072, "step": 16108, "teacher_loss": 0.26232224702835083 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5897939801216125, "learning_rate": 1.7670307925069037e-05, "loss": 0.2354, "step": 16109, "teacher_loss": 0.19600075483322144 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5974275469779968, "learning_rate": 1.766807292390302e-05, "loss": 0.3761, "step": 16110, "teacher_loss": 0.35150012373924255 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.596744179725647, "learning_rate": 1.7665837861566095e-05, "loss": 0.2724, "step": 16111, "teacher_loss": 0.23638194799423218 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.24147331714630127, "learning_rate": 1.7663602738109498e-05, "loss": 0.2063, "step": 16112, "teacher_loss": 0.20243993401527405 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.12071894109249115, "learning_rate": 1.7661367553584474e-05, "loss": 0.1478, "step": 16113, "teacher_loss": 0.15078537166118622 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.499862939119339, "learning_rate": 1.7659132308042277e-05, "loss": 0.232, "step": 16114, "teacher_loss": 0.20223723351955414 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3025949001312256, "learning_rate": 1.765689700153414e-05, "loss": 0.1934, "step": 16115, "teacher_loss": 0.18127796053886414 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.464596688747406, "learning_rate": 1.765466163411133e-05, "loss": 0.2866, "step": 16116, "teacher_loss": 0.26687300205230713 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.6942428350448608, "learning_rate": 1.765242620582508e-05, "loss": 0.3075, "step": 16117, "teacher_loss": 0.2645556926727295 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.9362626075744629, "learning_rate": 1.7650190716726654e-05, "loss": 0.3366, "step": 16118, "teacher_loss": 0.27001315355300903 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.35826563835144043, "learning_rate": 1.7647955166867298e-05, "loss": 0.2517, "step": 16119, "teacher_loss": 0.23980633914470673 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.41671645641326904, "learning_rate": 1.764571955629827e-05, "loss": 0.3084, "step": 16120, "teacher_loss": 0.29631686210632324 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.5379695296287537, "learning_rate": 1.7643483885070827e-05, "loss": 0.3412, "step": 16121, "teacher_loss": 0.31934288144111633 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.29966095089912415, "learning_rate": 1.7641248153236223e-05, "loss": 0.268, "step": 16122, "teacher_loss": 0.2645174264907837 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.2360333800315857, "learning_rate": 1.7639012360845717e-05, "loss": 0.2522, "step": 16123, "teacher_loss": 0.2539408802986145 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3037000894546509, "learning_rate": 1.763677650795057e-05, "loss": 0.3281, "step": 16124, "teacher_loss": 0.330766499042511 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.646991491317749, "learning_rate": 1.763454059460204e-05, "loss": 0.3752, "step": 16125, "teacher_loss": 0.3450066149234772 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.4843207001686096, "learning_rate": 1.7632304620851398e-05, "loss": 0.2651, "step": 16126, "teacher_loss": 0.24073311686515808 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.28041505813598633, "learning_rate": 1.76300685867499e-05, "loss": 0.2381, "step": 16127, "teacher_loss": 0.23344899713993073 }, { "compression_loss": 0.0, "epoch": 2.91, "label_loss": 0.3682040572166443, "learning_rate": 1.762783249234882e-05, "loss": 0.232, "step": 16128, "teacher_loss": 0.21683257818222046 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.26198697090148926, "learning_rate": 1.762559633769941e-05, "loss": 0.277, "step": 16129, "teacher_loss": 0.27867424488067627 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.19668696820735931, "learning_rate": 1.7623360122852953e-05, "loss": 0.2147, "step": 16130, "teacher_loss": 0.21672853827476501 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.21880286931991577, "learning_rate": 1.7621123847860715e-05, "loss": 0.2192, "step": 16131, "teacher_loss": 0.21927449107170105 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.34071192145347595, "learning_rate": 1.7618887512773966e-05, "loss": 0.2158, "step": 16132, "teacher_loss": 0.20189067721366882 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.8398120403289795, "learning_rate": 1.761665111764398e-05, "loss": 0.3458, "step": 16133, "teacher_loss": 0.2908574342727661 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.5805809497833252, "learning_rate": 1.761441466252202e-05, "loss": 0.2841, "step": 16134, "teacher_loss": 0.2511390447616577 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.24854423105716705, "learning_rate": 1.7612178147459377e-05, "loss": 0.2268, "step": 16135, "teacher_loss": 0.224374458193779 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.9784622192382812, "learning_rate": 1.760994157250732e-05, "loss": 0.3885, "step": 16136, "teacher_loss": 0.32300254702568054 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3983399271965027, "learning_rate": 1.7607704937717123e-05, "loss": 0.3277, "step": 16137, "teacher_loss": 0.31987595558166504 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.4180619716644287, "learning_rate": 1.7605468243140074e-05, "loss": 0.3356, "step": 16138, "teacher_loss": 0.3264451026916504 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.7414376735687256, "learning_rate": 1.7603231488827453e-05, "loss": 0.4018, "step": 16139, "teacher_loss": 0.36406221985816956 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3331107497215271, "learning_rate": 1.7600994674830536e-05, "loss": 0.2841, "step": 16140, "teacher_loss": 0.2786262035369873 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.44286176562309265, "learning_rate": 1.7598757801200612e-05, "loss": 0.3226, "step": 16141, "teacher_loss": 0.309234619140625 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.20023499429225922, "learning_rate": 1.7596520867988957e-05, "loss": 0.2079, "step": 16142, "teacher_loss": 0.20873233675956726 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.532241940498352, "learning_rate": 1.7594283875246868e-05, "loss": 0.3196, "step": 16143, "teacher_loss": 0.2960240840911865 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.7308896780014038, "learning_rate": 1.7592046823025627e-05, "loss": 0.406, "step": 16144, "teacher_loss": 0.36990946531295776 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.29296427965164185, "learning_rate": 1.758980971137652e-05, "loss": 0.3111, "step": 16145, "teacher_loss": 0.3130940794944763 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.6873419880867004, "learning_rate": 1.758757254035085e-05, "loss": 0.2975, "step": 16146, "teacher_loss": 0.2541968524456024 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.5063971877098083, "learning_rate": 1.758533530999989e-05, "loss": 0.3331, "step": 16147, "teacher_loss": 0.3137899339199066 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.43581122159957886, "learning_rate": 1.7583098020374946e-05, "loss": 0.2708, "step": 16148, "teacher_loss": 0.25241971015930176 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.6329385042190552, "learning_rate": 1.758086067152731e-05, "loss": 0.2483, "step": 16149, "teacher_loss": 0.20553848147392273 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.22338402271270752, "learning_rate": 1.7578623263508275e-05, "loss": 0.2685, "step": 16150, "teacher_loss": 0.2735450565814972 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.30802640318870544, "learning_rate": 1.7576385796369144e-05, "loss": 0.1825, "step": 16151, "teacher_loss": 0.16849884390830994 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.1767214834690094, "learning_rate": 1.7574148270161206e-05, "loss": 0.2979, "step": 16152, "teacher_loss": 0.31136196851730347 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3767133951187134, "learning_rate": 1.757191068493577e-05, "loss": 0.2479, "step": 16153, "teacher_loss": 0.23358631134033203 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.7234150171279907, "learning_rate": 1.7569673040744126e-05, "loss": 0.4466, "step": 16154, "teacher_loss": 0.41580742597579956 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.4831986427307129, "learning_rate": 1.7567435337637588e-05, "loss": 0.2167, "step": 16155, "teacher_loss": 0.1871432214975357 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3983255624771118, "learning_rate": 1.7565197575667458e-05, "loss": 0.2861, "step": 16156, "teacher_loss": 0.27364423871040344 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.5769867897033691, "learning_rate": 1.7562959754885035e-05, "loss": 0.4258, "step": 16157, "teacher_loss": 0.409015417098999 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.6339594125747681, "learning_rate": 1.7560721875341632e-05, "loss": 0.445, "step": 16158, "teacher_loss": 0.42395538091659546 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.26187747716903687, "learning_rate": 1.7558483937088546e-05, "loss": 0.1517, "step": 16159, "teacher_loss": 0.1394776701927185 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.6097872257232666, "learning_rate": 1.75562459401771e-05, "loss": 0.2761, "step": 16160, "teacher_loss": 0.23902052640914917 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.6100024580955505, "learning_rate": 1.7554007884658602e-05, "loss": 0.2712, "step": 16161, "teacher_loss": 0.23352715373039246 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.497979998588562, "learning_rate": 1.7551769770584357e-05, "loss": 0.2614, "step": 16162, "teacher_loss": 0.23510530591011047 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.9240494966506958, "learning_rate": 1.7549531598005678e-05, "loss": 0.2771, "step": 16163, "teacher_loss": 0.20525413751602173 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.31376272439956665, "learning_rate": 1.7547293366973885e-05, "loss": 0.2234, "step": 16164, "teacher_loss": 0.21331624686717987 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.45483916997909546, "learning_rate": 1.75450550775403e-05, "loss": 0.2459, "step": 16165, "teacher_loss": 0.22266817092895508 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.442954421043396, "learning_rate": 1.7542816729756226e-05, "loss": 0.2633, "step": 16166, "teacher_loss": 0.24332848191261292 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.4294344186782837, "learning_rate": 1.7540578323672986e-05, "loss": 0.352, "step": 16167, "teacher_loss": 0.34341752529144287 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.39837056398391724, "learning_rate": 1.7538339859341902e-05, "loss": 0.2946, "step": 16168, "teacher_loss": 0.28304722905158997 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.23290793597698212, "learning_rate": 1.7536101336814302e-05, "loss": 0.1838, "step": 16169, "teacher_loss": 0.17831340432167053 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.45772433280944824, "learning_rate": 1.7533862756141495e-05, "loss": 0.2702, "step": 16170, "teacher_loss": 0.24932971596717834 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.15961036086082458, "learning_rate": 1.7531624117374818e-05, "loss": 0.2181, "step": 16171, "teacher_loss": 0.22460046410560608 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.5282003283500671, "learning_rate": 1.7529385420565588e-05, "loss": 0.2384, "step": 16172, "teacher_loss": 0.2061898559331894 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.46457311511039734, "learning_rate": 1.752714666576513e-05, "loss": 0.3658, "step": 16173, "teacher_loss": 0.3548293113708496 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.48269230127334595, "learning_rate": 1.7524907853024782e-05, "loss": 0.3196, "step": 16174, "teacher_loss": 0.3015066981315613 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.4050183892250061, "learning_rate": 1.7522668982395863e-05, "loss": 0.2519, "step": 16175, "teacher_loss": 0.23483791947364807 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.16208238899707794, "learning_rate": 1.752043005392971e-05, "loss": 0.1718, "step": 16176, "teacher_loss": 0.1729292869567871 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.47579246759414673, "learning_rate": 1.7518191067677648e-05, "loss": 0.28, "step": 16177, "teacher_loss": 0.2582804560661316 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.33389919996261597, "learning_rate": 1.7515952023691022e-05, "loss": 0.2996, "step": 16178, "teacher_loss": 0.2958042621612549 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.36439526081085205, "learning_rate": 1.7513712922021154e-05, "loss": 0.2229, "step": 16179, "teacher_loss": 0.20723237097263336 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.36983248591423035, "learning_rate": 1.7511473762719382e-05, "loss": 0.3186, "step": 16180, "teacher_loss": 0.3128817677497864 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.30682337284088135, "learning_rate": 1.7509234545837052e-05, "loss": 0.1638, "step": 16181, "teacher_loss": 0.1478997766971588 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3992833197116852, "learning_rate": 1.7506995271425494e-05, "loss": 0.1854, "step": 16182, "teacher_loss": 0.16160094738006592 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.4172820746898651, "learning_rate": 1.7504755939536056e-05, "loss": 0.2763, "step": 16183, "teacher_loss": 0.2606709897518158 }, { "compression_loss": 0.0, "epoch": 2.92, "label_loss": 0.3798571228981018, "learning_rate": 1.750251655022007e-05, "loss": 0.3021, "step": 16184, "teacher_loss": 0.2934991121292114 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.46387526392936707, "learning_rate": 1.7500277103528883e-05, "loss": 0.1727, "step": 16185, "teacher_loss": 0.1403772234916687 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.7701764106750488, "learning_rate": 1.749803759951384e-05, "loss": 0.446, "step": 16186, "teacher_loss": 0.4099322557449341 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 1.0217154026031494, "learning_rate": 1.749579803822628e-05, "loss": 1.0812, "step": 16187, "teacher_loss": 1.0877916812896729 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.711355447769165, "learning_rate": 1.749355841971755e-05, "loss": 0.2979, "step": 16188, "teacher_loss": 0.2519662380218506 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5177268385887146, "learning_rate": 1.7491318744039012e-05, "loss": 0.2221, "step": 16189, "teacher_loss": 0.1891995370388031 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6185563802719116, "learning_rate": 1.7489079011242002e-05, "loss": 0.3184, "step": 16190, "teacher_loss": 0.2850569486618042 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5217098593711853, "learning_rate": 1.748683922137787e-05, "loss": 0.2274, "step": 16191, "teacher_loss": 0.19475026428699493 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.1249813660979271, "learning_rate": 1.748459937449797e-05, "loss": 0.1696, "step": 16192, "teacher_loss": 0.17457376420497894 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6198744773864746, "learning_rate": 1.7482359470653656e-05, "loss": 0.325, "step": 16193, "teacher_loss": 0.2922342121601105 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 1.0326619148254395, "learning_rate": 1.7480119509896285e-05, "loss": 0.4159, "step": 16194, "teacher_loss": 0.3473173975944519 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5046898126602173, "learning_rate": 1.747787949227721e-05, "loss": 0.2326, "step": 16195, "teacher_loss": 0.2023729830980301 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.21299424767494202, "learning_rate": 1.7475639417847783e-05, "loss": 0.1895, "step": 16196, "teacher_loss": 0.18686988949775696 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.2575538754463196, "learning_rate": 1.7473399286659366e-05, "loss": 0.1551, "step": 16197, "teacher_loss": 0.14368784427642822 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.9524407982826233, "learning_rate": 1.747115909876332e-05, "loss": 0.2605, "step": 16198, "teacher_loss": 0.18356439471244812 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.25883394479751587, "learning_rate": 1.746891885421101e-05, "loss": 0.2105, "step": 16199, "teacher_loss": 0.20517179369926453 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5914641618728638, "learning_rate": 1.7466678553053788e-05, "loss": 0.4274, "step": 16200, "teacher_loss": 0.40914636850357056 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.33066707849502563, "learning_rate": 1.7464438195343025e-05, "loss": 0.254, "step": 16201, "teacher_loss": 0.24547211825847626 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.38130831718444824, "learning_rate": 1.746219778113008e-05, "loss": 0.2571, "step": 16202, "teacher_loss": 0.24333316087722778 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.34671711921691895, "learning_rate": 1.7459957310466325e-05, "loss": 0.1723, "step": 16203, "teacher_loss": 0.15288150310516357 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.41479361057281494, "learning_rate": 1.7457716783403122e-05, "loss": 0.2055, "step": 16204, "teacher_loss": 0.18227511644363403 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 1.1521586179733276, "learning_rate": 1.7455476199991838e-05, "loss": 0.5271, "step": 16205, "teacher_loss": 0.45759499073028564 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.39160391688346863, "learning_rate": 1.7453235560283855e-05, "loss": 0.2155, "step": 16206, "teacher_loss": 0.19596587121486664 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6835144758224487, "learning_rate": 1.7450994864330532e-05, "loss": 0.2674, "step": 16207, "teacher_loss": 0.22113800048828125 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5899465680122375, "learning_rate": 1.744875411218324e-05, "loss": 0.2991, "step": 16208, "teacher_loss": 0.26673072576522827 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.24086038768291473, "learning_rate": 1.7446513303893367e-05, "loss": 0.1614, "step": 16209, "teacher_loss": 0.15262514352798462 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4065621793270111, "learning_rate": 1.7444272439512272e-05, "loss": 0.2729, "step": 16210, "teacher_loss": 0.25803348422050476 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4345285892486572, "learning_rate": 1.7442031519091343e-05, "loss": 0.2669, "step": 16211, "teacher_loss": 0.24826453626155853 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3786526322364807, "learning_rate": 1.743979054268195e-05, "loss": 0.3106, "step": 16212, "teacher_loss": 0.3030116558074951 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3400603234767914, "learning_rate": 1.7437549510335475e-05, "loss": 0.262, "step": 16213, "teacher_loss": 0.25330159068107605 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3885974884033203, "learning_rate": 1.7435308422103298e-05, "loss": 0.2524, "step": 16214, "teacher_loss": 0.2372906506061554 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3973779082298279, "learning_rate": 1.74330672780368e-05, "loss": 0.3326, "step": 16215, "teacher_loss": 0.3254256248474121 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.38280054926872253, "learning_rate": 1.743082607818737e-05, "loss": 0.1683, "step": 16216, "teacher_loss": 0.14444591104984283 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 1.3200329542160034, "learning_rate": 1.7428584822606378e-05, "loss": 0.4854, "step": 16217, "teacher_loss": 0.39266103506088257 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.2061062455177307, "learning_rate": 1.7426343511345215e-05, "loss": 0.1861, "step": 16218, "teacher_loss": 0.18386892974376678 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.8778562545776367, "learning_rate": 1.7424102144455276e-05, "loss": 0.5423, "step": 16219, "teacher_loss": 0.5050198435783386 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4748685956001282, "learning_rate": 1.7421860721987944e-05, "loss": 0.2362, "step": 16220, "teacher_loss": 0.209650918841362 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.41377168893814087, "learning_rate": 1.7419619243994607e-05, "loss": 0.1578, "step": 16221, "teacher_loss": 0.12932580709457397 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3093533515930176, "learning_rate": 1.7417377710526646e-05, "loss": 0.1763, "step": 16222, "teacher_loss": 0.16154059767723083 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6148918271064758, "learning_rate": 1.741513612163547e-05, "loss": 0.3033, "step": 16223, "teacher_loss": 0.2687157988548279 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.344913125038147, "learning_rate": 1.7412894477372462e-05, "loss": 0.2304, "step": 16224, "teacher_loss": 0.21768099069595337 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.25588029623031616, "learning_rate": 1.7410652777789018e-05, "loss": 0.1551, "step": 16225, "teacher_loss": 0.1438492387533188 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4032304286956787, "learning_rate": 1.7408411022936535e-05, "loss": 0.3067, "step": 16226, "teacher_loss": 0.296006977558136 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4316675662994385, "learning_rate": 1.7406169212866405e-05, "loss": 0.2109, "step": 16227, "teacher_loss": 0.1864151656627655 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.24068279564380646, "learning_rate": 1.7403927347630028e-05, "loss": 0.1561, "step": 16228, "teacher_loss": 0.14674007892608643 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.7074235677719116, "learning_rate": 1.740168542727881e-05, "loss": 0.3919, "step": 16229, "teacher_loss": 0.3567984104156494 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6305174231529236, "learning_rate": 1.7399443451864138e-05, "loss": 0.2308, "step": 16230, "teacher_loss": 0.18639595806598663 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.6713123321533203, "learning_rate": 1.739720142143743e-05, "loss": 0.3527, "step": 16231, "teacher_loss": 0.31731051206588745 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.69814532995224, "learning_rate": 1.7394959336050076e-05, "loss": 0.333, "step": 16232, "teacher_loss": 0.2923782169818878 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.5767325162887573, "learning_rate": 1.7392717195753482e-05, "loss": 0.299, "step": 16233, "teacher_loss": 0.2681657671928406 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.49995118379592896, "learning_rate": 1.7390475000599063e-05, "loss": 0.25, "step": 16234, "teacher_loss": 0.22225210070610046 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.3751608729362488, "learning_rate": 1.7388232750638214e-05, "loss": 0.2718, "step": 16235, "teacher_loss": 0.2603573203086853 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.644255518913269, "learning_rate": 1.7385990445922347e-05, "loss": 0.5252, "step": 16236, "teacher_loss": 0.5119647979736328 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.32858479022979736, "learning_rate": 1.738374808650288e-05, "loss": 0.1897, "step": 16237, "teacher_loss": 0.17430666089057922 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.4562036395072937, "learning_rate": 1.738150567243121e-05, "loss": 0.2564, "step": 16238, "teacher_loss": 0.2341517210006714 }, { "compression_loss": 0.0, "epoch": 2.93, "label_loss": 0.25114375352859497, "learning_rate": 1.737926320375876e-05, "loss": 0.1806, "step": 16239, "teacher_loss": 0.1727811098098755 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.378387451171875, "learning_rate": 1.737702068053693e-05, "loss": 0.2352, "step": 16240, "teacher_loss": 0.21928784251213074 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.43779659271240234, "learning_rate": 1.737477810281715e-05, "loss": 0.3617, "step": 16241, "teacher_loss": 0.3532874584197998 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4488775134086609, "learning_rate": 1.7372535470650827e-05, "loss": 0.3713, "step": 16242, "teacher_loss": 0.36263734102249146 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3894228935241699, "learning_rate": 1.7370292784089377e-05, "loss": 0.2033, "step": 16243, "teacher_loss": 0.1826312392950058 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4521743655204773, "learning_rate": 1.736805004318422e-05, "loss": 0.2173, "step": 16244, "teacher_loss": 0.1911720335483551 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5785133838653564, "learning_rate": 1.7365807247986776e-05, "loss": 0.3274, "step": 16245, "teacher_loss": 0.2995510697364807 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5688097476959229, "learning_rate": 1.7363564398548466e-05, "loss": 0.2708, "step": 16246, "teacher_loss": 0.23773327469825745 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3906450569629669, "learning_rate": 1.736132149492071e-05, "loss": 0.2493, "step": 16247, "teacher_loss": 0.23362880945205688 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5007302761077881, "learning_rate": 1.735907853715493e-05, "loss": 0.2363, "step": 16248, "teacher_loss": 0.20691628754138947 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4081491529941559, "learning_rate": 1.735683552530255e-05, "loss": 0.1904, "step": 16249, "teacher_loss": 0.1661631464958191 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.861845850944519, "learning_rate": 1.7354592459415003e-05, "loss": 0.2617, "step": 16250, "teacher_loss": 0.19499847292900085 }, { "epoch": 2.94, "eval_exact_match": 79.47965941343425, "eval_f1": 87.22782178210471, "step": 16250 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.8419839143753052, "learning_rate": 1.735234933954371e-05, "loss": 0.3644, "step": 16251, "teacher_loss": 0.3113023638725281 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3901873826980591, "learning_rate": 1.7350106165740097e-05, "loss": 0.2734, "step": 16252, "teacher_loss": 0.26039940118789673 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.38331127166748047, "learning_rate": 1.7347862938055595e-05, "loss": 0.27, "step": 16253, "teacher_loss": 0.2573677897453308 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5922526121139526, "learning_rate": 1.734561965654164e-05, "loss": 0.2926, "step": 16254, "teacher_loss": 0.25933918356895447 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.6473124027252197, "learning_rate": 1.7343376321249654e-05, "loss": 0.284, "step": 16255, "teacher_loss": 0.2436695694923401 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4468892812728882, "learning_rate": 1.7341132932231075e-05, "loss": 0.226, "step": 16256, "teacher_loss": 0.20140531659126282 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.876840353012085, "learning_rate": 1.7338889489537336e-05, "loss": 0.4509, "step": 16257, "teacher_loss": 0.4036021828651428 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.7752852439880371, "learning_rate": 1.7336645993219876e-05, "loss": 0.3929, "step": 16258, "teacher_loss": 0.3503738045692444 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.25751954317092896, "learning_rate": 1.7334402443330126e-05, "loss": 0.2534, "step": 16259, "teacher_loss": 0.25288885831832886 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.1455126702785492, "learning_rate": 1.733215883991953e-05, "loss": 0.1277, "step": 16260, "teacher_loss": 0.12575474381446838 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.42787203192710876, "learning_rate": 1.7329915183039523e-05, "loss": 0.203, "step": 16261, "teacher_loss": 0.17806656658649445 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3982592821121216, "learning_rate": 1.7327671472741547e-05, "loss": 0.2261, "step": 16262, "teacher_loss": 0.20698818564414978 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3402353525161743, "learning_rate": 1.732542770907704e-05, "loss": 0.2395, "step": 16263, "teacher_loss": 0.22830656170845032 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5215482711791992, "learning_rate": 1.7323183892097455e-05, "loss": 0.2055, "step": 16264, "teacher_loss": 0.1703799068927765 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.87959885597229, "learning_rate": 1.7320940021854217e-05, "loss": 0.3469, "step": 16265, "teacher_loss": 0.2877114415168762 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.34240660071372986, "learning_rate": 1.731869609839879e-05, "loss": 0.2189, "step": 16266, "teacher_loss": 0.20512527227401733 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.39977091550827026, "learning_rate": 1.7316452121782614e-05, "loss": 0.2623, "step": 16267, "teacher_loss": 0.24700471758842468 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.23541511595249176, "learning_rate": 1.7314208092057133e-05, "loss": 0.1834, "step": 16268, "teacher_loss": 0.17763873934745789 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.6094462871551514, "learning_rate": 1.7311964009273802e-05, "loss": 0.2581, "step": 16269, "teacher_loss": 0.21907475590705872 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3922407031059265, "learning_rate": 1.7309719873484065e-05, "loss": 0.2978, "step": 16270, "teacher_loss": 0.287272572517395 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3889186382293701, "learning_rate": 1.730747568473938e-05, "loss": 0.2678, "step": 16271, "teacher_loss": 0.25430089235305786 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.7005231380462646, "learning_rate": 1.730523144309119e-05, "loss": 0.2403, "step": 16272, "teacher_loss": 0.189142107963562 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.6324283480644226, "learning_rate": 1.7302987148590956e-05, "loss": 0.3775, "step": 16273, "teacher_loss": 0.34919309616088867 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.06284971535205841, "learning_rate": 1.7300742801290132e-05, "loss": 0.1635, "step": 16274, "teacher_loss": 0.17464952170848846 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5814729928970337, "learning_rate": 1.729849840124018e-05, "loss": 0.3112, "step": 16275, "teacher_loss": 0.28117141127586365 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5725794434547424, "learning_rate": 1.7296253948492546e-05, "loss": 0.4421, "step": 16276, "teacher_loss": 0.42760375142097473 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.2967584729194641, "learning_rate": 1.729400944309869e-05, "loss": 0.2443, "step": 16277, "teacher_loss": 0.23845678567886353 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.31133121252059937, "learning_rate": 1.7291764885110078e-05, "loss": 0.2415, "step": 16278, "teacher_loss": 0.23370006680488586 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5670042634010315, "learning_rate": 1.7289520274578167e-05, "loss": 0.2549, "step": 16279, "teacher_loss": 0.22019356489181519 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.36124199628829956, "learning_rate": 1.7287275611554425e-05, "loss": 0.2509, "step": 16280, "teacher_loss": 0.23863258957862854 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5369096994400024, "learning_rate": 1.7285030896090307e-05, "loss": 0.2824, "step": 16281, "teacher_loss": 0.25416868925094604 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4974510669708252, "learning_rate": 1.728278612823728e-05, "loss": 0.3457, "step": 16282, "teacher_loss": 0.32883739471435547 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4022822678089142, "learning_rate": 1.728054130804681e-05, "loss": 0.2584, "step": 16283, "teacher_loss": 0.24246428906917572 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.508563756942749, "learning_rate": 1.7278296435570372e-05, "loss": 0.2159, "step": 16284, "teacher_loss": 0.18336786329746246 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.6856218576431274, "learning_rate": 1.727605151085942e-05, "loss": 0.2553, "step": 16285, "teacher_loss": 0.2075391411781311 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.372144877910614, "learning_rate": 1.7273806533965437e-05, "loss": 0.227, "step": 16286, "teacher_loss": 0.21084558963775635 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.4844840168952942, "learning_rate": 1.7271561504939886e-05, "loss": 0.5519, "step": 16287, "teacher_loss": 0.5594152212142944 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.49252432584762573, "learning_rate": 1.7269316423834238e-05, "loss": 0.3128, "step": 16288, "teacher_loss": 0.292876660823822 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.2281474471092224, "learning_rate": 1.7267071290699973e-05, "loss": 0.196, "step": 16289, "teacher_loss": 0.1924777626991272 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.16755689680576324, "learning_rate": 1.7264826105588554e-05, "loss": 0.1538, "step": 16290, "teacher_loss": 0.15230529010295868 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5102900266647339, "learning_rate": 1.7262580868551465e-05, "loss": 0.2026, "step": 16291, "teacher_loss": 0.16836917400360107 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.49474066495895386, "learning_rate": 1.726033557964018e-05, "loss": 0.355, "step": 16292, "teacher_loss": 0.3395208716392517 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.3448638916015625, "learning_rate": 1.7258090238906177e-05, "loss": 0.2354, "step": 16293, "teacher_loss": 0.22324399650096893 }, { "compression_loss": 0.0, "epoch": 2.94, "label_loss": 0.5268954038619995, "learning_rate": 1.725584484640094e-05, "loss": 0.2394, "step": 16294, "teacher_loss": 0.20747356116771698 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.42755186557769775, "learning_rate": 1.725359940217594e-05, "loss": 0.2955, "step": 16295, "teacher_loss": 0.2808440923690796 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.6451007127761841, "learning_rate": 1.7251353906282658e-05, "loss": 0.2849, "step": 16296, "teacher_loss": 0.24487422406673431 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3630632758140564, "learning_rate": 1.7249108358772588e-05, "loss": 0.2389, "step": 16297, "teacher_loss": 0.22506776452064514 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3847913146018982, "learning_rate": 1.72468627596972e-05, "loss": 0.3577, "step": 16298, "teacher_loss": 0.3546708822250366 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3812359571456909, "learning_rate": 1.7244617109107995e-05, "loss": 0.2024, "step": 16299, "teacher_loss": 0.18252722918987274 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.9431231021881104, "learning_rate": 1.7242371407056438e-05, "loss": 0.2437, "step": 16300, "teacher_loss": 0.16598841547966003 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.27630993723869324, "learning_rate": 1.7240125653594034e-05, "loss": 0.2831, "step": 16301, "teacher_loss": 0.28381648659706116 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.386505126953125, "learning_rate": 1.7237879848772267e-05, "loss": 0.1504, "step": 16302, "teacher_loss": 0.1241685301065445 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.346378892660141, "learning_rate": 1.7235633992642615e-05, "loss": 0.1937, "step": 16303, "teacher_loss": 0.1767573356628418 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3373514413833618, "learning_rate": 1.7233388085256587e-05, "loss": 0.2207, "step": 16304, "teacher_loss": 0.2077610194683075 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.5133582949638367, "learning_rate": 1.7231142126665666e-05, "loss": 0.3009, "step": 16305, "teacher_loss": 0.27734804153442383 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3472082018852234, "learning_rate": 1.722889611692134e-05, "loss": 0.2696, "step": 16306, "teacher_loss": 0.260980486869812 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 1.1247886419296265, "learning_rate": 1.7226650056075118e-05, "loss": 0.6099, "step": 16307, "teacher_loss": 0.552704393863678 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.42673367261886597, "learning_rate": 1.7224403944178474e-05, "loss": 0.2908, "step": 16308, "teacher_loss": 0.27568483352661133 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.5553944706916809, "learning_rate": 1.7222157781282924e-05, "loss": 0.2537, "step": 16309, "teacher_loss": 0.2202032506465912 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.5381714105606079, "learning_rate": 1.721991156743996e-05, "loss": 0.2726, "step": 16310, "teacher_loss": 0.24308007955551147 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.390067458152771, "learning_rate": 1.7217665302701075e-05, "loss": 0.3115, "step": 16311, "teacher_loss": 0.30278295278549194 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.9225806593894958, "learning_rate": 1.7215418987117774e-05, "loss": 0.3549, "step": 16312, "teacher_loss": 0.29182058572769165 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.6240533590316772, "learning_rate": 1.7213172620741556e-05, "loss": 0.253, "step": 16313, "teacher_loss": 0.21180428564548492 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.4970123767852783, "learning_rate": 1.721092620362393e-05, "loss": 0.2994, "step": 16314, "teacher_loss": 0.277407169342041 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.45748141407966614, "learning_rate": 1.720867973581639e-05, "loss": 0.2321, "step": 16315, "teacher_loss": 0.20705494284629822 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.38404062390327454, "learning_rate": 1.7206433217370452e-05, "loss": 0.196, "step": 16316, "teacher_loss": 0.17506375908851624 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.4473424553871155, "learning_rate": 1.7204186648337614e-05, "loss": 0.2612, "step": 16317, "teacher_loss": 0.2405557632446289 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.975817084312439, "learning_rate": 1.7201940028769384e-05, "loss": 0.3909, "step": 16318, "teacher_loss": 0.32595011591911316 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.286388635635376, "learning_rate": 1.719969335871727e-05, "loss": 0.328, "step": 16319, "teacher_loss": 0.33257192373275757 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.2465459406375885, "learning_rate": 1.7197446638232778e-05, "loss": 0.3276, "step": 16320, "teacher_loss": 0.3366280794143677 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.8590790033340454, "learning_rate": 1.7195199867367428e-05, "loss": 0.3324, "step": 16321, "teacher_loss": 0.273929625749588 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.2645612955093384, "learning_rate": 1.7192953046172726e-05, "loss": 0.1921, "step": 16322, "teacher_loss": 0.18405257165431976 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.22946320474147797, "learning_rate": 1.719070617470018e-05, "loss": 0.1404, "step": 16323, "teacher_loss": 0.13054664433002472 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.5966145992279053, "learning_rate": 1.7188459253001322e-05, "loss": 0.3846, "step": 16324, "teacher_loss": 0.36104580760002136 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.5277209877967834, "learning_rate": 1.718621228112764e-05, "loss": 0.2809, "step": 16325, "teacher_loss": 0.25347211956977844 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.4121139645576477, "learning_rate": 1.7183965259130676e-05, "loss": 0.2116, "step": 16326, "teacher_loss": 0.18931615352630615 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.441952645778656, "learning_rate": 1.7181718187061932e-05, "loss": 0.2549, "step": 16327, "teacher_loss": 0.23408719897270203 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3557584285736084, "learning_rate": 1.717947106497293e-05, "loss": 0.2776, "step": 16328, "teacher_loss": 0.26895472407341003 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.35478830337524414, "learning_rate": 1.7177223892915196e-05, "loss": 0.2408, "step": 16329, "teacher_loss": 0.22809430956840515 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.6504161953926086, "learning_rate": 1.717497667094024e-05, "loss": 0.3005, "step": 16330, "teacher_loss": 0.2616676688194275 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.4984099268913269, "learning_rate": 1.7172729399099592e-05, "loss": 0.3456, "step": 16331, "teacher_loss": 0.3286668062210083 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.8170156478881836, "learning_rate": 1.7170482077444773e-05, "loss": 0.5282, "step": 16332, "teacher_loss": 0.4961158335208893 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.35584887862205505, "learning_rate": 1.7168234706027304e-05, "loss": 0.2352, "step": 16333, "teacher_loss": 0.2218213677406311 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.428577184677124, "learning_rate": 1.716598728489872e-05, "loss": 0.1892, "step": 16334, "teacher_loss": 0.1625823676586151 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.4782981276512146, "learning_rate": 1.7163739814110538e-05, "loss": 0.2786, "step": 16335, "teacher_loss": 0.25641757249832153 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.8452093005180359, "learning_rate": 1.7161492293714288e-05, "loss": 0.3128, "step": 16336, "teacher_loss": 0.2536107897758484 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.37375450134277344, "learning_rate": 1.7159244723761505e-05, "loss": 0.294, "step": 16337, "teacher_loss": 0.2851158380508423 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.45043981075286865, "learning_rate": 1.7156997104303712e-05, "loss": 0.2547, "step": 16338, "teacher_loss": 0.23295922577381134 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.7535474300384521, "learning_rate": 1.7154749435392442e-05, "loss": 0.32, "step": 16339, "teacher_loss": 0.27182990312576294 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.664901614189148, "learning_rate": 1.715250171707923e-05, "loss": 0.3492, "step": 16340, "teacher_loss": 0.3141516447067261 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3617512285709381, "learning_rate": 1.7150253949415604e-05, "loss": 0.2934, "step": 16341, "teacher_loss": 0.2858373522758484 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.8883835673332214, "learning_rate": 1.7148006132453102e-05, "loss": 0.3001, "step": 16342, "teacher_loss": 0.23472237586975098 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.9331611394882202, "learning_rate": 1.7145758266243258e-05, "loss": 0.3861, "step": 16343, "teacher_loss": 0.32535603642463684 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.22941254079341888, "learning_rate": 1.7143510350837615e-05, "loss": 0.2551, "step": 16344, "teacher_loss": 0.25792190432548523 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.6563414335250854, "learning_rate": 1.7141262386287703e-05, "loss": 0.2796, "step": 16345, "teacher_loss": 0.23777785897254944 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.3304232954978943, "learning_rate": 1.7139014372645065e-05, "loss": 0.2058, "step": 16346, "teacher_loss": 0.1920068860054016 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.7204067707061768, "learning_rate": 1.7136766309961243e-05, "loss": 0.2529, "step": 16347, "teacher_loss": 0.20091986656188965 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.20554420351982117, "learning_rate": 1.713451819828777e-05, "loss": 0.1697, "step": 16348, "teacher_loss": 0.1657392382621765 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.39084890484809875, "learning_rate": 1.7132270037676198e-05, "loss": 0.2001, "step": 16349, "teacher_loss": 0.1789606213569641 }, { "compression_loss": 0.0, "epoch": 2.95, "label_loss": 0.1893659383058548, "learning_rate": 1.7130021828178066e-05, "loss": 0.225, "step": 16350, "teacher_loss": 0.2289954572916031 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.46683138608932495, "learning_rate": 1.712777356984492e-05, "loss": 0.3104, "step": 16351, "teacher_loss": 0.29303181171417236 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4296014904975891, "learning_rate": 1.7125525262728308e-05, "loss": 0.2729, "step": 16352, "teacher_loss": 0.2554601728916168 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.3253156542778015, "learning_rate": 1.712327690687977e-05, "loss": 0.278, "step": 16353, "teacher_loss": 0.27270281314849854 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.16371138393878937, "learning_rate": 1.7121028502350864e-05, "loss": 0.1761, "step": 16354, "teacher_loss": 0.17745622992515564 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.18056559562683105, "learning_rate": 1.711878004919313e-05, "loss": 0.2001, "step": 16355, "teacher_loss": 0.2022608071565628 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.1301613748073578, "learning_rate": 1.7116531547458115e-05, "loss": 0.1799, "step": 16356, "teacher_loss": 0.1853765845298767 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.6129274964332581, "learning_rate": 1.711428299719739e-05, "loss": 0.2377, "step": 16357, "teacher_loss": 0.19595646858215332 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5263664126396179, "learning_rate": 1.7112034398462487e-05, "loss": 0.2359, "step": 16358, "teacher_loss": 0.20363107323646545 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.3070058822631836, "learning_rate": 1.710978575130497e-05, "loss": 0.2515, "step": 16359, "teacher_loss": 0.2453344464302063 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4108779728412628, "learning_rate": 1.710753705577639e-05, "loss": 0.3026, "step": 16360, "teacher_loss": 0.2906193733215332 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.43675220012664795, "learning_rate": 1.7105288311928303e-05, "loss": 0.3134, "step": 16361, "teacher_loss": 0.2997252345085144 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.12622493505477905, "learning_rate": 1.7103039519812274e-05, "loss": 0.1655, "step": 16362, "teacher_loss": 0.1698666512966156 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.6012797355651855, "learning_rate": 1.7100790679479845e-05, "loss": 0.4318, "step": 16363, "teacher_loss": 0.41300714015960693 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.3440447449684143, "learning_rate": 1.709854179098259e-05, "loss": 0.3186, "step": 16364, "teacher_loss": 0.31572195887565613 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5214061737060547, "learning_rate": 1.7096292854372063e-05, "loss": 0.4038, "step": 16365, "teacher_loss": 0.39077338576316833 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.37245213985443115, "learning_rate": 1.7094043869699824e-05, "loss": 0.2563, "step": 16366, "teacher_loss": 0.24339744448661804 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5611665844917297, "learning_rate": 1.7091794837017438e-05, "loss": 0.2725, "step": 16367, "teacher_loss": 0.24038168787956238 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5801632404327393, "learning_rate": 1.7089545756376467e-05, "loss": 0.2354, "step": 16368, "teacher_loss": 0.19712863862514496 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5452362298965454, "learning_rate": 1.7087296627828478e-05, "loss": 0.2517, "step": 16369, "teacher_loss": 0.21910472214221954 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.2074383795261383, "learning_rate": 1.7085047451425035e-05, "loss": 0.1556, "step": 16370, "teacher_loss": 0.14980915188789368 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 1.001579999923706, "learning_rate": 1.7082798227217702e-05, "loss": 0.5016, "step": 16371, "teacher_loss": 0.4460577070713043 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.39290955662727356, "learning_rate": 1.7080548955258054e-05, "loss": 0.1881, "step": 16372, "teacher_loss": 0.16534891724586487 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5870050191879272, "learning_rate": 1.707829963559766e-05, "loss": 0.2937, "step": 16373, "teacher_loss": 0.2610911726951599 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.07918013632297516, "learning_rate": 1.707605026828808e-05, "loss": 0.2388, "step": 16374, "teacher_loss": 0.2565152645111084 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.47072678804397583, "learning_rate": 1.707380085338089e-05, "loss": 0.3791, "step": 16375, "teacher_loss": 0.36890822649002075 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4134020209312439, "learning_rate": 1.7071551390927667e-05, "loss": 0.2552, "step": 16376, "teacher_loss": 0.23756662011146545 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.34473299980163574, "learning_rate": 1.7069301880979983e-05, "loss": 0.2172, "step": 16377, "teacher_loss": 0.20299117267131805 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4672027826309204, "learning_rate": 1.7067052323589408e-05, "loss": 0.3335, "step": 16378, "teacher_loss": 0.3186095058917999 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.24797323346138, "learning_rate": 1.7064802718807526e-05, "loss": 0.2267, "step": 16379, "teacher_loss": 0.22430217266082764 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.22770924866199493, "learning_rate": 1.70625530666859e-05, "loss": 0.2809, "step": 16380, "teacher_loss": 0.2868365943431854 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5744701623916626, "learning_rate": 1.7060303367276123e-05, "loss": 0.2904, "step": 16381, "teacher_loss": 0.25887537002563477 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4150558114051819, "learning_rate": 1.7058053620629768e-05, "loss": 0.2386, "step": 16382, "teacher_loss": 0.21898195147514343 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.44860827922821045, "learning_rate": 1.7055803826798406e-05, "loss": 0.318, "step": 16383, "teacher_loss": 0.30348485708236694 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.18151721358299255, "learning_rate": 1.705355398583363e-05, "loss": 0.1874, "step": 16384, "teacher_loss": 0.18804213404655457 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.8944460153579712, "learning_rate": 1.7051304097787018e-05, "loss": 0.4166, "step": 16385, "teacher_loss": 0.36347895860671997 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.28600141406059265, "learning_rate": 1.7049054162710154e-05, "loss": 0.2072, "step": 16386, "teacher_loss": 0.19845086336135864 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.7274162173271179, "learning_rate": 1.7046804180654623e-05, "loss": 0.5982, "step": 16387, "teacher_loss": 0.5837960243225098 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5957872271537781, "learning_rate": 1.7044554151672003e-05, "loss": 0.2966, "step": 16388, "teacher_loss": 0.2633640468120575 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.37750643491744995, "learning_rate": 1.7042304075813893e-05, "loss": 0.3656, "step": 16389, "teacher_loss": 0.3642995357513428 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.24716952443122864, "learning_rate": 1.7040053953131872e-05, "loss": 0.1606, "step": 16390, "teacher_loss": 0.15096841752529144 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.7059513330459595, "learning_rate": 1.7037803783677525e-05, "loss": 0.387, "step": 16391, "teacher_loss": 0.35151395201683044 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5893921852111816, "learning_rate": 1.7035553567502457e-05, "loss": 0.3849, "step": 16392, "teacher_loss": 0.3621862530708313 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5164104700088501, "learning_rate": 1.703330330465824e-05, "loss": 0.2122, "step": 16393, "teacher_loss": 0.1783895641565323 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5739178657531738, "learning_rate": 1.7031052995196475e-05, "loss": 0.2414, "step": 16394, "teacher_loss": 0.20444750785827637 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.38800162076950073, "learning_rate": 1.702880263916876e-05, "loss": 0.281, "step": 16395, "teacher_loss": 0.2691548764705658 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5381648540496826, "learning_rate": 1.7026552236626676e-05, "loss": 0.2259, "step": 16396, "teacher_loss": 0.1911858469247818 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.40319883823394775, "learning_rate": 1.7024301787621828e-05, "loss": 0.285, "step": 16397, "teacher_loss": 0.2718772888183594 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.4576597809791565, "learning_rate": 1.702205129220581e-05, "loss": 0.2842, "step": 16398, "teacher_loss": 0.2648809850215912 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.34787678718566895, "learning_rate": 1.7019800750430218e-05, "loss": 0.2234, "step": 16399, "teacher_loss": 0.20955616235733032 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.5116361379623413, "learning_rate": 1.701755016234665e-05, "loss": 0.3149, "step": 16400, "teacher_loss": 0.2930814027786255 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.6844651699066162, "learning_rate": 1.7015299528006702e-05, "loss": 0.3903, "step": 16401, "teacher_loss": 0.35759711265563965 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.2190747857093811, "learning_rate": 1.7013048847461982e-05, "loss": 0.232, "step": 16402, "teacher_loss": 0.23348617553710938 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.3803383708000183, "learning_rate": 1.701079812076409e-05, "loss": 0.2916, "step": 16403, "teacher_loss": 0.2816971242427826 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.6304861307144165, "learning_rate": 1.7008547347964625e-05, "loss": 0.4758, "step": 16404, "teacher_loss": 0.4586649537086487 }, { "compression_loss": 0.0, "epoch": 2.96, "label_loss": 0.6817525625228882, "learning_rate": 1.7006296529115186e-05, "loss": 0.3006, "step": 16405, "teacher_loss": 0.25820252299308777 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5929562449455261, "learning_rate": 1.7004045664267386e-05, "loss": 0.3355, "step": 16406, "teacher_loss": 0.3068896234035492 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.37227070331573486, "learning_rate": 1.700179475347283e-05, "loss": 0.3724, "step": 16407, "teacher_loss": 0.3724183440208435 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.6756417751312256, "learning_rate": 1.6999543796783117e-05, "loss": 0.2342, "step": 16408, "teacher_loss": 0.18512097001075745 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.27274563908576965, "learning_rate": 1.699729279424986e-05, "loss": 0.1751, "step": 16409, "teacher_loss": 0.1642094850540161 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.30976438522338867, "learning_rate": 1.6995041745924672e-05, "loss": 0.2722, "step": 16410, "teacher_loss": 0.26806509494781494 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4951731562614441, "learning_rate": 1.6992790651859155e-05, "loss": 0.2253, "step": 16411, "teacher_loss": 0.1953664869070053 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5574835538864136, "learning_rate": 1.6990539512104925e-05, "loss": 0.2659, "step": 16412, "teacher_loss": 0.23350423574447632 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.2652541995048523, "learning_rate": 1.698828832671359e-05, "loss": 0.1691, "step": 16413, "teacher_loss": 0.15836912393569946 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.6703963875770569, "learning_rate": 1.6986037095736764e-05, "loss": 0.3054, "step": 16414, "teacher_loss": 0.26487770676612854 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4167998433113098, "learning_rate": 1.6983785819226066e-05, "loss": 0.1525, "step": 16415, "teacher_loss": 0.12311267852783203 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.28624194860458374, "learning_rate": 1.6981534497233104e-05, "loss": 0.2461, "step": 16416, "teacher_loss": 0.24166792631149292 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4696184992790222, "learning_rate": 1.69792831298095e-05, "loss": 0.2499, "step": 16417, "teacher_loss": 0.2255113273859024 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4593292474746704, "learning_rate": 1.697703171700686e-05, "loss": 0.3819, "step": 16418, "teacher_loss": 0.37332725524902344 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.18037673830986023, "learning_rate": 1.6974780258876814e-05, "loss": 0.1976, "step": 16419, "teacher_loss": 0.19952178001403809 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.425466388463974, "learning_rate": 1.6972528755470983e-05, "loss": 0.3145, "step": 16420, "teacher_loss": 0.3021823465824127 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5457009077072144, "learning_rate": 1.6970277206840976e-05, "loss": 0.3867, "step": 16421, "teacher_loss": 0.3690539002418518 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.2696635127067566, "learning_rate": 1.6968025613038424e-05, "loss": 0.2131, "step": 16422, "teacher_loss": 0.20682938396930695 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.45378175377845764, "learning_rate": 1.696577397411494e-05, "loss": 0.2452, "step": 16423, "teacher_loss": 0.22202830016613007 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.3617762327194214, "learning_rate": 1.6963522290122155e-05, "loss": 0.2023, "step": 16424, "teacher_loss": 0.1845371574163437 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5048460960388184, "learning_rate": 1.696127056111169e-05, "loss": 0.2368, "step": 16425, "teacher_loss": 0.2069961130619049 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.45305269956588745, "learning_rate": 1.6959018787135172e-05, "loss": 0.2041, "step": 16426, "teacher_loss": 0.17649075388908386 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.40985673666000366, "learning_rate": 1.6956766968244226e-05, "loss": 0.3094, "step": 16427, "teacher_loss": 0.2982270419597626 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5064584016799927, "learning_rate": 1.695451510449048e-05, "loss": 0.304, "step": 16428, "teacher_loss": 0.2815263867378235 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.353840172290802, "learning_rate": 1.6952263195925567e-05, "loss": 0.2339, "step": 16429, "teacher_loss": 0.2205272614955902 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.3625691533088684, "learning_rate": 1.69500112426011e-05, "loss": 0.2205, "step": 16430, "teacher_loss": 0.20474043488502502 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.8140820264816284, "learning_rate": 1.6947759244568734e-05, "loss": 0.5425, "step": 16431, "teacher_loss": 0.512336015701294 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4460495710372925, "learning_rate": 1.6945507201880086e-05, "loss": 0.3836, "step": 16432, "teacher_loss": 0.3766152262687683 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.7889648675918579, "learning_rate": 1.6943255114586788e-05, "loss": 0.2922, "step": 16433, "teacher_loss": 0.23696674406528473 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.25190770626068115, "learning_rate": 1.6941002982740475e-05, "loss": 0.1738, "step": 16434, "teacher_loss": 0.16512750089168549 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.27945244312286377, "learning_rate": 1.693875080639279e-05, "loss": 0.2964, "step": 16435, "teacher_loss": 0.2983161211013794 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.33559486269950867, "learning_rate": 1.6936498585595355e-05, "loss": 0.2061, "step": 16436, "teacher_loss": 0.19168466329574585 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.37142181396484375, "learning_rate": 1.6934246320399818e-05, "loss": 0.2319, "step": 16437, "teacher_loss": 0.21643032133579254 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.2934309244155884, "learning_rate": 1.6931994010857804e-05, "loss": 0.211, "step": 16438, "teacher_loss": 0.20188406109809875 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.634645938873291, "learning_rate": 1.6929741657020964e-05, "loss": 0.3068, "step": 16439, "teacher_loss": 0.2703203558921814 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.358377605676651, "learning_rate": 1.692748925894094e-05, "loss": 0.2419, "step": 16440, "teacher_loss": 0.22893080115318298 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.7055276036262512, "learning_rate": 1.6925236816669358e-05, "loss": 0.316, "step": 16441, "teacher_loss": 0.2727735638618469 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.5994584560394287, "learning_rate": 1.6922984330257875e-05, "loss": 0.3804, "step": 16442, "teacher_loss": 0.35602110624313354 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4030289649963379, "learning_rate": 1.692073179975812e-05, "loss": 0.2555, "step": 16443, "teacher_loss": 0.23912355303764343 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.28601977229118347, "learning_rate": 1.6918479225221744e-05, "loss": 0.2442, "step": 16444, "teacher_loss": 0.23960527777671814 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.3167138695716858, "learning_rate": 1.6916226606700396e-05, "loss": 0.2695, "step": 16445, "teacher_loss": 0.26420146226882935 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.6018577814102173, "learning_rate": 1.6913973944245713e-05, "loss": 0.2388, "step": 16446, "teacher_loss": 0.19851410388946533 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 1.1342229843139648, "learning_rate": 1.691172123790935e-05, "loss": 0.3924, "step": 16447, "teacher_loss": 0.3100038766860962 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.9424023032188416, "learning_rate": 1.6909468487742947e-05, "loss": 0.3751, "step": 16448, "teacher_loss": 0.3120918571949005 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.44704148173332214, "learning_rate": 1.6907215693798155e-05, "loss": 0.1759, "step": 16449, "teacher_loss": 0.14577841758728027 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.4822198748588562, "learning_rate": 1.690496285612663e-05, "loss": 0.2854, "step": 16450, "teacher_loss": 0.2634811997413635 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 1.1144938468933105, "learning_rate": 1.690270997478001e-05, "loss": 0.4803, "step": 16451, "teacher_loss": 0.40985995531082153 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.2706015706062317, "learning_rate": 1.6900457049809963e-05, "loss": 0.1745, "step": 16452, "teacher_loss": 0.163822203874588 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.29608991742134094, "learning_rate": 1.6898204081268134e-05, "loss": 0.23, "step": 16453, "teacher_loss": 0.22270649671554565 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.952675461769104, "learning_rate": 1.6895951069206175e-05, "loss": 0.3429, "step": 16454, "teacher_loss": 0.2751733958721161 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.31866759061813354, "learning_rate": 1.6893698013675742e-05, "loss": 0.2194, "step": 16455, "teacher_loss": 0.20841509103775024 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.22175806760787964, "learning_rate": 1.6891444914728484e-05, "loss": 0.1823, "step": 16456, "teacher_loss": 0.1779344379901886 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.31399017572402954, "learning_rate": 1.6889191772416073e-05, "loss": 0.2223, "step": 16457, "teacher_loss": 0.2121584266424179 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.35604584217071533, "learning_rate": 1.6886938586790158e-05, "loss": 0.2001, "step": 16458, "teacher_loss": 0.1828138530254364 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.3081555664539337, "learning_rate": 1.6884685357902395e-05, "loss": 0.2668, "step": 16459, "teacher_loss": 0.2622055411338806 }, { "compression_loss": 0.0, "epoch": 2.97, "label_loss": 0.699011504650116, "learning_rate": 1.688243208580445e-05, "loss": 0.438, "step": 16460, "teacher_loss": 0.4090268015861511 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.22945944964885712, "learning_rate": 1.6880178770547984e-05, "loss": 0.2791, "step": 16461, "teacher_loss": 0.28464236855506897 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.563493549823761, "learning_rate": 1.687792541218465e-05, "loss": 0.2913, "step": 16462, "teacher_loss": 0.2610647976398468 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.8003162145614624, "learning_rate": 1.6875672010766126e-05, "loss": 0.2721, "step": 16463, "teacher_loss": 0.21337465941905975 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.40353572368621826, "learning_rate": 1.6873418566344056e-05, "loss": 0.2317, "step": 16464, "teacher_loss": 0.21256610751152039 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.2510928511619568, "learning_rate": 1.6871165078970118e-05, "loss": 0.2974, "step": 16465, "teacher_loss": 0.3025716543197632 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.35506492853164673, "learning_rate": 1.6868911548695977e-05, "loss": 0.2236, "step": 16466, "teacher_loss": 0.20897358655929565 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.4220644533634186, "learning_rate": 1.68666579755733e-05, "loss": 0.2438, "step": 16467, "teacher_loss": 0.22401660680770874 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 1.0676326751708984, "learning_rate": 1.6864404359653742e-05, "loss": 0.3961, "step": 16468, "teacher_loss": 0.3214343786239624 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.26833629608154297, "learning_rate": 1.686215070098899e-05, "loss": 0.1411, "step": 16469, "teacher_loss": 0.12698838114738464 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5252071022987366, "learning_rate": 1.6859896999630703e-05, "loss": 0.4438, "step": 16470, "teacher_loss": 0.43476295471191406 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.42853832244873047, "learning_rate": 1.685764325563055e-05, "loss": 0.2661, "step": 16471, "teacher_loss": 0.24807879328727722 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.49477314949035645, "learning_rate": 1.6855389469040217e-05, "loss": 0.2616, "step": 16472, "teacher_loss": 0.23570013046264648 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3834612965583801, "learning_rate": 1.6853135639911357e-05, "loss": 0.239, "step": 16473, "teacher_loss": 0.2229882925748825 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.1757422238588333, "learning_rate": 1.685088176829566e-05, "loss": 0.2537, "step": 16474, "teacher_loss": 0.2623230814933777 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.7384520769119263, "learning_rate": 1.684862785424479e-05, "loss": 0.2589, "step": 16475, "teacher_loss": 0.20565253496170044 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.12380479276180267, "learning_rate": 1.684637389781042e-05, "loss": 0.1656, "step": 16476, "teacher_loss": 0.1702931821346283 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.6220978498458862, "learning_rate": 1.6844119899044242e-05, "loss": 0.3464, "step": 16477, "teacher_loss": 0.3157762885093689 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.49162235856056213, "learning_rate": 1.6841865857997917e-05, "loss": 0.1985, "step": 16478, "teacher_loss": 0.16591408848762512 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.35095080733299255, "learning_rate": 1.683961177472313e-05, "loss": 0.236, "step": 16479, "teacher_loss": 0.22325092554092407 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.26990777254104614, "learning_rate": 1.6837357649271565e-05, "loss": 0.268, "step": 16480, "teacher_loss": 0.2677558958530426 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.33736637234687805, "learning_rate": 1.6835103481694893e-05, "loss": 0.2441, "step": 16481, "teacher_loss": 0.2337876260280609 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.38045647740364075, "learning_rate": 1.6832849272044804e-05, "loss": 0.2291, "step": 16482, "teacher_loss": 0.2123154103755951 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.8675621151924133, "learning_rate": 1.683059502037298e-05, "loss": 0.48, "step": 16483, "teacher_loss": 0.4369434118270874 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 1.172670602798462, "learning_rate": 1.682834072673109e-05, "loss": 0.3942, "step": 16484, "teacher_loss": 0.3076777756214142 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.31893390417099, "learning_rate": 1.682608639117084e-05, "loss": 0.446, "step": 16485, "teacher_loss": 0.4601181745529175 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.2271900773048401, "learning_rate": 1.6823832013743893e-05, "loss": 0.1729, "step": 16486, "teacher_loss": 0.16684690117835999 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3577190041542053, "learning_rate": 1.6821577594501955e-05, "loss": 0.3096, "step": 16487, "teacher_loss": 0.30424898862838745 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.7421973347663879, "learning_rate": 1.6819323133496702e-05, "loss": 0.263, "step": 16488, "teacher_loss": 0.2097875475883484 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5481189489364624, "learning_rate": 1.681706863077982e-05, "loss": 0.2896, "step": 16489, "teacher_loss": 0.260861873626709 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5275344848632812, "learning_rate": 1.6814814086403004e-05, "loss": 0.2338, "step": 16490, "teacher_loss": 0.20114298164844513 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.7977844476699829, "learning_rate": 1.6812559500417945e-05, "loss": 0.2862, "step": 16491, "teacher_loss": 0.22936393320560455 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.7897177934646606, "learning_rate": 1.6810304872876327e-05, "loss": 0.5354, "step": 16492, "teacher_loss": 0.507138729095459 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.22917184233665466, "learning_rate": 1.6808050203829845e-05, "loss": 0.192, "step": 16493, "teacher_loss": 0.18786287307739258 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5464550256729126, "learning_rate": 1.6805795493330197e-05, "loss": 0.2946, "step": 16494, "teacher_loss": 0.2665936350822449 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.23428234457969666, "learning_rate": 1.6803540741429073e-05, "loss": 0.1962, "step": 16495, "teacher_loss": 0.19198592007160187 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.29213082790374756, "learning_rate": 1.6801285948178165e-05, "loss": 0.2206, "step": 16496, "teacher_loss": 0.21268031001091003 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.33765098452568054, "learning_rate": 1.679903111362917e-05, "loss": 0.3796, "step": 16497, "teacher_loss": 0.3842237591743469 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3511248528957367, "learning_rate": 1.6796776237833783e-05, "loss": 0.2218, "step": 16498, "teacher_loss": 0.20748554170131683 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.49120283126831055, "learning_rate": 1.6794521320843708e-05, "loss": 0.2419, "step": 16499, "teacher_loss": 0.2142144739627838 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5249272584915161, "learning_rate": 1.6792266362710637e-05, "loss": 0.2947, "step": 16500, "teacher_loss": 0.2690713405609131 }, { "epoch": 2.98, "eval_exact_match": 80.07568590350047, "eval_f1": 87.3856394382346, "step": 16500 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3857632279396057, "learning_rate": 1.6790011363486273e-05, "loss": 0.3363, "step": 16501, "teacher_loss": 0.3307896554470062 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.8408334255218506, "learning_rate": 1.6787756323222316e-05, "loss": 0.2703, "step": 16502, "teacher_loss": 0.20691636204719543 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.581544041633606, "learning_rate": 1.6785501241970465e-05, "loss": 0.2262, "step": 16503, "teacher_loss": 0.18675866723060608 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3728638291358948, "learning_rate": 1.678324611978242e-05, "loss": 0.1937, "step": 16504, "teacher_loss": 0.17378714680671692 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.28760457038879395, "learning_rate": 1.6780990956709897e-05, "loss": 0.2829, "step": 16505, "teacher_loss": 0.2823658585548401 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.34556347131729126, "learning_rate": 1.6778735752804586e-05, "loss": 0.2727, "step": 16506, "teacher_loss": 0.26455825567245483 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.6825772523880005, "learning_rate": 1.67764805081182e-05, "loss": 0.286, "step": 16507, "teacher_loss": 0.24195297062397003 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.429084450006485, "learning_rate": 1.677422522270244e-05, "loss": 0.4254, "step": 16508, "teacher_loss": 0.425011545419693 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5202856659889221, "learning_rate": 1.677196989660901e-05, "loss": 0.2063, "step": 16509, "teacher_loss": 0.17143088579177856 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3685051202774048, "learning_rate": 1.676971452988963e-05, "loss": 0.3815, "step": 16510, "teacher_loss": 0.38297921419143677 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.5092839002609253, "learning_rate": 1.6767459122595998e-05, "loss": 0.1889, "step": 16511, "teacher_loss": 0.15328705310821533 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 1.247352957725525, "learning_rate": 1.676520367477983e-05, "loss": 0.3729, "step": 16512, "teacher_loss": 0.27576154470443726 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3702830672264099, "learning_rate": 1.6762948186492836e-05, "loss": 0.2241, "step": 16513, "teacher_loss": 0.20789921283721924 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.7963496446609497, "learning_rate": 1.6760692657786717e-05, "loss": 0.3029, "step": 16514, "teacher_loss": 0.2480759620666504 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.24863898754119873, "learning_rate": 1.67584370887132e-05, "loss": 0.2135, "step": 16515, "teacher_loss": 0.2095625102519989 }, { "compression_loss": 0.0, "epoch": 2.98, "label_loss": 0.3384447693824768, "learning_rate": 1.6756181479323994e-05, "loss": 0.2206, "step": 16516, "teacher_loss": 0.20747318863868713 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.745774507522583, "learning_rate": 1.675392582967081e-05, "loss": 0.3068, "step": 16517, "teacher_loss": 0.25808006525039673 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.32220590114593506, "learning_rate": 1.6751670139805365e-05, "loss": 0.1803, "step": 16518, "teacher_loss": 0.16451692581176758 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5340006351470947, "learning_rate": 1.6749414409779372e-05, "loss": 0.2666, "step": 16519, "teacher_loss": 0.23688694834709167 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4929790198802948, "learning_rate": 1.674715863964456e-05, "loss": 0.3195, "step": 16520, "teacher_loss": 0.300197958946228 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.38084110617637634, "learning_rate": 1.6744902829452634e-05, "loss": 0.2157, "step": 16521, "teacher_loss": 0.1973927617073059 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4732380509376526, "learning_rate": 1.674264697925532e-05, "loss": 0.2229, "step": 16522, "teacher_loss": 0.195109024643898 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 1.4688782691955566, "learning_rate": 1.674039108910433e-05, "loss": 0.4256, "step": 16523, "teacher_loss": 0.30963513255119324 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4075910449028015, "learning_rate": 1.6738135159051392e-05, "loss": 0.1982, "step": 16524, "teacher_loss": 0.17494887113571167 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 1.04481840133667, "learning_rate": 1.673587918914823e-05, "loss": 0.3296, "step": 16525, "teacher_loss": 0.25009918212890625 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.31203579902648926, "learning_rate": 1.673362317944656e-05, "loss": 0.2834, "step": 16526, "teacher_loss": 0.28017544746398926 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.2082904428243637, "learning_rate": 1.673136712999811e-05, "loss": 0.2383, "step": 16527, "teacher_loss": 0.24165067076683044 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.7093226909637451, "learning_rate": 1.6729111040854597e-05, "loss": 0.3567, "step": 16528, "teacher_loss": 0.31753838062286377 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4865039587020874, "learning_rate": 1.672685491206776e-05, "loss": 0.2373, "step": 16529, "teacher_loss": 0.2096029818058014 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.58197021484375, "learning_rate": 1.6724598743689314e-05, "loss": 0.3135, "step": 16530, "teacher_loss": 0.2836433947086334 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.6645232439041138, "learning_rate": 1.6722342535770984e-05, "loss": 0.4378, "step": 16531, "teacher_loss": 0.4126243591308594 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.9061720371246338, "learning_rate": 1.672008628836451e-05, "loss": 0.3688, "step": 16532, "teacher_loss": 0.3090691566467285 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4916762709617615, "learning_rate": 1.6717830001521613e-05, "loss": 0.3221, "step": 16533, "teacher_loss": 0.30325937271118164 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 1.0665887594223022, "learning_rate": 1.671557367529402e-05, "loss": 0.395, "step": 16534, "teacher_loss": 0.3203321099281311 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.7706929445266724, "learning_rate": 1.6713317309733475e-05, "loss": 0.2187, "step": 16535, "teacher_loss": 0.15739917755126953 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5842939615249634, "learning_rate": 1.6711060904891695e-05, "loss": 0.3261, "step": 16536, "teacher_loss": 0.297380268573761 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.6443324089050293, "learning_rate": 1.670880446082042e-05, "loss": 0.3627, "step": 16537, "teacher_loss": 0.33142781257629395 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5103682279586792, "learning_rate": 1.6706547977571382e-05, "loss": 0.2864, "step": 16538, "teacher_loss": 0.2615398168563843 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.3172757923603058, "learning_rate": 1.6704291455196313e-05, "loss": 0.2344, "step": 16539, "teacher_loss": 0.22523006796836853 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.3370593786239624, "learning_rate": 1.6702034893746953e-05, "loss": 0.2617, "step": 16540, "teacher_loss": 0.25331878662109375 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.14756816625595093, "learning_rate": 1.6699778293275033e-05, "loss": 0.211, "step": 16541, "teacher_loss": 0.2180483639240265 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5081421732902527, "learning_rate": 1.6697521653832296e-05, "loss": 0.4599, "step": 16542, "teacher_loss": 0.4545362889766693 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.26985251903533936, "learning_rate": 1.6695264975470478e-05, "loss": 0.1717, "step": 16543, "teacher_loss": 0.16077929735183716 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4301908612251282, "learning_rate": 1.6693008258241314e-05, "loss": 0.2931, "step": 16544, "teacher_loss": 0.2778944969177246 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.32233282923698425, "learning_rate": 1.669075150219655e-05, "loss": 0.2474, "step": 16545, "teacher_loss": 0.23904860019683838 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.6680991053581238, "learning_rate": 1.668849470738792e-05, "loss": 0.2547, "step": 16546, "teacher_loss": 0.20872417092323303 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.2690372169017792, "learning_rate": 1.6686237873867172e-05, "loss": 0.2655, "step": 16547, "teacher_loss": 0.2651504874229431 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.840910017490387, "learning_rate": 1.6683981001686042e-05, "loss": 0.3049, "step": 16548, "teacher_loss": 0.2453339844942093 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.45750534534454346, "learning_rate": 1.6681724090896273e-05, "loss": 0.182, "step": 16549, "teacher_loss": 0.1514366865158081 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5426023006439209, "learning_rate": 1.667946714154962e-05, "loss": 0.3178, "step": 16550, "teacher_loss": 0.2927940785884857 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.26622992753982544, "learning_rate": 1.6677210153697815e-05, "loss": 0.1899, "step": 16551, "teacher_loss": 0.1814207285642624 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.39546990394592285, "learning_rate": 1.6674953127392617e-05, "loss": 0.3249, "step": 16552, "teacher_loss": 0.3171122074127197 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.31601548194885254, "learning_rate": 1.6672696062685757e-05, "loss": 0.2023, "step": 16553, "teacher_loss": 0.18966734409332275 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.7636637091636658, "learning_rate": 1.6670438959628996e-05, "loss": 0.3412, "step": 16554, "teacher_loss": 0.2942391633987427 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.6111501455307007, "learning_rate": 1.6668181818274077e-05, "loss": 0.3175, "step": 16555, "teacher_loss": 0.2848797142505646 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.704237163066864, "learning_rate": 1.6665924638672747e-05, "loss": 0.3498, "step": 16556, "teacher_loss": 0.31045907735824585 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.44039326906204224, "learning_rate": 1.6663667420876767e-05, "loss": 0.211, "step": 16557, "teacher_loss": 0.18547756969928741 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.624261200428009, "learning_rate": 1.6661410164937874e-05, "loss": 0.3824, "step": 16558, "teacher_loss": 0.3555159568786621 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.2920651137828827, "learning_rate": 1.6659152870907835e-05, "loss": 0.1634, "step": 16559, "teacher_loss": 0.14914312958717346 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.2501271963119507, "learning_rate": 1.665689553883839e-05, "loss": 0.2149, "step": 16560, "teacher_loss": 0.21100470423698425 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.1721057891845703, "learning_rate": 1.6654638168781295e-05, "loss": 0.1487, "step": 16561, "teacher_loss": 0.1461189240217209 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.6181367039680481, "learning_rate": 1.6652380760788312e-05, "loss": 0.305, "step": 16562, "teacher_loss": 0.2702442407608032 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5905710458755493, "learning_rate": 1.665012331491119e-05, "loss": 0.2196, "step": 16563, "teacher_loss": 0.17832598090171814 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4411013126373291, "learning_rate": 1.6647865831201686e-05, "loss": 0.213, "step": 16564, "teacher_loss": 0.18764615058898926 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.11961507052183151, "learning_rate": 1.664560830971157e-05, "loss": 0.165, "step": 16565, "teacher_loss": 0.17003558576107025 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.7063257694244385, "learning_rate": 1.6643350750492578e-05, "loss": 0.2899, "step": 16566, "teacher_loss": 0.2436167150735855 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.3910992741584778, "learning_rate": 1.6641093153596482e-05, "loss": 0.3091, "step": 16567, "teacher_loss": 0.299952894449234 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.5786798596382141, "learning_rate": 1.6638835519075044e-05, "loss": 0.2595, "step": 16568, "teacher_loss": 0.22404122352600098 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.3451366424560547, "learning_rate": 1.6636577846980014e-05, "loss": 0.2421, "step": 16569, "teacher_loss": 0.23060841858386993 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.4296899735927582, "learning_rate": 1.663432013736317e-05, "loss": 0.2129, "step": 16570, "teacher_loss": 0.18877197802066803 }, { "compression_loss": 0.0, "epoch": 2.99, "label_loss": 0.26885688304901123, "learning_rate": 1.663206239027626e-05, "loss": 0.2417, "step": 16571, "teacher_loss": 0.23864570260047913 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.34053635597229004, "learning_rate": 1.662980460577105e-05, "loss": 0.2715, "step": 16572, "teacher_loss": 0.2638576030731201 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5596696734428406, "learning_rate": 1.662754678389931e-05, "loss": 0.3635, "step": 16573, "teacher_loss": 0.3417547345161438 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.45566555857658386, "learning_rate": 1.6625288924712803e-05, "loss": 0.2182, "step": 16574, "teacher_loss": 0.19183024764060974 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3131885528564453, "learning_rate": 1.6623031028263292e-05, "loss": 0.2156, "step": 16575, "teacher_loss": 0.20480357110500336 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.6385051012039185, "learning_rate": 1.6620773094602546e-05, "loss": 0.3175, "step": 16576, "teacher_loss": 0.28179389238357544 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 1.177838921546936, "learning_rate": 1.6618515123782337e-05, "loss": 0.2932, "step": 16577, "teacher_loss": 0.1949024796485901 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.2383987307548523, "learning_rate": 1.6616257115854423e-05, "loss": 0.2055, "step": 16578, "teacher_loss": 0.2017991840839386 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.38498571515083313, "learning_rate": 1.6613999070870577e-05, "loss": 0.1875, "step": 16579, "teacher_loss": 0.16560658812522888 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.6439411640167236, "learning_rate": 1.6611740988882575e-05, "loss": 0.2697, "step": 16580, "teacher_loss": 0.22809717059135437 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.42243415117263794, "learning_rate": 1.6609482869942185e-05, "loss": 0.2345, "step": 16581, "teacher_loss": 0.21357378363609314 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.20221224427223206, "learning_rate": 1.6607224714101177e-05, "loss": 0.2179, "step": 16582, "teacher_loss": 0.21959705650806427 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.4948495626449585, "learning_rate": 1.6604966521411325e-05, "loss": 0.247, "step": 16583, "teacher_loss": 0.21945181488990784 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.4590347409248352, "learning_rate": 1.66027082919244e-05, "loss": 0.3248, "step": 16584, "teacher_loss": 0.3099316358566284 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3514411449432373, "learning_rate": 1.6600450025692184e-05, "loss": 0.243, "step": 16585, "teacher_loss": 0.23097622394561768 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.35198014974594116, "learning_rate": 1.6598191722766443e-05, "loss": 0.216, "step": 16586, "teacher_loss": 0.20091629028320312 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.38897255063056946, "learning_rate": 1.659593338319896e-05, "loss": 0.2005, "step": 16587, "teacher_loss": 0.17952017486095428 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.7840558290481567, "learning_rate": 1.6593675007041512e-05, "loss": 0.3539, "step": 16588, "teacher_loss": 0.3061090111732483 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.7565650939941406, "learning_rate": 1.659141659434587e-05, "loss": 0.344, "step": 16589, "teacher_loss": 0.2981935143470764 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.39933091402053833, "learning_rate": 1.658915814516382e-05, "loss": 0.1869, "step": 16590, "teacher_loss": 0.1632416546344757 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.38342469930648804, "learning_rate": 1.6586899659547137e-05, "loss": 0.3322, "step": 16591, "teacher_loss": 0.3265453577041626 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.2905828058719635, "learning_rate": 1.6584641137547603e-05, "loss": 0.1981, "step": 16592, "teacher_loss": 0.18785709142684937 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5319105386734009, "learning_rate": 1.6582382579216996e-05, "loss": 0.3245, "step": 16593, "teacher_loss": 0.3014667332172394 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5163998007774353, "learning_rate": 1.65801239846071e-05, "loss": 0.1892, "step": 16594, "teacher_loss": 0.15287858247756958 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.20453672111034393, "learning_rate": 1.6577865353769704e-05, "loss": 0.1676, "step": 16595, "teacher_loss": 0.1634896695613861 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.7812080383300781, "learning_rate": 1.657560668675658e-05, "loss": 0.2918, "step": 16596, "teacher_loss": 0.2374630868434906 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.29870760440826416, "learning_rate": 1.6573347983619522e-05, "loss": 0.2748, "step": 16597, "teacher_loss": 0.2721807062625885 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 1.1312963962554932, "learning_rate": 1.657108924441031e-05, "loss": 0.702, "step": 16598, "teacher_loss": 0.6543383598327637 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 1.1894670724868774, "learning_rate": 1.6568830469180733e-05, "loss": 0.2987, "step": 16599, "teacher_loss": 0.19973570108413696 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5973784327507019, "learning_rate": 1.6566571657982582e-05, "loss": 0.3253, "step": 16600, "teacher_loss": 0.29509496688842773 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.17872895300388336, "learning_rate": 1.6564312810867635e-05, "loss": 0.1739, "step": 16601, "teacher_loss": 0.17339558899402618 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5491242408752441, "learning_rate": 1.656205392788768e-05, "loss": 0.2078, "step": 16602, "teacher_loss": 0.16988155245780945 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.7075532078742981, "learning_rate": 1.655979500909452e-05, "loss": 0.3171, "step": 16603, "teacher_loss": 0.2736976742744446 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3452551066875458, "learning_rate": 1.6557536054539937e-05, "loss": 0.2116, "step": 16604, "teacher_loss": 0.19679176807403564 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3967088758945465, "learning_rate": 1.6555277064275717e-05, "loss": 0.1871, "step": 16605, "teacher_loss": 0.16386398673057556 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.31807512044906616, "learning_rate": 1.655301803835366e-05, "loss": 0.2361, "step": 16606, "teacher_loss": 0.22699615359306335 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.7111173272132874, "learning_rate": 1.655075897682555e-05, "loss": 0.2677, "step": 16607, "teacher_loss": 0.21840360760688782 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.39180633425712585, "learning_rate": 1.654849987974319e-05, "loss": 0.2437, "step": 16608, "teacher_loss": 0.22723454236984253 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3105844259262085, "learning_rate": 1.6546240747158375e-05, "loss": 0.1985, "step": 16609, "teacher_loss": 0.1860843449831009 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.6416542530059814, "learning_rate": 1.6543981579122895e-05, "loss": 0.3021, "step": 16610, "teacher_loss": 0.2644268274307251 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.2533203959465027, "learning_rate": 1.654172237568854e-05, "loss": 0.174, "step": 16611, "teacher_loss": 0.1651584804058075 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.36402270197868347, "learning_rate": 1.6539463136907116e-05, "loss": 0.1866, "step": 16612, "teacher_loss": 0.1668454110622406 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.4679250717163086, "learning_rate": 1.6537203862830416e-05, "loss": 0.2869, "step": 16613, "teacher_loss": 0.26680639386177063 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.35215193033218384, "learning_rate": 1.6534944553510244e-05, "loss": 0.2226, "step": 16614, "teacher_loss": 0.20815637707710266 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.6027519702911377, "learning_rate": 1.6532685208998398e-05, "loss": 0.2986, "step": 16615, "teacher_loss": 0.26476943492889404 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.794066309928894, "learning_rate": 1.6530425829346667e-05, "loss": 0.3167, "step": 16616, "teacher_loss": 0.26363813877105713 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3358103930950165, "learning_rate": 1.6528166414606862e-05, "loss": 0.1929, "step": 16617, "teacher_loss": 0.17706041038036346 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3245517611503601, "learning_rate": 1.652590696483079e-05, "loss": 0.2363, "step": 16618, "teacher_loss": 0.2265392392873764 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.3604736924171448, "learning_rate": 1.6523647480070235e-05, "loss": 0.2081, "step": 16619, "teacher_loss": 0.19117727875709534 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.48979148268699646, "learning_rate": 1.6521387960377023e-05, "loss": 0.2568, "step": 16620, "teacher_loss": 0.23089753091335297 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.39932042360305786, "learning_rate": 1.6519128405802937e-05, "loss": 0.2302, "step": 16621, "teacher_loss": 0.21137796342372894 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.5675336122512817, "learning_rate": 1.6516868816399798e-05, "loss": 0.2342, "step": 16622, "teacher_loss": 0.19711177051067352 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.42040157318115234, "learning_rate": 1.6514609192219403e-05, "loss": 0.2137, "step": 16623, "teacher_loss": 0.19070178270339966 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.6413109302520752, "learning_rate": 1.6512349533313555e-05, "loss": 0.4367, "step": 16624, "teacher_loss": 0.4140129089355469 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.2512364089488983, "learning_rate": 1.6510089839734078e-05, "loss": 0.2013, "step": 16625, "teacher_loss": 0.195703387260437 }, { "compression_loss": 0.0, "epoch": 3.0, "label_loss": 0.2540172040462494, "learning_rate": 1.6507830111532755e-05, "loss": 0.1943, "step": 16626, "teacher_loss": 0.18768110871315002 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8628928661346436, "learning_rate": 1.6505570348761413e-05, "loss": 0.2209, "step": 16627, "teacher_loss": 0.14959722757339478 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4223070740699768, "learning_rate": 1.650331055147186e-05, "loss": 0.2665, "step": 16628, "teacher_loss": 0.24918031692504883 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.29805606603622437, "learning_rate": 1.6501050719715903e-05, "loss": 0.2407, "step": 16629, "teacher_loss": 0.23434260487556458 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.5205034017562866, "learning_rate": 1.6498790853545355e-05, "loss": 0.2311, "step": 16630, "teacher_loss": 0.19898566603660583 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.3485151529312134, "learning_rate": 1.649653095301202e-05, "loss": 0.2221, "step": 16631, "teacher_loss": 0.20803453028202057 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.45519545674324036, "learning_rate": 1.649427101816772e-05, "loss": 0.2474, "step": 16632, "teacher_loss": 0.22427037358283997 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.42452654242515564, "learning_rate": 1.649201104906427e-05, "loss": 0.2039, "step": 16633, "teacher_loss": 0.17936545610427856 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.36761289834976196, "learning_rate": 1.6489751045753472e-05, "loss": 0.2311, "step": 16634, "teacher_loss": 0.21589171886444092 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.584121823310852, "learning_rate": 1.6487491008287157e-05, "loss": 0.2303, "step": 16635, "teacher_loss": 0.19100965559482574 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.28541135787963867, "learning_rate": 1.6485230936717126e-05, "loss": 0.1685, "step": 16636, "teacher_loss": 0.15548065304756165 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 1.1317944526672363, "learning_rate": 1.6482970831095205e-05, "loss": 0.2688, "step": 16637, "teacher_loss": 0.17290166020393372 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.5264564752578735, "learning_rate": 1.648071069147321e-05, "loss": 0.2745, "step": 16638, "teacher_loss": 0.24655510485172272 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.0864419937133789, "learning_rate": 1.647845051790296e-05, "loss": 0.146, "step": 16639, "teacher_loss": 0.15266850590705872 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.47381970286369324, "learning_rate": 1.6476190310436267e-05, "loss": 0.1876, "step": 16640, "teacher_loss": 0.15581119060516357 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.35481536388397217, "learning_rate": 1.647393006912496e-05, "loss": 0.2224, "step": 16641, "teacher_loss": 0.20765304565429688 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.2816130220890045, "learning_rate": 1.6471669794020854e-05, "loss": 0.2777, "step": 16642, "teacher_loss": 0.27730390429496765 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4508807063102722, "learning_rate": 1.6469409485175773e-05, "loss": 0.2112, "step": 16643, "teacher_loss": 0.18454337120056152 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.18374405801296234, "learning_rate": 1.646714914264154e-05, "loss": 0.1827, "step": 16644, "teacher_loss": 0.18259277939796448 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.2373037487268448, "learning_rate": 1.6464888766469973e-05, "loss": 0.2039, "step": 16645, "teacher_loss": 0.20019501447677612 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.6330631971359253, "learning_rate": 1.6462628356712896e-05, "loss": 0.4479, "step": 16646, "teacher_loss": 0.42734837532043457 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.1081903874874115, "learning_rate": 1.646036791342214e-05, "loss": 0.1911, "step": 16647, "teacher_loss": 0.2003670483827591 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.3166697919368744, "learning_rate": 1.6458107436649526e-05, "loss": 0.2029, "step": 16648, "teacher_loss": 0.19026033580303192 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.33273324370384216, "learning_rate": 1.6455846926446875e-05, "loss": 0.2115, "step": 16649, "teacher_loss": 0.19801700115203857 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.30150771141052246, "learning_rate": 1.645358638286603e-05, "loss": 0.2404, "step": 16650, "teacher_loss": 0.23362015187740326 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.983533501625061, "learning_rate": 1.6451325805958796e-05, "loss": 0.4752, "step": 16651, "teacher_loss": 0.41876423358917236 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.44914886355400085, "learning_rate": 1.6449065195777018e-05, "loss": 0.2536, "step": 16652, "teacher_loss": 0.23183780908584595 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.5146439671516418, "learning_rate": 1.644680455237252e-05, "loss": 0.2204, "step": 16653, "teacher_loss": 0.18770773708820343 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.1629735827445984, "learning_rate": 1.6444543875797128e-05, "loss": 0.1417, "step": 16654, "teacher_loss": 0.1393834948539734 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.881411612033844, "learning_rate": 1.6442283166102677e-05, "loss": 0.4749, "step": 16655, "teacher_loss": 0.42971351742744446 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4598774313926697, "learning_rate": 1.6440022423340998e-05, "loss": 0.2739, "step": 16656, "teacher_loss": 0.2532699704170227 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.9130034446716309, "learning_rate": 1.643776164756392e-05, "loss": 0.2743, "step": 16657, "teacher_loss": 0.20332211256027222 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8764932155609131, "learning_rate": 1.6435500838823286e-05, "loss": 0.6019, "step": 16658, "teacher_loss": 0.5714399814605713 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.504037618637085, "learning_rate": 1.6433239997170913e-05, "loss": 0.2176, "step": 16659, "teacher_loss": 0.18580546975135803 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4445747137069702, "learning_rate": 1.6430979122658646e-05, "loss": 0.3894, "step": 16660, "teacher_loss": 0.3832918405532837 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8176935911178589, "learning_rate": 1.6428718215338323e-05, "loss": 0.3249, "step": 16661, "teacher_loss": 0.2701180875301361 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.3848899006843567, "learning_rate": 1.6426457275261766e-05, "loss": 0.2108, "step": 16662, "teacher_loss": 0.19150319695472717 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8961437344551086, "learning_rate": 1.642419630248083e-05, "loss": 0.3663, "step": 16663, "teacher_loss": 0.307392954826355 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.32402679324150085, "learning_rate": 1.6421935297047335e-05, "loss": 0.1916, "step": 16664, "teacher_loss": 0.17685265839099884 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.5209680795669556, "learning_rate": 1.641967425901313e-05, "loss": 0.2905, "step": 16665, "teacher_loss": 0.26488834619522095 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.6518914699554443, "learning_rate": 1.641741318843005e-05, "loss": 0.3479, "step": 16666, "teacher_loss": 0.31413114070892334 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.2759491503238678, "learning_rate": 1.6415152085349934e-05, "loss": 0.2048, "step": 16667, "teacher_loss": 0.1968681514263153 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.42380067706108093, "learning_rate": 1.641289094982463e-05, "loss": 0.2665, "step": 16668, "teacher_loss": 0.24907664954662323 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8824596405029297, "learning_rate": 1.6410629781905964e-05, "loss": 0.2831, "step": 16669, "teacher_loss": 0.21654212474822998 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4889693856239319, "learning_rate": 1.640836858164579e-05, "loss": 0.2695, "step": 16670, "teacher_loss": 0.24510234594345093 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.36210858821868896, "learning_rate": 1.6406107349095943e-05, "loss": 0.1841, "step": 16671, "teacher_loss": 0.16429701447486877 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.3743099272251129, "learning_rate": 1.640384608430828e-05, "loss": 0.1896, "step": 16672, "teacher_loss": 0.16910046339035034 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.8900965452194214, "learning_rate": 1.640158478733463e-05, "loss": 0.2885, "step": 16673, "teacher_loss": 0.22168835997581482 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.6605490446090698, "learning_rate": 1.6399323458226844e-05, "loss": 0.4708, "step": 16674, "teacher_loss": 0.4496930241584778 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.2045832872390747, "learning_rate": 1.6397062097036764e-05, "loss": 0.1874, "step": 16675, "teacher_loss": 0.18551163375377655 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.546154797077179, "learning_rate": 1.6394800703816238e-05, "loss": 0.2874, "step": 16676, "teacher_loss": 0.2586747705936432 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.4462851285934448, "learning_rate": 1.6392539278617115e-05, "loss": 0.2188, "step": 16677, "teacher_loss": 0.19351482391357422 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.539980411529541, "learning_rate": 1.6390277821491243e-05, "loss": 0.2541, "step": 16678, "teacher_loss": 0.2223459631204605 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.1386830359697342, "learning_rate": 1.6388016332490464e-05, "loss": 0.1724, "step": 16679, "teacher_loss": 0.17617622017860413 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.5103582739830017, "learning_rate": 1.6385754811666637e-05, "loss": 0.2021, "step": 16680, "teacher_loss": 0.1678774654865265 }, { "compression_loss": 0.0, "epoch": 3.01, "label_loss": 0.6613031625747681, "learning_rate": 1.6383493259071607e-05, "loss": 0.2522, "step": 16681, "teacher_loss": 0.20675159990787506 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.22267544269561768, "learning_rate": 1.6381231674757223e-05, "loss": 0.1813, "step": 16682, "teacher_loss": 0.1767527461051941 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.7743809819221497, "learning_rate": 1.637897005877534e-05, "loss": 0.3101, "step": 16683, "teacher_loss": 0.2585359811782837 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.41415929794311523, "learning_rate": 1.63767084111778e-05, "loss": 0.2023, "step": 16684, "teacher_loss": 0.17875057458877563 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.7254762649536133, "learning_rate": 1.637444673201647e-05, "loss": 0.5528, "step": 16685, "teacher_loss": 0.5336033701896667 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.26265889406204224, "learning_rate": 1.63721850213432e-05, "loss": 0.3658, "step": 16686, "teacher_loss": 0.3772457242012024 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.22453008592128754, "learning_rate": 1.6369923279209835e-05, "loss": 0.1614, "step": 16687, "teacher_loss": 0.15439260005950928 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.4338299036026001, "learning_rate": 1.6367661505668243e-05, "loss": 0.2663, "step": 16688, "teacher_loss": 0.24764175713062286 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.7031207084655762, "learning_rate": 1.6365399700770267e-05, "loss": 0.6474, "step": 16689, "teacher_loss": 0.6412497758865356 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 1.307365894317627, "learning_rate": 1.6363137864567773e-05, "loss": 0.349, "step": 16690, "teacher_loss": 0.24253278970718384 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.24011573195457458, "learning_rate": 1.6360875997112616e-05, "loss": 0.1558, "step": 16691, "teacher_loss": 0.14643266797065735 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.7663685083389282, "learning_rate": 1.6358614098456648e-05, "loss": 0.2641, "step": 16692, "teacher_loss": 0.2083219289779663 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.398048460483551, "learning_rate": 1.6356352168651738e-05, "loss": 0.2141, "step": 16693, "teacher_loss": 0.19367438554763794 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.4373455047607422, "learning_rate": 1.635409020774973e-05, "loss": 0.3676, "step": 16694, "teacher_loss": 0.35987141728401184 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.3498024344444275, "learning_rate": 1.6351828215802502e-05, "loss": 0.1768, "step": 16695, "teacher_loss": 0.15755532681941986 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.3348073959350586, "learning_rate": 1.6349566192861905e-05, "loss": 0.1637, "step": 16696, "teacher_loss": 0.14464391767978668 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.30063748359680176, "learning_rate": 1.6347304138979797e-05, "loss": 0.223, "step": 16697, "teacher_loss": 0.21433916687965393 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.3812759220600128, "learning_rate": 1.6345042054208047e-05, "loss": 0.1913, "step": 16698, "teacher_loss": 0.17021194100379944 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 1.1720621585845947, "learning_rate": 1.6342779938598518e-05, "loss": 0.5735, "step": 16699, "teacher_loss": 0.5069878101348877 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.17568276822566986, "learning_rate": 1.634051779220307e-05, "loss": 0.1342, "step": 16700, "teacher_loss": 0.12953686714172363 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.1343419849872589, "learning_rate": 1.633825561507356e-05, "loss": 0.3337, "step": 16701, "teacher_loss": 0.35584571957588196 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.32095134258270264, "learning_rate": 1.633599340726187e-05, "loss": 0.2426, "step": 16702, "teacher_loss": 0.2338457703590393 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.8260831236839294, "learning_rate": 1.6333731168819854e-05, "loss": 0.3043, "step": 16703, "teacher_loss": 0.24637362360954285 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.23801273107528687, "learning_rate": 1.6331468899799383e-05, "loss": 0.2445, "step": 16704, "teacher_loss": 0.24520312249660492 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.24387332797050476, "learning_rate": 1.6329206600252316e-05, "loss": 0.2484, "step": 16705, "teacher_loss": 0.2489512711763382 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.739294171333313, "learning_rate": 1.6326944270230532e-05, "loss": 0.7563, "step": 16706, "teacher_loss": 0.7582250833511353 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.802885890007019, "learning_rate": 1.6324681909785896e-05, "loss": 0.5456, "step": 16707, "teacher_loss": 0.5170537829399109 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.21120405197143555, "learning_rate": 1.632241951897027e-05, "loss": 0.1691, "step": 16708, "teacher_loss": 0.16439834237098694 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.23087695240974426, "learning_rate": 1.6320157097835533e-05, "loss": 0.2062, "step": 16709, "teacher_loss": 0.20350268483161926 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.4218827486038208, "learning_rate": 1.631789464643355e-05, "loss": 0.234, "step": 16710, "teacher_loss": 0.21312332153320312 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5379481315612793, "learning_rate": 1.6315632164816197e-05, "loss": 0.3182, "step": 16711, "teacher_loss": 0.29373788833618164 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.7547663450241089, "learning_rate": 1.631336965303534e-05, "loss": 0.2025, "step": 16712, "teacher_loss": 0.14108511805534363 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.295688271522522, "learning_rate": 1.6311107111142855e-05, "loss": 0.2818, "step": 16713, "teacher_loss": 0.28027287125587463 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.35632050037384033, "learning_rate": 1.6308844539190612e-05, "loss": 0.2214, "step": 16714, "teacher_loss": 0.2064087688922882 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5817666053771973, "learning_rate": 1.6306581937230493e-05, "loss": 0.2431, "step": 16715, "teacher_loss": 0.20552444458007812 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.3169645667076111, "learning_rate": 1.6304319305314365e-05, "loss": 0.1998, "step": 16716, "teacher_loss": 0.1867600828409195 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5003454685211182, "learning_rate": 1.6302056643494105e-05, "loss": 0.2467, "step": 16717, "teacher_loss": 0.21850891411304474 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5456697940826416, "learning_rate": 1.6299793951821596e-05, "loss": 0.2641, "step": 16718, "teacher_loss": 0.23284760117530823 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.536749005317688, "learning_rate": 1.62975312303487e-05, "loss": 0.2572, "step": 16719, "teacher_loss": 0.22614958882331848 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 1.1537868976593018, "learning_rate": 1.629526847912731e-05, "loss": 0.4821, "step": 16720, "teacher_loss": 0.4074448347091675 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.25004416704177856, "learning_rate": 1.629300569820929e-05, "loss": 0.2943, "step": 16721, "teacher_loss": 0.29925674200057983 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.2055758237838745, "learning_rate": 1.6290742887646532e-05, "loss": 0.1891, "step": 16722, "teacher_loss": 0.18725544214248657 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.37162208557128906, "learning_rate": 1.6288480047490908e-05, "loss": 0.2105, "step": 16723, "teacher_loss": 0.19260048866271973 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.9930249452590942, "learning_rate": 1.62862171777943e-05, "loss": 0.3492, "step": 16724, "teacher_loss": 0.27770185470581055 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5038953423500061, "learning_rate": 1.6283954278608587e-05, "loss": 0.2245, "step": 16725, "teacher_loss": 0.19348105788230896 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.25645095109939575, "learning_rate": 1.628169134998565e-05, "loss": 0.2623, "step": 16726, "teacher_loss": 0.26294687390327454 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.1329130083322525, "learning_rate": 1.6279428391977377e-05, "loss": 0.1864, "step": 16727, "teacher_loss": 0.19230203330516815 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.6769921779632568, "learning_rate": 1.6277165404635647e-05, "loss": 0.2872, "step": 16728, "teacher_loss": 0.2438545525074005 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 1.038051962852478, "learning_rate": 1.627490238801234e-05, "loss": 0.4215, "step": 16729, "teacher_loss": 0.352997362613678 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 1.3730661869049072, "learning_rate": 1.6272639342159346e-05, "loss": 0.3072, "step": 16730, "teacher_loss": 0.18873238563537598 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.2309224009513855, "learning_rate": 1.6270376267128544e-05, "loss": 0.2244, "step": 16731, "teacher_loss": 0.22373080253601074 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.5658903121948242, "learning_rate": 1.6268113162971826e-05, "loss": 0.3602, "step": 16732, "teacher_loss": 0.3373996913433075 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.24809661507606506, "learning_rate": 1.6265850029741078e-05, "loss": 0.1915, "step": 16733, "teacher_loss": 0.1851608157157898 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.3477705717086792, "learning_rate": 1.6263586867488183e-05, "loss": 0.2451, "step": 16734, "teacher_loss": 0.233689546585083 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.34659552574157715, "learning_rate": 1.6261323676265026e-05, "loss": 0.2363, "step": 16735, "teacher_loss": 0.22401948273181915 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.48806464672088623, "learning_rate": 1.62590604561235e-05, "loss": 0.2316, "step": 16736, "teacher_loss": 0.20304948091506958 }, { "compression_loss": 0.0, "epoch": 3.02, "label_loss": 0.25840654969215393, "learning_rate": 1.6256797207115495e-05, "loss": 0.3355, "step": 16737, "teacher_loss": 0.3440427780151367 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2350911796092987, "learning_rate": 1.6254533929292893e-05, "loss": 0.2471, "step": 16738, "teacher_loss": 0.24843406677246094 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.15830382704734802, "learning_rate": 1.6252270622707592e-05, "loss": 0.1646, "step": 16739, "teacher_loss": 0.1652618646621704 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.4054409861564636, "learning_rate": 1.625000728741148e-05, "loss": 0.2331, "step": 16740, "teacher_loss": 0.2139054536819458 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.6492743492126465, "learning_rate": 1.6247743923456452e-05, "loss": 0.2859, "step": 16741, "teacher_loss": 0.24554742872714996 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.9160658717155457, "learning_rate": 1.624548053089439e-05, "loss": 0.4797, "step": 16742, "teacher_loss": 0.4311636686325073 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.32270270586013794, "learning_rate": 1.6243217109777202e-05, "loss": 0.2721, "step": 16743, "teacher_loss": 0.26653292775154114 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2720167338848114, "learning_rate": 1.6240953660156768e-05, "loss": 0.251, "step": 16744, "teacher_loss": 0.24869826436042786 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.6041932106018066, "learning_rate": 1.623869018208499e-05, "loss": 0.2917, "step": 16745, "teacher_loss": 0.25694602727890015 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3712504208087921, "learning_rate": 1.6236426675613754e-05, "loss": 0.2623, "step": 16746, "teacher_loss": 0.25016146898269653 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.7163546681404114, "learning_rate": 1.6234163140794965e-05, "loss": 0.3116, "step": 16747, "teacher_loss": 0.26661065220832825 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5679436922073364, "learning_rate": 1.623189957768052e-05, "loss": 0.2821, "step": 16748, "teacher_loss": 0.2503289580345154 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.44359537959098816, "learning_rate": 1.62296359863223e-05, "loss": 0.2096, "step": 16749, "teacher_loss": 0.18361210823059082 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3506343960762024, "learning_rate": 1.622737236677222e-05, "loss": 0.3918, "step": 16750, "teacher_loss": 0.39637070894241333 }, { "epoch": 3.03, "eval_exact_match": 79.6972563859981, "eval_f1": 87.28700317981962, "step": 16750 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2943986654281616, "learning_rate": 1.6225108719082173e-05, "loss": 0.3572, "step": 16751, "teacher_loss": 0.364205539226532 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.9274237751960754, "learning_rate": 1.622284504330405e-05, "loss": 0.4402, "step": 16752, "teacher_loss": 0.38603758811950684 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3011430501937866, "learning_rate": 1.622058133948976e-05, "loss": 0.1812, "step": 16753, "teacher_loss": 0.16790489852428436 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5951123237609863, "learning_rate": 1.6218317607691208e-05, "loss": 0.2445, "step": 16754, "teacher_loss": 0.2055935263633728 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.42148837447166443, "learning_rate": 1.6216053847960272e-05, "loss": 0.2174, "step": 16755, "teacher_loss": 0.19471558928489685 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.624860405921936, "learning_rate": 1.621379006034888e-05, "loss": 0.3436, "step": 16756, "teacher_loss": 0.3123078942298889 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.35093092918395996, "learning_rate": 1.621152624490891e-05, "loss": 0.2182, "step": 16757, "teacher_loss": 0.2034684121608734 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.7728095054626465, "learning_rate": 1.620926240169228e-05, "loss": 0.3431, "step": 16758, "teacher_loss": 0.2953372597694397 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.4074169099330902, "learning_rate": 1.6206998530750893e-05, "loss": 0.1994, "step": 16759, "teacher_loss": 0.17628496885299683 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2834703028202057, "learning_rate": 1.620473463213664e-05, "loss": 0.1608, "step": 16760, "teacher_loss": 0.14721494913101196 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.40931612253189087, "learning_rate": 1.6202470705901436e-05, "loss": 0.2805, "step": 16761, "teacher_loss": 0.2662176489830017 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5422577261924744, "learning_rate": 1.6200206752097187e-05, "loss": 0.2985, "step": 16762, "teacher_loss": 0.2714230418205261 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.23011064529418945, "learning_rate": 1.6197942770775795e-05, "loss": 0.2529, "step": 16763, "teacher_loss": 0.25548604130744934 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3078126907348633, "learning_rate": 1.6195678761989167e-05, "loss": 0.3545, "step": 16764, "teacher_loss": 0.35966312885284424 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.12039228528738022, "learning_rate": 1.619341472578921e-05, "loss": 0.1559, "step": 16765, "teacher_loss": 0.15988051891326904 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.7397499084472656, "learning_rate": 1.619115066222783e-05, "loss": 0.2494, "step": 16766, "teacher_loss": 0.1949431598186493 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.4226497411727905, "learning_rate": 1.618888657135694e-05, "loss": 0.3021, "step": 16767, "teacher_loss": 0.28872549533843994 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.44641709327697754, "learning_rate": 1.618662245322844e-05, "loss": 0.2524, "step": 16768, "teacher_loss": 0.23082174360752106 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3515903055667877, "learning_rate": 1.6184358307894246e-05, "loss": 0.2078, "step": 16769, "teacher_loss": 0.19177690148353577 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.40109825134277344, "learning_rate": 1.618209413540627e-05, "loss": 0.3817, "step": 16770, "teacher_loss": 0.3795184791088104 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.8166964054107666, "learning_rate": 1.6179829935816416e-05, "loss": 0.2392, "step": 16771, "teacher_loss": 0.17506438493728638 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.6074659824371338, "learning_rate": 1.61775657091766e-05, "loss": 0.3083, "step": 16772, "teacher_loss": 0.2750290036201477 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.35683396458625793, "learning_rate": 1.617530145553874e-05, "loss": 0.2194, "step": 16773, "teacher_loss": 0.20417499542236328 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2445635199546814, "learning_rate": 1.617303717495473e-05, "loss": 0.1653, "step": 16774, "teacher_loss": 0.15646912157535553 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.4458203911781311, "learning_rate": 1.6170772867476495e-05, "loss": 0.2147, "step": 16775, "teacher_loss": 0.18897312879562378 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.2752021551132202, "learning_rate": 1.616850853315596e-05, "loss": 0.2119, "step": 16776, "teacher_loss": 0.2049192637205124 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.16564348340034485, "learning_rate": 1.6166244172045017e-05, "loss": 0.1907, "step": 16777, "teacher_loss": 0.1934380829334259 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5233674049377441, "learning_rate": 1.6163979784195594e-05, "loss": 0.2539, "step": 16778, "teacher_loss": 0.22399696707725525 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.34260302782058716, "learning_rate": 1.6161715369659607e-05, "loss": 0.2149, "step": 16779, "teacher_loss": 0.20074602961540222 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.7575348019599915, "learning_rate": 1.615945092848897e-05, "loss": 0.2639, "step": 16780, "teacher_loss": 0.20899678766727448 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.18293337523937225, "learning_rate": 1.61571864607356e-05, "loss": 0.1578, "step": 16781, "teacher_loss": 0.15500061213970184 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.7627553343772888, "learning_rate": 1.6154921966451407e-05, "loss": 0.3333, "step": 16782, "teacher_loss": 0.28560519218444824 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.3845599293708801, "learning_rate": 1.615265744568832e-05, "loss": 0.2519, "step": 16783, "teacher_loss": 0.23713265359401703 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5760377645492554, "learning_rate": 1.6150392898498258e-05, "loss": 0.2775, "step": 16784, "teacher_loss": 0.24431398510932922 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.4641030430793762, "learning_rate": 1.6148128324933127e-05, "loss": 0.2235, "step": 16785, "teacher_loss": 0.1967858076095581 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.55330491065979, "learning_rate": 1.6145863725044864e-05, "loss": 0.1809, "step": 16786, "teacher_loss": 0.13947996497154236 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5893657207489014, "learning_rate": 1.6143599098885377e-05, "loss": 0.3349, "step": 16787, "teacher_loss": 0.30662715435028076 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.9667890071868896, "learning_rate": 1.614133444650659e-05, "loss": 0.3687, "step": 16788, "teacher_loss": 0.30226409435272217 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.32840967178344727, "learning_rate": 1.613906976796043e-05, "loss": 0.177, "step": 16789, "teacher_loss": 0.16014347970485687 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.08945631235837936, "learning_rate": 1.613680506329881e-05, "loss": 0.161, "step": 16790, "teacher_loss": 0.16891761124134064 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.27621936798095703, "learning_rate": 1.6134540332573665e-05, "loss": 0.1545, "step": 16791, "teacher_loss": 0.14095818996429443 }, { "compression_loss": 0.0, "epoch": 3.03, "label_loss": 0.5001845955848694, "learning_rate": 1.613227557583691e-05, "loss": 0.2949, "step": 16792, "teacher_loss": 0.27207979559898376 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2931975722312927, "learning_rate": 1.613001079314047e-05, "loss": 0.1978, "step": 16793, "teacher_loss": 0.18719851970672607 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.584324836730957, "learning_rate": 1.6127745984536266e-05, "loss": 0.2103, "step": 16794, "teacher_loss": 0.16873955726623535 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.34020107984542847, "learning_rate": 1.6125481150076232e-05, "loss": 0.242, "step": 16795, "teacher_loss": 0.23103654384613037 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.161312997341156, "learning_rate": 1.612321628981229e-05, "loss": 0.1787, "step": 16796, "teacher_loss": 0.1806664764881134 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.16806629300117493, "learning_rate": 1.6120951403796367e-05, "loss": 0.1304, "step": 16797, "teacher_loss": 0.12616702914237976 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5515435934066772, "learning_rate": 1.6118686492080386e-05, "loss": 0.2025, "step": 16798, "teacher_loss": 0.16368865966796875 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3803243637084961, "learning_rate": 1.6116421554716278e-05, "loss": 0.2177, "step": 16799, "teacher_loss": 0.1996442824602127 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.24220040440559387, "learning_rate": 1.6114156591755972e-05, "loss": 0.1706, "step": 16800, "teacher_loss": 0.16261771321296692 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.9484876394271851, "learning_rate": 1.6111891603251396e-05, "loss": 0.35, "step": 16801, "teacher_loss": 0.2835564911365509 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2501353621482849, "learning_rate": 1.6109626589254475e-05, "loss": 0.2175, "step": 16802, "teacher_loss": 0.21384716033935547 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5608738660812378, "learning_rate": 1.610736154981715e-05, "loss": 0.2393, "step": 16803, "teacher_loss": 0.20356637239456177 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.302284300327301, "learning_rate": 1.610509648499134e-05, "loss": 0.1925, "step": 16804, "teacher_loss": 0.18029913306236267 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.06422247737646103, "learning_rate": 1.610283139482898e-05, "loss": 0.1579, "step": 16805, "teacher_loss": 0.1682727336883545 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4555017948150635, "learning_rate": 1.6100566279382013e-05, "loss": 0.2161, "step": 16806, "teacher_loss": 0.1895502507686615 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2842525839805603, "learning_rate": 1.609830113870235e-05, "loss": 0.2747, "step": 16807, "teacher_loss": 0.27369171380996704 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4931330680847168, "learning_rate": 1.6096035972841937e-05, "loss": 0.2869, "step": 16808, "teacher_loss": 0.264007031917572 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.7901811599731445, "learning_rate": 1.6093770781852708e-05, "loss": 0.3363, "step": 16809, "teacher_loss": 0.28585487604141235 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.569908857345581, "learning_rate": 1.6091505565786588e-05, "loss": 0.3198, "step": 16810, "teacher_loss": 0.29203182458877563 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3426416516304016, "learning_rate": 1.6089240324695526e-05, "loss": 0.332, "step": 16811, "teacher_loss": 0.3308408856391907 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2697465717792511, "learning_rate": 1.6086975058631443e-05, "loss": 0.1465, "step": 16812, "teacher_loss": 0.13275158405303955 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2798083424568176, "learning_rate": 1.6084709767646285e-05, "loss": 0.1417, "step": 16813, "teacher_loss": 0.1263493001461029 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.26262956857681274, "learning_rate": 1.6082444451791986e-05, "loss": 0.1688, "step": 16814, "teacher_loss": 0.15836410224437714 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4059748351573944, "learning_rate": 1.608017911112047e-05, "loss": 0.1877, "step": 16815, "teacher_loss": 0.16341149806976318 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4099876880645752, "learning_rate": 1.6077913745683696e-05, "loss": 0.2056, "step": 16816, "teacher_loss": 0.1828356683254242 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3752114772796631, "learning_rate": 1.607564835553359e-05, "loss": 0.2599, "step": 16817, "teacher_loss": 0.24708473682403564 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.702766478061676, "learning_rate": 1.6073382940722088e-05, "loss": 0.2528, "step": 16818, "teacher_loss": 0.20275646448135376 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5798918008804321, "learning_rate": 1.6071117501301138e-05, "loss": 0.4542, "step": 16819, "teacher_loss": 0.44027501344680786 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.35304880142211914, "learning_rate": 1.606885203732267e-05, "loss": 0.2081, "step": 16820, "teacher_loss": 0.19195863604545593 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3803020715713501, "learning_rate": 1.606658654883863e-05, "loss": 0.2607, "step": 16821, "teacher_loss": 0.24745498597621918 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3758506178855896, "learning_rate": 1.6064321035900965e-05, "loss": 0.2387, "step": 16822, "teacher_loss": 0.2235088348388672 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.9602616429328918, "learning_rate": 1.6062055498561607e-05, "loss": 0.7645, "step": 16823, "teacher_loss": 0.7427017688751221 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.6457028388977051, "learning_rate": 1.6059789936872495e-05, "loss": 0.4047, "step": 16824, "teacher_loss": 0.37791305780410767 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2610934376716614, "learning_rate": 1.6057524350885583e-05, "loss": 0.2145, "step": 16825, "teacher_loss": 0.20931777358055115 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5805801749229431, "learning_rate": 1.6055258740652806e-05, "loss": 0.2681, "step": 16826, "teacher_loss": 0.2334115207195282 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2725808620452881, "learning_rate": 1.6052993106226105e-05, "loss": 0.4213, "step": 16827, "teacher_loss": 0.4378345012664795 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3599764108657837, "learning_rate": 1.6050727447657437e-05, "loss": 0.2117, "step": 16828, "teacher_loss": 0.19525441527366638 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4869891405105591, "learning_rate": 1.6048461764998735e-05, "loss": 0.2003, "step": 16829, "teacher_loss": 0.1684727966785431 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5623921155929565, "learning_rate": 1.6046196058301953e-05, "loss": 0.3062, "step": 16830, "teacher_loss": 0.27769070863723755 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4406111240386963, "learning_rate": 1.6043930327619028e-05, "loss": 0.2307, "step": 16831, "teacher_loss": 0.20736992359161377 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.2761671245098114, "learning_rate": 1.604166457300191e-05, "loss": 0.1625, "step": 16832, "teacher_loss": 0.1498270034790039 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4341892600059509, "learning_rate": 1.6039398794502548e-05, "loss": 0.1562, "step": 16833, "teacher_loss": 0.1253231167793274 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.6963376998901367, "learning_rate": 1.6037132992172887e-05, "loss": 0.265, "step": 16834, "teacher_loss": 0.21707329154014587 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.7535719871520996, "learning_rate": 1.6034867166064873e-05, "loss": 0.2651, "step": 16835, "teacher_loss": 0.21079209446907043 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.45142465829849243, "learning_rate": 1.6032601316230466e-05, "loss": 0.2751, "step": 16836, "teacher_loss": 0.2554752230644226 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.31602269411087036, "learning_rate": 1.6030335442721598e-05, "loss": 0.1912, "step": 16837, "teacher_loss": 0.17727622389793396 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.8295608758926392, "learning_rate": 1.602806954559023e-05, "loss": 0.2234, "step": 16838, "teacher_loss": 0.15609216690063477 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.440433144569397, "learning_rate": 1.602580362488831e-05, "loss": 0.216, "step": 16839, "teacher_loss": 0.19102974236011505 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.6695796251296997, "learning_rate": 1.6023537680667787e-05, "loss": 0.5044, "step": 16840, "teacher_loss": 0.48600631952285767 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.6110231280326843, "learning_rate": 1.602127171298062e-05, "loss": 0.2144, "step": 16841, "teacher_loss": 0.1703658401966095 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.3728519678115845, "learning_rate": 1.6019005721878748e-05, "loss": 0.2577, "step": 16842, "teacher_loss": 0.24487125873565674 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.26037514209747314, "learning_rate": 1.6016739707414134e-05, "loss": 0.2101, "step": 16843, "teacher_loss": 0.2045416235923767 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.5497496128082275, "learning_rate": 1.6014473669638725e-05, "loss": 0.2606, "step": 16844, "teacher_loss": 0.2284727692604065 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4091755151748657, "learning_rate": 1.6012207608604473e-05, "loss": 0.2539, "step": 16845, "teacher_loss": 0.23665225505828857 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.4850279986858368, "learning_rate": 1.6009941524363338e-05, "loss": 0.2828, "step": 16846, "teacher_loss": 0.2603681981563568 }, { "compression_loss": 0.0, "epoch": 3.04, "label_loss": 0.36309248208999634, "learning_rate": 1.6007675416967273e-05, "loss": 0.2457, "step": 16847, "teacher_loss": 0.232618510723114 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6456424593925476, "learning_rate": 1.600540928646823e-05, "loss": 0.5705, "step": 16848, "teacher_loss": 0.5621359944343567 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.5721041560173035, "learning_rate": 1.6003143132918172e-05, "loss": 0.3132, "step": 16849, "teacher_loss": 0.2844192385673523 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6573474407196045, "learning_rate": 1.6000876956369043e-05, "loss": 0.3054, "step": 16850, "teacher_loss": 0.26624369621276855 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.31568092107772827, "learning_rate": 1.599861075687281e-05, "loss": 0.2458, "step": 16851, "teacher_loss": 0.2380181849002838 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.4519786834716797, "learning_rate": 1.5996344534481427e-05, "loss": 0.1766, "step": 16852, "teacher_loss": 0.14605773985385895 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.46846020221710205, "learning_rate": 1.5994078289246843e-05, "loss": 0.189, "step": 16853, "teacher_loss": 0.15791219472885132 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.23215147852897644, "learning_rate": 1.5991812021221033e-05, "loss": 0.2055, "step": 16854, "teacher_loss": 0.20257382094860077 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.4606871008872986, "learning_rate": 1.5989545730455945e-05, "loss": 0.3178, "step": 16855, "teacher_loss": 0.30187657475471497 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.4988008141517639, "learning_rate": 1.5987279417003537e-05, "loss": 0.2922, "step": 16856, "teacher_loss": 0.2692581117153168 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.16587047278881073, "learning_rate": 1.5985013080915772e-05, "loss": 0.156, "step": 16857, "teacher_loss": 0.1548621654510498 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3116499185562134, "learning_rate": 1.5982746722244612e-05, "loss": 0.1833, "step": 16858, "teacher_loss": 0.1689913421869278 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.7125014662742615, "learning_rate": 1.5980480341042017e-05, "loss": 0.2575, "step": 16859, "teacher_loss": 0.2069072276353836 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.5630989074707031, "learning_rate": 1.5978213937359946e-05, "loss": 0.3625, "step": 16860, "teacher_loss": 0.3402237594127655 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6866254806518555, "learning_rate": 1.5975947511250367e-05, "loss": 0.292, "step": 16861, "teacher_loss": 0.24809899926185608 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.5564385056495667, "learning_rate": 1.597368106276523e-05, "loss": 0.1946, "step": 16862, "teacher_loss": 0.15442153811454773 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3809937834739685, "learning_rate": 1.597141459195651e-05, "loss": 0.2144, "step": 16863, "teacher_loss": 0.19593197107315063 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.2710877060890198, "learning_rate": 1.5969148098876166e-05, "loss": 0.2463, "step": 16864, "teacher_loss": 0.24349215626716614 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3203345537185669, "learning_rate": 1.596688158357616e-05, "loss": 0.208, "step": 16865, "teacher_loss": 0.19549402594566345 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.44225579500198364, "learning_rate": 1.596461504610846e-05, "loss": 0.3731, "step": 16866, "teacher_loss": 0.36539965867996216 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.4427652955055237, "learning_rate": 1.5962348486525028e-05, "loss": 0.2447, "step": 16867, "teacher_loss": 0.22273138165473938 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 1.1535390615463257, "learning_rate": 1.596008190487783e-05, "loss": 0.2724, "step": 16868, "teacher_loss": 0.17445990443229675 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.23975099623203278, "learning_rate": 1.5957815301218834e-05, "loss": 0.3467, "step": 16869, "teacher_loss": 0.35854339599609375 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.32493311166763306, "learning_rate": 1.59555486756e-05, "loss": 0.2069, "step": 16870, "teacher_loss": 0.19383135437965393 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.27010229229927063, "learning_rate": 1.5953282028073307e-05, "loss": 0.1967, "step": 16871, "teacher_loss": 0.1884966790676117 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.23377005755901337, "learning_rate": 1.5951015358690712e-05, "loss": 0.1878, "step": 16872, "teacher_loss": 0.18272638320922852 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3354255259037018, "learning_rate": 1.5948748667504182e-05, "loss": 0.2218, "step": 16873, "teacher_loss": 0.2092185914516449 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6702821254730225, "learning_rate": 1.5946481954565696e-05, "loss": 0.2524, "step": 16874, "teacher_loss": 0.20599710941314697 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6687851548194885, "learning_rate": 1.5944215219927212e-05, "loss": 0.4103, "step": 16875, "teacher_loss": 0.3815535604953766 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6331831216812134, "learning_rate": 1.5941948463640708e-05, "loss": 0.2273, "step": 16876, "teacher_loss": 0.18219245970249176 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.18871107697486877, "learning_rate": 1.5939681685758146e-05, "loss": 0.1874, "step": 16877, "teacher_loss": 0.18720856308937073 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.13560549914836884, "learning_rate": 1.59374148863315e-05, "loss": 0.1569, "step": 16878, "teacher_loss": 0.1592504382133484 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.26225972175598145, "learning_rate": 1.5935148065412743e-05, "loss": 0.1709, "step": 16879, "teacher_loss": 0.16069883108139038 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.6609208583831787, "learning_rate": 1.5932881223053847e-05, "loss": 0.2505, "step": 16880, "teacher_loss": 0.20492485165596008 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3135026693344116, "learning_rate": 1.593061435930678e-05, "loss": 0.2494, "step": 16881, "teacher_loss": 0.24232399463653564 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.36897093057632446, "learning_rate": 1.592834747422351e-05, "loss": 0.254, "step": 16882, "teacher_loss": 0.2412070631980896 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.5133674144744873, "learning_rate": 1.5926080567856023e-05, "loss": 0.268, "step": 16883, "teacher_loss": 0.24070867896080017 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.1623694896697998, "learning_rate": 1.592381364025628e-05, "loss": 0.1423, "step": 16884, "teacher_loss": 0.14011451601982117 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3547600507736206, "learning_rate": 1.5921546691476264e-05, "loss": 0.207, "step": 16885, "teacher_loss": 0.19056382775306702 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.3432597517967224, "learning_rate": 1.5919279721567948e-05, "loss": 0.2675, "step": 16886, "teacher_loss": 0.25912314653396606 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.33007028698921204, "learning_rate": 1.5917012730583298e-05, "loss": 0.1806, "step": 16887, "teacher_loss": 0.1640370786190033 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.37300965189933777, "learning_rate": 1.5914745718574297e-05, "loss": 0.2466, "step": 16888, "teacher_loss": 0.2325739860534668 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.2121027410030365, "learning_rate": 1.591247868559292e-05, "loss": 0.2052, "step": 16889, "teacher_loss": 0.20447131991386414 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.20480406284332275, "learning_rate": 1.591021163169114e-05, "loss": 0.1459, "step": 16890, "teacher_loss": 0.13940449059009552 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.35742485523223877, "learning_rate": 1.590794455692094e-05, "loss": 0.2103, "step": 16891, "teacher_loss": 0.19391420483589172 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.41289693117141724, "learning_rate": 1.5905677461334292e-05, "loss": 0.3053, "step": 16892, "teacher_loss": 0.29330432415008545 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.41480934619903564, "learning_rate": 1.5903410344983175e-05, "loss": 0.1612, "step": 16893, "teacher_loss": 0.1330462545156479 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.20442867279052734, "learning_rate": 1.590114320791957e-05, "loss": 0.2163, "step": 16894, "teacher_loss": 0.21761064231395721 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.4068581461906433, "learning_rate": 1.589887605019545e-05, "loss": 0.3528, "step": 16895, "teacher_loss": 0.34677520394325256 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.5025511384010315, "learning_rate": 1.5896608871862798e-05, "loss": 0.2409, "step": 16896, "teacher_loss": 0.21177448332309723 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.36766624450683594, "learning_rate": 1.589434167297359e-05, "loss": 0.1621, "step": 16897, "teacher_loss": 0.13930313289165497 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.29141315817832947, "learning_rate": 1.589207445357981e-05, "loss": 0.2332, "step": 16898, "teacher_loss": 0.2267286479473114 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.35825392603874207, "learning_rate": 1.588980721373344e-05, "loss": 0.2524, "step": 16899, "teacher_loss": 0.2406141757965088 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.7143042087554932, "learning_rate": 1.5887539953486456e-05, "loss": 0.292, "step": 16900, "teacher_loss": 0.24505400657653809 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.8673523664474487, "learning_rate": 1.5885272672890842e-05, "loss": 0.342, "step": 16901, "teacher_loss": 0.28361114859580994 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.7867000102996826, "learning_rate": 1.5883005371998582e-05, "loss": 0.3843, "step": 16902, "teacher_loss": 0.3395382761955261 }, { "compression_loss": 0.0, "epoch": 3.05, "label_loss": 0.8298860788345337, "learning_rate": 1.5880738050861654e-05, "loss": 0.3379, "step": 16903, "teacher_loss": 0.28318527340888977 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3373580276966095, "learning_rate": 1.5878470709532044e-05, "loss": 0.1931, "step": 16904, "teacher_loss": 0.1770373284816742 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.30741333961486816, "learning_rate": 1.5876203348061732e-05, "loss": 0.2574, "step": 16905, "teacher_loss": 0.2518293261528015 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3876432776451111, "learning_rate": 1.587393596650271e-05, "loss": 0.1968, "step": 16906, "teacher_loss": 0.17564880847930908 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.36496394872665405, "learning_rate": 1.5871668564906955e-05, "loss": 0.2015, "step": 16907, "teacher_loss": 0.18335093557834625 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5074715614318848, "learning_rate": 1.586940114332645e-05, "loss": 0.23, "step": 16908, "teacher_loss": 0.19916322827339172 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.20610252022743225, "learning_rate": 1.5867133701813183e-05, "loss": 0.1502, "step": 16909, "teacher_loss": 0.14400246739387512 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.2280387282371521, "learning_rate": 1.586486624041914e-05, "loss": 0.1973, "step": 16910, "teacher_loss": 0.1938643902540207 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.08964599668979645, "learning_rate": 1.586259875919631e-05, "loss": 0.1632, "step": 16911, "teacher_loss": 0.17136666178703308 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.43679535388946533, "learning_rate": 1.586033125819668e-05, "loss": 0.2046, "step": 16912, "teacher_loss": 0.17881129682064056 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.14137499034404755, "learning_rate": 1.5858063737472222e-05, "loss": 0.1481, "step": 16913, "teacher_loss": 0.1488851010799408 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5175946354866028, "learning_rate": 1.585579619707494e-05, "loss": 0.2593, "step": 16914, "teacher_loss": 0.2306080162525177 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.2763390839099884, "learning_rate": 1.5853528637056827e-05, "loss": 0.2868, "step": 16915, "teacher_loss": 0.2879959046840668 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.2820940911769867, "learning_rate": 1.5851261057469852e-05, "loss": 0.216, "step": 16916, "teacher_loss": 0.2086283564567566 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.373432993888855, "learning_rate": 1.5848993458366012e-05, "loss": 0.2451, "step": 16917, "teacher_loss": 0.23082809150218964 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.2175893634557724, "learning_rate": 1.58467258397973e-05, "loss": 0.25, "step": 16918, "teacher_loss": 0.2535994350910187 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.49444180727005005, "learning_rate": 1.5844458201815702e-05, "loss": 0.2795, "step": 16919, "teacher_loss": 0.25562554597854614 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.13361681997776031, "learning_rate": 1.584219054447321e-05, "loss": 0.1913, "step": 16920, "teacher_loss": 0.19768434762954712 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5936633348464966, "learning_rate": 1.583992286782182e-05, "loss": 0.2298, "step": 16921, "teacher_loss": 0.1893216073513031 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.49842774868011475, "learning_rate": 1.5837655171913508e-05, "loss": 0.3824, "step": 16922, "teacher_loss": 0.36948540806770325 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5964106321334839, "learning_rate": 1.583538745680028e-05, "loss": 0.285, "step": 16923, "teacher_loss": 0.25036680698394775 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3094998300075531, "learning_rate": 1.5833119722534118e-05, "loss": 0.2301, "step": 16924, "teacher_loss": 0.22126713395118713 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.384113609790802, "learning_rate": 1.5830851969167018e-05, "loss": 0.1884, "step": 16925, "teacher_loss": 0.16669228672981262 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.45244431495666504, "learning_rate": 1.5828584196750977e-05, "loss": 0.3083, "step": 16926, "teacher_loss": 0.2922462224960327 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.47019338607788086, "learning_rate": 1.5826316405337983e-05, "loss": 0.2526, "step": 16927, "teacher_loss": 0.2283751666545868 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.27009373903274536, "learning_rate": 1.582404859498003e-05, "loss": 0.2023, "step": 16928, "teacher_loss": 0.19472911953926086 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.4683631956577301, "learning_rate": 1.5821780765729118e-05, "loss": 0.2965, "step": 16929, "teacher_loss": 0.2773500084877014 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.9500136375427246, "learning_rate": 1.581951291763723e-05, "loss": 0.6972, "step": 16930, "teacher_loss": 0.6691627502441406 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.22023798525333405, "learning_rate": 1.5817245050756374e-05, "loss": 0.195, "step": 16931, "teacher_loss": 0.19219177961349487 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.4518231749534607, "learning_rate": 1.5814977165138537e-05, "loss": 0.226, "step": 16932, "teacher_loss": 0.20095695555210114 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3171052634716034, "learning_rate": 1.5812709260835715e-05, "loss": 0.2211, "step": 16933, "teacher_loss": 0.2104271799325943 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.6728929877281189, "learning_rate": 1.581044133789991e-05, "loss": 0.3857, "step": 16934, "teacher_loss": 0.3538045883178711 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.20353052020072937, "learning_rate": 1.5808173396383113e-05, "loss": 0.1571, "step": 16935, "teacher_loss": 0.15188682079315186 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.6556011438369751, "learning_rate": 1.580590543633732e-05, "loss": 0.2818, "step": 16936, "teacher_loss": 0.24026557803153992 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.31891047954559326, "learning_rate": 1.580363745781454e-05, "loss": 0.2147, "step": 16937, "teacher_loss": 0.2030760794878006 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.6376428008079529, "learning_rate": 1.5801369460866753e-05, "loss": 0.248, "step": 16938, "teacher_loss": 0.20466148853302002 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.37075769901275635, "learning_rate": 1.5799101445545973e-05, "loss": 0.1963, "step": 16939, "teacher_loss": 0.17692790925502777 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.7148482799530029, "learning_rate": 1.5796833411904192e-05, "loss": 0.2267, "step": 16940, "teacher_loss": 0.17249146103858948 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.9486251473426819, "learning_rate": 1.5794565359993408e-05, "loss": 0.3398, "step": 16941, "teacher_loss": 0.27218031883239746 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.35436415672302246, "learning_rate": 1.5792297289865618e-05, "loss": 0.2208, "step": 16942, "teacher_loss": 0.20599046349525452 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.301593542098999, "learning_rate": 1.5790029201572834e-05, "loss": 0.2045, "step": 16943, "teacher_loss": 0.19374148547649384 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.48216670751571655, "learning_rate": 1.5787761095167047e-05, "loss": 0.2561, "step": 16944, "teacher_loss": 0.23099717497825623 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3535589873790741, "learning_rate": 1.578549297070026e-05, "loss": 0.278, "step": 16945, "teacher_loss": 0.26956436038017273 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.39084386825561523, "learning_rate": 1.5783224828224476e-05, "loss": 0.239, "step": 16946, "teacher_loss": 0.22210997343063354 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.19451268017292023, "learning_rate": 1.578095666779169e-05, "loss": 0.1521, "step": 16947, "teacher_loss": 0.14734452962875366 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5897947549819946, "learning_rate": 1.5778688489453912e-05, "loss": 0.3652, "step": 16948, "teacher_loss": 0.3402193784713745 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.8268476724624634, "learning_rate": 1.5776420293263144e-05, "loss": 0.4349, "step": 16949, "teacher_loss": 0.39138779044151306 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.5780760645866394, "learning_rate": 1.5774152079271382e-05, "loss": 0.3589, "step": 16950, "teacher_loss": 0.33452218770980835 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.31508561968803406, "learning_rate": 1.5771883847530637e-05, "loss": 0.1979, "step": 16951, "teacher_loss": 0.18491590023040771 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.39863935112953186, "learning_rate": 1.5769615598092908e-05, "loss": 0.226, "step": 16952, "teacher_loss": 0.20682290196418762 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.34072333574295044, "learning_rate": 1.5767347331010202e-05, "loss": 0.2546, "step": 16953, "teacher_loss": 0.2449931502342224 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.414497435092926, "learning_rate": 1.576507904633452e-05, "loss": 0.2762, "step": 16954, "teacher_loss": 0.2608224153518677 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.9077692627906799, "learning_rate": 1.576281074411787e-05, "loss": 0.2855, "step": 16955, "teacher_loss": 0.216361865401268 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.36713796854019165, "learning_rate": 1.5760542424412256e-05, "loss": 0.2001, "step": 16956, "teacher_loss": 0.1815125048160553 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3706607520580292, "learning_rate": 1.5758274087269686e-05, "loss": 0.3722, "step": 16957, "teacher_loss": 0.37235724925994873 }, { "compression_loss": 0.0, "epoch": 3.06, "label_loss": 0.3271789848804474, "learning_rate": 1.5756005732742165e-05, "loss": 0.1859, "step": 16958, "teacher_loss": 0.17024821043014526 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3409363925457001, "learning_rate": 1.57537373608817e-05, "loss": 0.2063, "step": 16959, "teacher_loss": 0.19136835634708405 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.4970812201499939, "learning_rate": 1.5751468971740296e-05, "loss": 0.2739, "step": 16960, "teacher_loss": 0.24905620515346527 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6212697625160217, "learning_rate": 1.574920056536996e-05, "loss": 0.2069, "step": 16961, "teacher_loss": 0.1608992964029312 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.2803244888782501, "learning_rate": 1.5746932141822705e-05, "loss": 0.1998, "step": 16962, "teacher_loss": 0.190854012966156 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5244549512863159, "learning_rate": 1.574466370115053e-05, "loss": 0.2889, "step": 16963, "teacher_loss": 0.2626722753047943 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.42564713954925537, "learning_rate": 1.5742395243405458e-05, "loss": 0.1807, "step": 16964, "teacher_loss": 0.15346242487430573 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.9706597924232483, "learning_rate": 1.574012676863948e-05, "loss": 0.5769, "step": 16965, "teacher_loss": 0.5330967903137207 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5636691451072693, "learning_rate": 1.5737858276904617e-05, "loss": 0.2442, "step": 16966, "teacher_loss": 0.20874431729316711 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.75600266456604, "learning_rate": 1.5735589768252875e-05, "loss": 0.356, "step": 16967, "teacher_loss": 0.31152307987213135 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6100039482116699, "learning_rate": 1.5733321242736263e-05, "loss": 0.3166, "step": 16968, "teacher_loss": 0.2839508354663849 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5439411401748657, "learning_rate": 1.57310527004068e-05, "loss": 0.2952, "step": 16969, "teacher_loss": 0.2675391435623169 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3686427175998688, "learning_rate": 1.5728784141316487e-05, "loss": 0.2198, "step": 16970, "teacher_loss": 0.2032998651266098 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3578403890132904, "learning_rate": 1.572651556551734e-05, "loss": 0.1955, "step": 16971, "teacher_loss": 0.17742550373077393 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.22191378474235535, "learning_rate": 1.5724246973061363e-05, "loss": 0.222, "step": 16972, "teacher_loss": 0.22204521298408508 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.18693555891513824, "learning_rate": 1.5721978364000577e-05, "loss": 0.1387, "step": 16973, "teacher_loss": 0.13335445523262024 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5246123671531677, "learning_rate": 1.5719709738386995e-05, "loss": 0.2645, "step": 16974, "teacher_loss": 0.23556794226169586 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.778317391872406, "learning_rate": 1.5717441096272628e-05, "loss": 0.2085, "step": 16975, "teacher_loss": 0.1452065408229828 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.45208150148391724, "learning_rate": 1.571517243770948e-05, "loss": 0.3653, "step": 16976, "teacher_loss": 0.3556939363479614 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3655804991722107, "learning_rate": 1.571290376274958e-05, "loss": 0.2238, "step": 16977, "teacher_loss": 0.2080153226852417 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6207676529884338, "learning_rate": 1.571063507144493e-05, "loss": 0.3204, "step": 16978, "teacher_loss": 0.2869967222213745 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.357609361410141, "learning_rate": 1.5708366363847553e-05, "loss": 0.2033, "step": 16979, "teacher_loss": 0.18611447513103485 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5792220830917358, "learning_rate": 1.5706097640009452e-05, "loss": 0.2432, "step": 16980, "teacher_loss": 0.2059001624584198 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.2850901782512665, "learning_rate": 1.5703828899982654e-05, "loss": 0.2178, "step": 16981, "teacher_loss": 0.21033713221549988 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.4918980896472931, "learning_rate": 1.5701560143819167e-05, "loss": 0.2231, "step": 16982, "teacher_loss": 0.1932334303855896 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.2205010950565338, "learning_rate": 1.5699291371571012e-05, "loss": 0.2158, "step": 16983, "teacher_loss": 0.21523047983646393 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.30096447467803955, "learning_rate": 1.5697022583290204e-05, "loss": 0.1976, "step": 16984, "teacher_loss": 0.18609781563282013 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6012523174285889, "learning_rate": 1.5694753779028754e-05, "loss": 0.3176, "step": 16985, "teacher_loss": 0.28607243299484253 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.41279304027557373, "learning_rate": 1.5692484958838683e-05, "loss": 0.237, "step": 16986, "teacher_loss": 0.21743261814117432 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.16249369084835052, "learning_rate": 1.569021612277201e-05, "loss": 0.1375, "step": 16987, "teacher_loss": 0.13474775850772858 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 1.0769838094711304, "learning_rate": 1.5687947270880748e-05, "loss": 0.3405, "step": 16988, "teacher_loss": 0.258626252412796 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.4901203513145447, "learning_rate": 1.568567840321692e-05, "loss": 0.2161, "step": 16989, "teacher_loss": 0.18562708795070648 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6647986769676208, "learning_rate": 1.568340951983254e-05, "loss": 0.2612, "step": 16990, "teacher_loss": 0.21638908982276917 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3793599605560303, "learning_rate": 1.5681140620779633e-05, "loss": 0.2154, "step": 16991, "teacher_loss": 0.1972190886735916 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3759060502052307, "learning_rate": 1.567887170611021e-05, "loss": 0.2502, "step": 16992, "teacher_loss": 0.23621763288974762 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.19875489175319672, "learning_rate": 1.567660277587629e-05, "loss": 0.2139, "step": 16993, "teacher_loss": 0.21553188562393188 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.5348937511444092, "learning_rate": 1.5674333830129907e-05, "loss": 0.2496, "step": 16994, "teacher_loss": 0.21786893904209137 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.30575063824653625, "learning_rate": 1.5672064868923064e-05, "loss": 0.1935, "step": 16995, "teacher_loss": 0.18104791641235352 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3474254012107849, "learning_rate": 1.566979589230779e-05, "loss": 0.2716, "step": 16996, "teacher_loss": 0.26320019364356995 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.21591754257678986, "learning_rate": 1.5667526900336107e-05, "loss": 0.153, "step": 16997, "teacher_loss": 0.146036297082901 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.7553324699401855, "learning_rate": 1.5665257893060026e-05, "loss": 0.3688, "step": 16998, "teacher_loss": 0.32585006952285767 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.3161123991012573, "learning_rate": 1.5662988870531585e-05, "loss": 0.2839, "step": 16999, "teacher_loss": 0.28030043840408325 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.6127636432647705, "learning_rate": 1.5660719832802793e-05, "loss": 0.2526, "step": 17000, "teacher_loss": 0.21252989768981934 }, { "epoch": 3.07, "eval_exact_match": 79.84862819299906, "eval_f1": 87.49352348954031, "step": 17000 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.2902565002441406, "learning_rate": 1.5658450779925674e-05, "loss": 0.2521, "step": 17001, "teacher_loss": 0.24788770079612732 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.46691271662712097, "learning_rate": 1.5656181711952255e-05, "loss": 0.2204, "step": 17002, "teacher_loss": 0.19297060370445251 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.1869509518146515, "learning_rate": 1.565391262893456e-05, "loss": 0.2104, "step": 17003, "teacher_loss": 0.2129782885313034 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.23464754223823547, "learning_rate": 1.5651643530924604e-05, "loss": 0.2597, "step": 17004, "teacher_loss": 0.2625196874141693 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.523677408695221, "learning_rate": 1.5649374417974417e-05, "loss": 0.3193, "step": 17005, "teacher_loss": 0.2965613305568695 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.10576523840427399, "learning_rate": 1.564710529013602e-05, "loss": 0.1803, "step": 17006, "teacher_loss": 0.1885388195514679 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.21308773756027222, "learning_rate": 1.564483614746144e-05, "loss": 0.1942, "step": 17007, "teacher_loss": 0.19205442070960999 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.30874884128570557, "learning_rate": 1.5642566990002705e-05, "loss": 0.2286, "step": 17008, "teacher_loss": 0.21973197162151337 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.16348449885845184, "learning_rate": 1.564029781781183e-05, "loss": 0.2655, "step": 17009, "teacher_loss": 0.27687662839889526 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.36447077989578247, "learning_rate": 1.5638028630940844e-05, "loss": 0.1744, "step": 17010, "teacher_loss": 0.1532387137413025 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.46360915899276733, "learning_rate": 1.563575942944178e-05, "loss": 0.286, "step": 17011, "teacher_loss": 0.26626139879226685 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.32487574219703674, "learning_rate": 1.5633490213366655e-05, "loss": 0.2146, "step": 17012, "teacher_loss": 0.20231729745864868 }, { "compression_loss": 0.0, "epoch": 3.07, "label_loss": 0.44872552156448364, "learning_rate": 1.56312209827675e-05, "loss": 0.2162, "step": 17013, "teacher_loss": 0.19032371044158936 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.37449783086776733, "learning_rate": 1.5628951737696344e-05, "loss": 0.2208, "step": 17014, "teacher_loss": 0.2037086933851242 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.29759496450424194, "learning_rate": 1.5626682478205205e-05, "loss": 0.1974, "step": 17015, "teacher_loss": 0.18624627590179443 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.651835560798645, "learning_rate": 1.5624413204346114e-05, "loss": 0.2663, "step": 17016, "teacher_loss": 0.22347280383110046 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6556954979896545, "learning_rate": 1.5622143916171105e-05, "loss": 0.2756, "step": 17017, "teacher_loss": 0.23331734538078308 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.8042777180671692, "learning_rate": 1.5619874613732198e-05, "loss": 0.3854, "step": 17018, "teacher_loss": 0.33889976143836975 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.20930439233779907, "learning_rate": 1.561760529708143e-05, "loss": 0.1795, "step": 17019, "teacher_loss": 0.17623579502105713 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 1.0444622039794922, "learning_rate": 1.561533596627082e-05, "loss": 0.3384, "step": 17020, "teacher_loss": 0.25996947288513184 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4599539041519165, "learning_rate": 1.5613066621352397e-05, "loss": 0.2829, "step": 17021, "teacher_loss": 0.26327618956565857 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.32906782627105713, "learning_rate": 1.56107972623782e-05, "loss": 0.19, "step": 17022, "teacher_loss": 0.17449253797531128 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.8386813402175903, "learning_rate": 1.5608527889400253e-05, "loss": 0.3193, "step": 17023, "teacher_loss": 0.2616077661514282 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4148062467575073, "learning_rate": 1.5606258502470585e-05, "loss": 0.2496, "step": 17024, "teacher_loss": 0.23129773139953613 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.33648091554641724, "learning_rate": 1.5603989101641228e-05, "loss": 0.252, "step": 17025, "teacher_loss": 0.24256299436092377 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6624933481216431, "learning_rate": 1.560171968696421e-05, "loss": 0.2565, "step": 17026, "teacher_loss": 0.2113877832889557 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.26048383116722107, "learning_rate": 1.5599450258491567e-05, "loss": 0.2651, "step": 17027, "teacher_loss": 0.2656324803829193 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5426404476165771, "learning_rate": 1.5597180816275323e-05, "loss": 0.3756, "step": 17028, "teacher_loss": 0.357077032327652 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.24016955494880676, "learning_rate": 1.5594911360367513e-05, "loss": 0.227, "step": 17029, "teacher_loss": 0.2255600243806839 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.7102572917938232, "learning_rate": 1.5592641890820176e-05, "loss": 0.2563, "step": 17030, "teacher_loss": 0.20590931177139282 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.2590523362159729, "learning_rate": 1.559037240768533e-05, "loss": 0.1834, "step": 17031, "teacher_loss": 0.17494651675224304 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5802199840545654, "learning_rate": 1.5588102911015014e-05, "loss": 0.3337, "step": 17032, "teacher_loss": 0.30636170506477356 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4627974033355713, "learning_rate": 1.558583340086127e-05, "loss": 0.197, "step": 17033, "teacher_loss": 0.16745418310165405 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5122882723808289, "learning_rate": 1.5583563877276116e-05, "loss": 0.1902, "step": 17034, "teacher_loss": 0.15436051785945892 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.13830158114433289, "learning_rate": 1.558129434031159e-05, "loss": 0.1796, "step": 17035, "teacher_loss": 0.184239000082016 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.2738603353500366, "learning_rate": 1.557902479001973e-05, "loss": 0.161, "step": 17036, "teacher_loss": 0.1485133171081543 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.7040095329284668, "learning_rate": 1.5576755226452568e-05, "loss": 0.2813, "step": 17037, "teacher_loss": 0.23428919911384583 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6015704274177551, "learning_rate": 1.5574485649662138e-05, "loss": 0.3524, "step": 17038, "teacher_loss": 0.32473304867744446 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.2029210329055786, "learning_rate": 1.5572216059700478e-05, "loss": 0.175, "step": 17039, "teacher_loss": 0.1718527227640152 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.7049412131309509, "learning_rate": 1.5569946456619613e-05, "loss": 0.4409, "step": 17040, "teacher_loss": 0.41151756048202515 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 1.1779247522354126, "learning_rate": 1.5567676840471587e-05, "loss": 0.3635, "step": 17041, "teacher_loss": 0.2730620205402374 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5995557308197021, "learning_rate": 1.5565407211308436e-05, "loss": 0.2942, "step": 17042, "teacher_loss": 0.2602502703666687 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.17783525586128235, "learning_rate": 1.5563137569182184e-05, "loss": 0.1801, "step": 17043, "teacher_loss": 0.18038839101791382 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.39081358909606934, "learning_rate": 1.5560867914144887e-05, "loss": 0.2799, "step": 17044, "teacher_loss": 0.26760998368263245 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.14298290014266968, "learning_rate": 1.5558598246248563e-05, "loss": 0.1786, "step": 17045, "teacher_loss": 0.18253114819526672 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5367259979248047, "learning_rate": 1.5556328565545256e-05, "loss": 0.2698, "step": 17046, "teacher_loss": 0.24019497632980347 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.40469419956207275, "learning_rate": 1.555405887208701e-05, "loss": 0.2318, "step": 17047, "teacher_loss": 0.2126368284225464 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.8280733227729797, "learning_rate": 1.5551789165925845e-05, "loss": 0.4051, "step": 17048, "teacher_loss": 0.35811880230903625 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4851178526878357, "learning_rate": 1.5549519447113815e-05, "loss": 0.1964, "step": 17049, "teacher_loss": 0.16433373093605042 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4729152023792267, "learning_rate": 1.5547249715702952e-05, "loss": 0.3231, "step": 17050, "teacher_loss": 0.30639877915382385 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.301393985748291, "learning_rate": 1.554497997174529e-05, "loss": 0.2618, "step": 17051, "teacher_loss": 0.2573610544204712 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5312014222145081, "learning_rate": 1.5542710215292882e-05, "loss": 0.2637, "step": 17052, "teacher_loss": 0.2339445799589157 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.25262972712516785, "learning_rate": 1.5540440446397745e-05, "loss": 0.1765, "step": 17053, "teacher_loss": 0.16803491115570068 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6839818954467773, "learning_rate": 1.553817066511193e-05, "loss": 0.4102, "step": 17054, "teacher_loss": 0.3798198699951172 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.266541987657547, "learning_rate": 1.553590087148748e-05, "loss": 0.2689, "step": 17055, "teacher_loss": 0.26913249492645264 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.5451951026916504, "learning_rate": 1.5533631065576425e-05, "loss": 0.2408, "step": 17056, "teacher_loss": 0.20697185397148132 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.46809470653533936, "learning_rate": 1.553136124743081e-05, "loss": 0.2042, "step": 17057, "teacher_loss": 0.17489968240261078 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.3487834930419922, "learning_rate": 1.5529091417102678e-05, "loss": 0.2456, "step": 17058, "teacher_loss": 0.23416444659233093 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.2697889804840088, "learning_rate": 1.552682157464406e-05, "loss": 0.1999, "step": 17059, "teacher_loss": 0.1921597272157669 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.35291755199432373, "learning_rate": 1.552455172010701e-05, "loss": 0.2293, "step": 17060, "teacher_loss": 0.2155713438987732 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.4749341607093811, "learning_rate": 1.5522281853543554e-05, "loss": 0.2689, "step": 17061, "teacher_loss": 0.24605943262577057 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6117103695869446, "learning_rate": 1.552001197500575e-05, "loss": 0.2499, "step": 17062, "teacher_loss": 0.2096526026725769 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.42779114842414856, "learning_rate": 1.551774208454562e-05, "loss": 0.3988, "step": 17063, "teacher_loss": 0.395539253950119 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.23250603675842285, "learning_rate": 1.551547218221523e-05, "loss": 0.1492, "step": 17064, "teacher_loss": 0.1399080604314804 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.31456995010375977, "learning_rate": 1.5513202268066593e-05, "loss": 0.2433, "step": 17065, "teacher_loss": 0.2354004979133606 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.8242419958114624, "learning_rate": 1.5510932342151778e-05, "loss": 0.2947, "step": 17066, "teacher_loss": 0.2358134388923645 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.1256561428308487, "learning_rate": 1.5508662404522812e-05, "loss": 0.1372, "step": 17067, "teacher_loss": 0.13844355940818787 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 0.6293669939041138, "learning_rate": 1.5506392455231745e-05, "loss": 0.3395, "step": 17068, "teacher_loss": 0.3072940707206726 }, { "compression_loss": 0.0, "epoch": 3.08, "label_loss": 1.000198245048523, "learning_rate": 1.5504122494330613e-05, "loss": 0.2622, "step": 17069, "teacher_loss": 0.18025052547454834 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.2902330756187439, "learning_rate": 1.5501852521871463e-05, "loss": 0.1762, "step": 17070, "teacher_loss": 0.1634884625673294 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5651159286499023, "learning_rate": 1.5499582537906342e-05, "loss": 0.3239, "step": 17071, "teacher_loss": 0.29711204767227173 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.10558003187179565, "learning_rate": 1.5497312542487294e-05, "loss": 0.1529, "step": 17072, "teacher_loss": 0.15810570120811462 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.24813322722911835, "learning_rate": 1.549504253566635e-05, "loss": 0.163, "step": 17073, "teacher_loss": 0.15349268913269043 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.3207513391971588, "learning_rate": 1.5492772517495577e-05, "loss": 0.2443, "step": 17074, "teacher_loss": 0.2357589602470398 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.8022313714027405, "learning_rate": 1.5490502488027e-05, "loss": 0.3246, "step": 17075, "teacher_loss": 0.2715368866920471 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.33774176239967346, "learning_rate": 1.5488232447312674e-05, "loss": 0.1936, "step": 17076, "teacher_loss": 0.17758655548095703 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.4840281307697296, "learning_rate": 1.5485962395404646e-05, "loss": 0.2115, "step": 17077, "teacher_loss": 0.18116378784179688 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.6643401384353638, "learning_rate": 1.548369233235495e-05, "loss": 0.2726, "step": 17078, "teacher_loss": 0.2290726751089096 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.671335756778717, "learning_rate": 1.5481422258215643e-05, "loss": 0.2992, "step": 17079, "teacher_loss": 0.2578341066837311 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5443426370620728, "learning_rate": 1.5479152173038766e-05, "loss": 0.2436, "step": 17080, "teacher_loss": 0.21020770072937012 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5381845831871033, "learning_rate": 1.5476882076876366e-05, "loss": 0.292, "step": 17081, "teacher_loss": 0.2646610736846924 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.1711021065711975, "learning_rate": 1.5474611969780496e-05, "loss": 0.1375, "step": 17082, "teacher_loss": 0.13379865884780884 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.3942982852458954, "learning_rate": 1.5472341851803186e-05, "loss": 0.3023, "step": 17083, "teacher_loss": 0.29207664728164673 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5175685882568359, "learning_rate": 1.5470071722996502e-05, "loss": 0.2305, "step": 17084, "teacher_loss": 0.19856303930282593 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.3329129219055176, "learning_rate": 1.546780158341248e-05, "loss": 0.204, "step": 17085, "teacher_loss": 0.1896638572216034 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.45642080903053284, "learning_rate": 1.5465531433103166e-05, "loss": 0.224, "step": 17086, "teacher_loss": 0.19817125797271729 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5998003482818604, "learning_rate": 1.5463261272120615e-05, "loss": 0.2662, "step": 17087, "teacher_loss": 0.22911834716796875 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.34131187200546265, "learning_rate": 1.5460991100516873e-05, "loss": 0.193, "step": 17088, "teacher_loss": 0.1764889359474182 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.7740792632102966, "learning_rate": 1.5458720918343987e-05, "loss": 0.3803, "step": 17089, "teacher_loss": 0.33654695749282837 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.1339554786682129, "learning_rate": 1.5456450725654007e-05, "loss": 0.1879, "step": 17090, "teacher_loss": 0.19389373064041138 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.44405293464660645, "learning_rate": 1.5454180522498976e-05, "loss": 0.2867, "step": 17091, "teacher_loss": 0.2692013382911682 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5587217807769775, "learning_rate": 1.545191030893095e-05, "loss": 0.248, "step": 17092, "teacher_loss": 0.2134218066930771 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.4008667469024658, "learning_rate": 1.5449640085001977e-05, "loss": 0.1709, "step": 17093, "teacher_loss": 0.14538581669330597 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.288235068321228, "learning_rate": 1.5447369850764103e-05, "loss": 0.1963, "step": 17094, "teacher_loss": 0.18603640794754028 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.44260019063949585, "learning_rate": 1.5445099606269378e-05, "loss": 0.2125, "step": 17095, "teacher_loss": 0.1869882494211197 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.6100733280181885, "learning_rate": 1.5442829351569854e-05, "loss": 0.26, "step": 17096, "teacher_loss": 0.2211434245109558 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.7037139534950256, "learning_rate": 1.5440559086717583e-05, "loss": 0.4707, "step": 17097, "teacher_loss": 0.44484108686447144 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.32256880402565, "learning_rate": 1.543828881176461e-05, "loss": 0.1997, "step": 17098, "teacher_loss": 0.18600735068321228 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.6091248989105225, "learning_rate": 1.5436018526762992e-05, "loss": 0.2592, "step": 17099, "teacher_loss": 0.22032345831394196 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.8146679401397705, "learning_rate": 1.5433748231764774e-05, "loss": 0.2706, "step": 17100, "teacher_loss": 0.21019107103347778 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5218662023544312, "learning_rate": 1.543147792682201e-05, "loss": 0.1837, "step": 17101, "teacher_loss": 0.1460893601179123 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5056889057159424, "learning_rate": 1.5429207611986756e-05, "loss": 0.2189, "step": 17102, "teacher_loss": 0.18699193000793457 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.23382161557674408, "learning_rate": 1.542693728731105e-05, "loss": 0.289, "step": 17103, "teacher_loss": 0.2951442003250122 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5903704166412354, "learning_rate": 1.5424666952846958e-05, "loss": 0.4089, "step": 17104, "teacher_loss": 0.3887832462787628 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.6547254323959351, "learning_rate": 1.542239660864652e-05, "loss": 0.3367, "step": 17105, "teacher_loss": 0.30138659477233887 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.37489405274391174, "learning_rate": 1.5420126254761797e-05, "loss": 0.2052, "step": 17106, "teacher_loss": 0.18638189136981964 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5926855802536011, "learning_rate": 1.5417855891244845e-05, "loss": 0.2747, "step": 17107, "teacher_loss": 0.23941358923912048 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.1621163785457611, "learning_rate": 1.54155855181477e-05, "loss": 0.1996, "step": 17108, "teacher_loss": 0.2038199007511139 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.40665680170059204, "learning_rate": 1.5413315135522434e-05, "loss": 0.1943, "step": 17109, "teacher_loss": 0.17071747779846191 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.59168541431427, "learning_rate": 1.5411044743421084e-05, "loss": 0.2225, "step": 17110, "teacher_loss": 0.18144477903842926 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.30516937375068665, "learning_rate": 1.5408774341895714e-05, "loss": 0.1838, "step": 17111, "teacher_loss": 0.17028182744979858 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5123518705368042, "learning_rate": 1.5406503930998375e-05, "loss": 0.2533, "step": 17112, "teacher_loss": 0.2244957685470581 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.3951176404953003, "learning_rate": 1.5404233510781112e-05, "loss": 0.2398, "step": 17113, "teacher_loss": 0.22251909971237183 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.39007332921028137, "learning_rate": 1.5401963081295994e-05, "loss": 0.1807, "step": 17114, "teacher_loss": 0.1574709117412567 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.2659448981285095, "learning_rate": 1.539969264259507e-05, "loss": 0.2304, "step": 17115, "teacher_loss": 0.2264612317085266 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.4305671453475952, "learning_rate": 1.5397422194730384e-05, "loss": 0.2189, "step": 17116, "teacher_loss": 0.19543595612049103 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.6756407022476196, "learning_rate": 1.5395151737754002e-05, "loss": 0.2883, "step": 17117, "teacher_loss": 0.2452574074268341 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.17432333528995514, "learning_rate": 1.539288127171798e-05, "loss": 0.1692, "step": 17118, "teacher_loss": 0.16858944296836853 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5327430963516235, "learning_rate": 1.539061079667436e-05, "loss": 0.2406, "step": 17119, "teacher_loss": 0.20810110867023468 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.27331963181495667, "learning_rate": 1.5388340312675215e-05, "loss": 0.2146, "step": 17120, "teacher_loss": 0.20806702971458435 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.3281494081020355, "learning_rate": 1.538606981977258e-05, "loss": 0.2598, "step": 17121, "teacher_loss": 0.2522505819797516 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.8761147260665894, "learning_rate": 1.5383799318018527e-05, "loss": 0.2753, "step": 17122, "teacher_loss": 0.2085953652858734 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.4426186978816986, "learning_rate": 1.5381528807465113e-05, "loss": 0.2395, "step": 17123, "teacher_loss": 0.2169855237007141 }, { "compression_loss": 0.0, "epoch": 3.09, "label_loss": 0.5183864831924438, "learning_rate": 1.5379258288164375e-05, "loss": 0.5789, "step": 17124, "teacher_loss": 0.5856744050979614 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.5461440682411194, "learning_rate": 1.537698776016839e-05, "loss": 0.3671, "step": 17125, "teacher_loss": 0.34719714522361755 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.16817721724510193, "learning_rate": 1.5374717223529203e-05, "loss": 0.1235, "step": 17126, "teacher_loss": 0.118507519364357 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.16240057349205017, "learning_rate": 1.5372446678298876e-05, "loss": 0.1915, "step": 17127, "teacher_loss": 0.1947633922100067 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.07226384431123734, "learning_rate": 1.5370176124529457e-05, "loss": 0.1392, "step": 17128, "teacher_loss": 0.1465841829776764 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.3599405288696289, "learning_rate": 1.5367905562273016e-05, "loss": 0.2177, "step": 17129, "teacher_loss": 0.20191195607185364 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.44051820039749146, "learning_rate": 1.53656349915816e-05, "loss": 0.2503, "step": 17130, "teacher_loss": 0.22919318079948425 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.43584781885147095, "learning_rate": 1.5363364412507272e-05, "loss": 0.2541, "step": 17131, "teacher_loss": 0.23385834693908691 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.38961225748062134, "learning_rate": 1.5361093825102087e-05, "loss": 0.2901, "step": 17132, "teacher_loss": 0.2789991497993469 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.4471387267112732, "learning_rate": 1.53588232294181e-05, "loss": 0.2394, "step": 17133, "teacher_loss": 0.21637234091758728 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.4134955406188965, "learning_rate": 1.5356552625507377e-05, "loss": 0.2378, "step": 17134, "teacher_loss": 0.21828094124794006 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.15211328864097595, "learning_rate": 1.535428201342197e-05, "loss": 0.1786, "step": 17135, "teacher_loss": 0.18156926333904266 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.2574191987514496, "learning_rate": 1.5352011393213932e-05, "loss": 0.1849, "step": 17136, "teacher_loss": 0.1768171787261963 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.3325204849243164, "learning_rate": 1.5349740764935342e-05, "loss": 0.2647, "step": 17137, "teacher_loss": 0.2572069466114044 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.8556715250015259, "learning_rate": 1.5347470128638233e-05, "loss": 0.2253, "step": 17138, "teacher_loss": 0.15525224804878235 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.25372314453125, "learning_rate": 1.534519948437468e-05, "loss": 0.2383, "step": 17139, "teacher_loss": 0.23653483390808105 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.7254093885421753, "learning_rate": 1.534292883219674e-05, "loss": 0.3888, "step": 17140, "teacher_loss": 0.3513984680175781 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.3339449167251587, "learning_rate": 1.534065817215646e-05, "loss": 0.1849, "step": 17141, "teacher_loss": 0.16836772859096527 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.5454400181770325, "learning_rate": 1.5338387504305924e-05, "loss": 0.191, "step": 17142, "teacher_loss": 0.15158015489578247 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.20470893383026123, "learning_rate": 1.5336116828697165e-05, "loss": 0.2203, "step": 17143, "teacher_loss": 0.22203657031059265 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.4558107256889343, "learning_rate": 1.5333846145382256e-05, "loss": 0.2508, "step": 17144, "teacher_loss": 0.22805365920066833 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.2915193736553192, "learning_rate": 1.5331575454413263e-05, "loss": 0.2303, "step": 17145, "teacher_loss": 0.22344574332237244 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.32340073585510254, "learning_rate": 1.5329304755842237e-05, "loss": 0.2881, "step": 17146, "teacher_loss": 0.2841394543647766 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.13586458563804626, "learning_rate": 1.5327034049721238e-05, "loss": 0.1963, "step": 17147, "teacher_loss": 0.20306968688964844 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.3415514826774597, "learning_rate": 1.5324763336102333e-05, "loss": 0.1447, "step": 17148, "teacher_loss": 0.12282686680555344 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.49278968572616577, "learning_rate": 1.532249261503757e-05, "loss": 0.2198, "step": 17149, "teacher_loss": 0.18943192064762115 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.551611602306366, "learning_rate": 1.5320221886579022e-05, "loss": 0.4034, "step": 17150, "teacher_loss": 0.3869735598564148 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.29609113931655884, "learning_rate": 1.531795115077875e-05, "loss": 0.282, "step": 17151, "teacher_loss": 0.28047001361846924 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.5154891014099121, "learning_rate": 1.531568040768881e-05, "loss": 0.2672, "step": 17152, "teacher_loss": 0.23960211873054504 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.7545921802520752, "learning_rate": 1.5313409657361263e-05, "loss": 0.27, "step": 17153, "teacher_loss": 0.21612633764743805 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.32348132133483887, "learning_rate": 1.531113889984817e-05, "loss": 0.2136, "step": 17154, "teacher_loss": 0.20138849318027496 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.44227826595306396, "learning_rate": 1.5308868135201597e-05, "loss": 0.2403, "step": 17155, "teacher_loss": 0.21786090731620789 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.33001893758773804, "learning_rate": 1.5306597363473607e-05, "loss": 0.2259, "step": 17156, "teacher_loss": 0.21437734365463257 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.15579232573509216, "learning_rate": 1.5304326584716255e-05, "loss": 0.1375, "step": 17157, "teacher_loss": 0.13541460037231445 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.1577894389629364, "learning_rate": 1.5302055798981606e-05, "loss": 0.1866, "step": 17158, "teacher_loss": 0.18984350562095642 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.2677778899669647, "learning_rate": 1.5299785006321723e-05, "loss": 0.2198, "step": 17159, "teacher_loss": 0.21442213654518127 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.17450259625911713, "learning_rate": 1.5297514206788673e-05, "loss": 0.1593, "step": 17160, "teacher_loss": 0.15759287774562836 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.3964899778366089, "learning_rate": 1.529524340043451e-05, "loss": 0.2236, "step": 17161, "teacher_loss": 0.2043883502483368 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.41697049140930176, "learning_rate": 1.5292972587311305e-05, "loss": 0.2236, "step": 17162, "teacher_loss": 0.20210015773773193 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.35055863857269287, "learning_rate": 1.5290701767471114e-05, "loss": 0.2626, "step": 17163, "teacher_loss": 0.25284022092819214 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.4551340639591217, "learning_rate": 1.5288430940966e-05, "loss": 0.3041, "step": 17164, "teacher_loss": 0.28732168674468994 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.5325958728790283, "learning_rate": 1.5286160107848036e-05, "loss": 0.4214, "step": 17165, "teacher_loss": 0.4090902805328369 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.32873302698135376, "learning_rate": 1.5283889268169274e-05, "loss": 0.271, "step": 17166, "teacher_loss": 0.264535129070282 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.1491089165210724, "learning_rate": 1.5281618421981788e-05, "loss": 0.2093, "step": 17167, "teacher_loss": 0.21599061787128448 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.41085103154182434, "learning_rate": 1.5279347569337627e-05, "loss": 0.3831, "step": 17168, "teacher_loss": 0.38001060485839844 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.39177989959716797, "learning_rate": 1.5277076710288867e-05, "loss": 0.3015, "step": 17169, "teacher_loss": 0.2914811670780182 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.9250391125679016, "learning_rate": 1.5274805844887574e-05, "loss": 0.312, "step": 17170, "teacher_loss": 0.24393875896930695 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.36199951171875, "learning_rate": 1.5272534973185802e-05, "loss": 0.311, "step": 17171, "teacher_loss": 0.3053860068321228 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.24936191737651825, "learning_rate": 1.527026409523562e-05, "loss": 0.208, "step": 17172, "teacher_loss": 0.2034320831298828 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.2587032914161682, "learning_rate": 1.5267993211089095e-05, "loss": 0.1899, "step": 17173, "teacher_loss": 0.18229596316814423 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.17346785962581635, "learning_rate": 1.5265722320798284e-05, "loss": 0.1878, "step": 17174, "teacher_loss": 0.18940353393554688 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.5105569362640381, "learning_rate": 1.5263451424415267e-05, "loss": 0.2487, "step": 17175, "teacher_loss": 0.21955807507038116 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.20995180308818817, "learning_rate": 1.526118052199209e-05, "loss": 0.1509, "step": 17176, "teacher_loss": 0.14435049891471863 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.14361567795276642, "learning_rate": 1.525890961358083e-05, "loss": 0.2067, "step": 17177, "teacher_loss": 0.2136983871459961 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.6527017951011658, "learning_rate": 1.5256638699233548e-05, "loss": 0.3164, "step": 17178, "teacher_loss": 0.27902650833129883 }, { "compression_loss": 0.0, "epoch": 3.1, "label_loss": 0.31982672214508057, "learning_rate": 1.5254367779002309e-05, "loss": 0.2659, "step": 17179, "teacher_loss": 0.2599627375602722 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.5161340236663818, "learning_rate": 1.5252096852939181e-05, "loss": 0.2354, "step": 17180, "teacher_loss": 0.2042478621006012 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.46684837341308594, "learning_rate": 1.5249825921096228e-05, "loss": 0.2445, "step": 17181, "teacher_loss": 0.21979549527168274 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6748241186141968, "learning_rate": 1.5247554983525516e-05, "loss": 0.2707, "step": 17182, "teacher_loss": 0.22574448585510254 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.7047570943832397, "learning_rate": 1.5245284040279111e-05, "loss": 0.3171, "step": 17183, "teacher_loss": 0.27403441071510315 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.378791868686676, "learning_rate": 1.5243013091409073e-05, "loss": 0.2907, "step": 17184, "teacher_loss": 0.28095754981040955 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.22697332501411438, "learning_rate": 1.5240742136967477e-05, "loss": 0.201, "step": 17185, "teacher_loss": 0.1981421411037445 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4791354835033417, "learning_rate": 1.5238471177006388e-05, "loss": 0.2939, "step": 17186, "teacher_loss": 0.27332431077957153 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.34185707569122314, "learning_rate": 1.5236200211577868e-05, "loss": 0.2217, "step": 17187, "teacher_loss": 0.20837756991386414 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6027224659919739, "learning_rate": 1.5233929240733981e-05, "loss": 0.2758, "step": 17188, "teacher_loss": 0.23944266140460968 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.49354612827301025, "learning_rate": 1.5231658264526806e-05, "loss": 0.2485, "step": 17189, "teacher_loss": 0.22126665711402893 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3478447198867798, "learning_rate": 1.5229387283008398e-05, "loss": 0.1734, "step": 17190, "teacher_loss": 0.15400651097297668 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3695114850997925, "learning_rate": 1.5227116296230824e-05, "loss": 0.3932, "step": 17191, "teacher_loss": 0.39582574367523193 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.20844215154647827, "learning_rate": 1.5224845304246163e-05, "loss": 0.1798, "step": 17192, "teacher_loss": 0.17656593024730682 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.8951176404953003, "learning_rate": 1.5222574307106463e-05, "loss": 0.2144, "step": 17193, "teacher_loss": 0.1387416124343872 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.7001396417617798, "learning_rate": 1.5220303304863807e-05, "loss": 0.2505, "step": 17194, "teacher_loss": 0.20050784945487976 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.45884379744529724, "learning_rate": 1.5218032297570255e-05, "loss": 0.1982, "step": 17195, "teacher_loss": 0.16922956705093384 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.5640110373497009, "learning_rate": 1.5215761285277874e-05, "loss": 0.2644, "step": 17196, "teacher_loss": 0.23106548190116882 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.1853107511997223, "learning_rate": 1.5213490268038736e-05, "loss": 0.1897, "step": 17197, "teacher_loss": 0.19020459055900574 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3523780107498169, "learning_rate": 1.5211219245904908e-05, "loss": 0.2902, "step": 17198, "teacher_loss": 0.2832720875740051 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.7154030799865723, "learning_rate": 1.5208948218928453e-05, "loss": 0.266, "step": 17199, "teacher_loss": 0.21611285209655762 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.688982367515564, "learning_rate": 1.5206677187161447e-05, "loss": 0.3168, "step": 17200, "teacher_loss": 0.27550023794174194 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.31435054540634155, "learning_rate": 1.520440615065595e-05, "loss": 0.1772, "step": 17201, "teacher_loss": 0.16199825704097748 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3338039815425873, "learning_rate": 1.5202135109464032e-05, "loss": 0.1847, "step": 17202, "teacher_loss": 0.168122798204422 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.17167316377162933, "learning_rate": 1.5199864063637761e-05, "loss": 0.1637, "step": 17203, "teacher_loss": 0.16282892227172852 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.28445351123809814, "learning_rate": 1.5197593013229204e-05, "loss": 0.1555, "step": 17204, "teacher_loss": 0.14117391407489777 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.16458380222320557, "learning_rate": 1.519532195829044e-05, "loss": 0.1644, "step": 17205, "teacher_loss": 0.16439564526081085 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3590165376663208, "learning_rate": 1.5193050898873522e-05, "loss": 0.2734, "step": 17206, "teacher_loss": 0.2638833522796631 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6366062164306641, "learning_rate": 1.5190779835030527e-05, "loss": 0.2074, "step": 17207, "teacher_loss": 0.15966284275054932 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.8787311315536499, "learning_rate": 1.5188508766813525e-05, "loss": 0.3406, "step": 17208, "teacher_loss": 0.2808261513710022 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 1.0711147785186768, "learning_rate": 1.5186237694274577e-05, "loss": 0.3659, "step": 17209, "teacher_loss": 0.28753867745399475 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3061642646789551, "learning_rate": 1.518396661746576e-05, "loss": 0.21, "step": 17210, "teacher_loss": 0.19927716255187988 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4438420534133911, "learning_rate": 1.518169553643914e-05, "loss": 0.2197, "step": 17211, "teacher_loss": 0.19478917121887207 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3180913031101227, "learning_rate": 1.5179424451246787e-05, "loss": 0.2041, "step": 17212, "teacher_loss": 0.19147509336471558 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.1515028029680252, "learning_rate": 1.517715336194077e-05, "loss": 0.1455, "step": 17213, "teacher_loss": 0.14481839537620544 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.12458354979753494, "learning_rate": 1.5174882268573153e-05, "loss": 0.1771, "step": 17214, "teacher_loss": 0.1829504519701004 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.2823290228843689, "learning_rate": 1.5172611171196014e-05, "loss": 0.1686, "step": 17215, "teacher_loss": 0.15591681003570557 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.28266263008117676, "learning_rate": 1.517034006986142e-05, "loss": 0.3185, "step": 17216, "teacher_loss": 0.3224581480026245 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.370505154132843, "learning_rate": 1.5168068964621436e-05, "loss": 0.2427, "step": 17217, "teacher_loss": 0.22850686311721802 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.1539417803287506, "learning_rate": 1.516579785552813e-05, "loss": 0.1598, "step": 17218, "teacher_loss": 0.16047051548957825 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.24063509702682495, "learning_rate": 1.5163526742633583e-05, "loss": 0.2104, "step": 17219, "teacher_loss": 0.20701248943805695 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.3107471168041229, "learning_rate": 1.5161255625989859e-05, "loss": 0.229, "step": 17220, "teacher_loss": 0.21993786096572876 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4418913424015045, "learning_rate": 1.5158984505649021e-05, "loss": 0.2695, "step": 17221, "teacher_loss": 0.25036370754241943 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4919448792934418, "learning_rate": 1.5156713381663148e-05, "loss": 0.3191, "step": 17222, "teacher_loss": 0.29993951320648193 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4021309018135071, "learning_rate": 1.5154442254084307e-05, "loss": 0.233, "step": 17223, "teacher_loss": 0.2142338901758194 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.35398128628730774, "learning_rate": 1.5152171122964569e-05, "loss": 0.2655, "step": 17224, "teacher_loss": 0.255683958530426 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6045404076576233, "learning_rate": 1.5149899988356005e-05, "loss": 0.2901, "step": 17225, "teacher_loss": 0.2551615536212921 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.12888306379318237, "learning_rate": 1.5147628850310675e-05, "loss": 0.1842, "step": 17226, "teacher_loss": 0.19036611914634705 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.23146682977676392, "learning_rate": 1.5145357708880667e-05, "loss": 0.2177, "step": 17227, "teacher_loss": 0.21614739298820496 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.954110860824585, "learning_rate": 1.5143086564118042e-05, "loss": 0.2696, "step": 17228, "teacher_loss": 0.19348779320716858 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.19229744374752045, "learning_rate": 1.5140815416074864e-05, "loss": 0.1845, "step": 17229, "teacher_loss": 0.1836622804403305 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.5042635202407837, "learning_rate": 1.513854426480322e-05, "loss": 0.3646, "step": 17230, "teacher_loss": 0.3491074740886688 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6336063742637634, "learning_rate": 1.5136273110355166e-05, "loss": 0.2297, "step": 17231, "teacher_loss": 0.18479332327842712 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.4356980621814728, "learning_rate": 1.5134001952782778e-05, "loss": 0.2233, "step": 17232, "teacher_loss": 0.19973576068878174 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6457315683364868, "learning_rate": 1.513173079213813e-05, "loss": 0.2235, "step": 17233, "teacher_loss": 0.17659424245357513 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.6848845481872559, "learning_rate": 1.5129459628473284e-05, "loss": 0.3156, "step": 17234, "teacher_loss": 0.2745181918144226 }, { "compression_loss": 0.0, "epoch": 3.11, "label_loss": 0.20801648497581482, "learning_rate": 1.5127188461840327e-05, "loss": 0.1317, "step": 17235, "teacher_loss": 0.12322592735290527 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5595203042030334, "learning_rate": 1.5124917292291312e-05, "loss": 0.4279, "step": 17236, "teacher_loss": 0.41329264640808105 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6127294301986694, "learning_rate": 1.512264611987832e-05, "loss": 0.2432, "step": 17237, "teacher_loss": 0.20211821794509888 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.17504458129405975, "learning_rate": 1.5120374944653419e-05, "loss": 0.1633, "step": 17238, "teacher_loss": 0.1620059609413147 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.2954214811325073, "learning_rate": 1.5118103766668682e-05, "loss": 0.2111, "step": 17239, "teacher_loss": 0.20170819759368896 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.35241201519966125, "learning_rate": 1.5115832585976181e-05, "loss": 0.187, "step": 17240, "teacher_loss": 0.16860079765319824 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3200010359287262, "learning_rate": 1.5113561402627983e-05, "loss": 0.1986, "step": 17241, "teacher_loss": 0.18508964776992798 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.44506824016571045, "learning_rate": 1.5111290216676162e-05, "loss": 0.2359, "step": 17242, "teacher_loss": 0.2126464992761612 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.4178299605846405, "learning_rate": 1.5109019028172796e-05, "loss": 0.2529, "step": 17243, "teacher_loss": 0.2345495969057083 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5821305513381958, "learning_rate": 1.5106747837169949e-05, "loss": 0.2204, "step": 17244, "teacher_loss": 0.18020527064800262 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5139521360397339, "learning_rate": 1.5104476643719692e-05, "loss": 0.2246, "step": 17245, "teacher_loss": 0.19250378012657166 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.48922663927078247, "learning_rate": 1.5102205447874098e-05, "loss": 0.2575, "step": 17246, "teacher_loss": 0.23173211514949799 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.2536476254463196, "learning_rate": 1.509993424968524e-05, "loss": 0.2202, "step": 17247, "teacher_loss": 0.2164355218410492 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.34476983547210693, "learning_rate": 1.509766304920519e-05, "loss": 0.1837, "step": 17248, "teacher_loss": 0.16580624878406525 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.7201471328735352, "learning_rate": 1.5095391846486018e-05, "loss": 0.243, "step": 17249, "teacher_loss": 0.1899665743112564 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3451116681098938, "learning_rate": 1.50931206415798e-05, "loss": 0.1725, "step": 17250, "teacher_loss": 0.15337088704109192 }, { "epoch": 3.12, "eval_exact_match": 79.73509933774834, "eval_f1": 87.31426402735542, "step": 17250 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.32742929458618164, "learning_rate": 1.5090849434538602e-05, "loss": 0.2051, "step": 17251, "teacher_loss": 0.19149544835090637 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.2632191479206085, "learning_rate": 1.50885782254145e-05, "loss": 0.1434, "step": 17252, "teacher_loss": 0.13006706535816193 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.7957450747489929, "learning_rate": 1.5086307014259567e-05, "loss": 0.3074, "step": 17253, "teacher_loss": 0.2530926465988159 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.400049090385437, "learning_rate": 1.5084035801125865e-05, "loss": 0.2307, "step": 17254, "teacher_loss": 0.21188125014305115 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.24235990643501282, "learning_rate": 1.5081764586065484e-05, "loss": 0.2006, "step": 17255, "teacher_loss": 0.19593161344528198 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.7102748155593872, "learning_rate": 1.5079493369130481e-05, "loss": 0.2861, "step": 17256, "teacher_loss": 0.23894359171390533 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6256086826324463, "learning_rate": 1.5077222150372933e-05, "loss": 0.1996, "step": 17257, "teacher_loss": 0.15222826600074768 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.17724823951721191, "learning_rate": 1.5074950929844916e-05, "loss": 0.1784, "step": 17258, "teacher_loss": 0.17855240404605865 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.20423980057239532, "learning_rate": 1.5072679707598495e-05, "loss": 0.175, "step": 17259, "teacher_loss": 0.17169588804244995 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.7947405576705933, "learning_rate": 1.507040848368575e-05, "loss": 0.2721, "step": 17260, "teacher_loss": 0.2140299677848816 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.394207239151001, "learning_rate": 1.5068137258158746e-05, "loss": 0.2206, "step": 17261, "teacher_loss": 0.20128047466278076 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.17013733088970184, "learning_rate": 1.5065866031069561e-05, "loss": 0.2894, "step": 17262, "teacher_loss": 0.3026968538761139 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5336381196975708, "learning_rate": 1.5063594802470264e-05, "loss": 0.2127, "step": 17263, "teacher_loss": 0.1770850121974945 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.33640623092651367, "learning_rate": 1.5061323572412927e-05, "loss": 0.2624, "step": 17264, "teacher_loss": 0.25413405895233154 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6635050773620605, "learning_rate": 1.5059052340949631e-05, "loss": 0.2783, "step": 17265, "teacher_loss": 0.23552128672599792 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.27809059619903564, "learning_rate": 1.5056781108132435e-05, "loss": 0.2337, "step": 17266, "teacher_loss": 0.22874122858047485 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.7809144258499146, "learning_rate": 1.5054509874013421e-05, "loss": 0.3121, "step": 17267, "teacher_loss": 0.26003915071487427 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.1828133463859558, "learning_rate": 1.5052238638644663e-05, "loss": 0.1581, "step": 17268, "teacher_loss": 0.1553703397512436 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.2469458281993866, "learning_rate": 1.5049967402078224e-05, "loss": 0.187, "step": 17269, "teacher_loss": 0.18037953972816467 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5360699892044067, "learning_rate": 1.5047696164366183e-05, "loss": 0.2615, "step": 17270, "teacher_loss": 0.2310405969619751 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.5356260538101196, "learning_rate": 1.5045424925560613e-05, "loss": 0.3302, "step": 17271, "teacher_loss": 0.3074026107788086 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.2949972152709961, "learning_rate": 1.5043153685713584e-05, "loss": 0.2089, "step": 17272, "teacher_loss": 0.19932758808135986 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3311798572540283, "learning_rate": 1.5040882444877173e-05, "loss": 0.2475, "step": 17273, "teacher_loss": 0.23816128075122833 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.8101736307144165, "learning_rate": 1.5038611203103451e-05, "loss": 0.3525, "step": 17274, "teacher_loss": 0.3016057014465332 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3346106708049774, "learning_rate": 1.5036339960444488e-05, "loss": 0.2306, "step": 17275, "teacher_loss": 0.21909326314926147 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6711146235466003, "learning_rate": 1.5034068716952361e-05, "loss": 0.4125, "step": 17276, "teacher_loss": 0.3837779760360718 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3212396502494812, "learning_rate": 1.5031797472679136e-05, "loss": 0.1883, "step": 17277, "teacher_loss": 0.17357513308525085 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.49728041887283325, "learning_rate": 1.5029526227676891e-05, "loss": 0.248, "step": 17278, "teacher_loss": 0.2202717512845993 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.21826279163360596, "learning_rate": 1.50272549819977e-05, "loss": 0.2022, "step": 17279, "teacher_loss": 0.20043213665485382 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.4382314085960388, "learning_rate": 1.5024983735693635e-05, "loss": 0.2304, "step": 17280, "teacher_loss": 0.20733627676963806 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3459300398826599, "learning_rate": 1.5022712488816767e-05, "loss": 0.2495, "step": 17281, "teacher_loss": 0.23879435658454895 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.33973464369773865, "learning_rate": 1.5020441241419167e-05, "loss": 0.224, "step": 17282, "teacher_loss": 0.2111472189426422 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.46012091636657715, "learning_rate": 1.5018169993552916e-05, "loss": 0.2198, "step": 17283, "teacher_loss": 0.19307363033294678 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6901553273200989, "learning_rate": 1.5015898745270074e-05, "loss": 0.3467, "step": 17284, "teacher_loss": 0.3085027039051056 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.20587536692619324, "learning_rate": 1.5013627496622732e-05, "loss": 0.24, "step": 17285, "teacher_loss": 0.2437414526939392 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.6419857740402222, "learning_rate": 1.5011356247662942e-05, "loss": 0.2972, "step": 17286, "teacher_loss": 0.2588920295238495 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.28395235538482666, "learning_rate": 1.5009084998442793e-05, "loss": 0.183, "step": 17287, "teacher_loss": 0.1718360185623169 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3092729449272156, "learning_rate": 1.5006813749014353e-05, "loss": 0.2128, "step": 17288, "teacher_loss": 0.2020946741104126 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.3032781183719635, "learning_rate": 1.5004542499429686e-05, "loss": 0.2324, "step": 17289, "teacher_loss": 0.224469393491745 }, { "compression_loss": 0.0, "epoch": 3.12, "label_loss": 0.8242799639701843, "learning_rate": 1.5002271249740886e-05, "loss": 0.2882, "step": 17290, "teacher_loss": 0.22858509421348572 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3714783787727356, "learning_rate": 1.5e-05, "loss": 0.2344, "step": 17291, "teacher_loss": 0.2191598117351532 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3011588454246521, "learning_rate": 1.4997728750259122e-05, "loss": 0.2366, "step": 17292, "teacher_loss": 0.22947613894939423 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5027822256088257, "learning_rate": 1.4995457500570312e-05, "loss": 0.3898, "step": 17293, "teacher_loss": 0.3772868812084198 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.26266777515411377, "learning_rate": 1.4993186250985651e-05, "loss": 0.2882, "step": 17294, "teacher_loss": 0.29101717472076416 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.4714064598083496, "learning_rate": 1.4990915001557211e-05, "loss": 0.2854, "step": 17295, "teacher_loss": 0.26474443078041077 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.8741016387939453, "learning_rate": 1.4988643752337058e-05, "loss": 0.3245, "step": 17296, "teacher_loss": 0.26347556710243225 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.24074003100395203, "learning_rate": 1.4986372503377272e-05, "loss": 0.1833, "step": 17297, "teacher_loss": 0.17694228887557983 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.45717424154281616, "learning_rate": 1.4984101254729928e-05, "loss": 0.3024, "step": 17298, "teacher_loss": 0.28524285554885864 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.14835584163665771, "learning_rate": 1.4981830006447084e-05, "loss": 0.2015, "step": 17299, "teacher_loss": 0.20742233097553253 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 1.518599510192871, "learning_rate": 1.4979558758580835e-05, "loss": 0.3251, "step": 17300, "teacher_loss": 0.19244027137756348 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5807650089263916, "learning_rate": 1.4977287511183238e-05, "loss": 0.247, "step": 17301, "teacher_loss": 0.20989114046096802 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.7127798795700073, "learning_rate": 1.497501626430637e-05, "loss": 0.3071, "step": 17302, "teacher_loss": 0.2620091438293457 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.23384368419647217, "learning_rate": 1.49727450180023e-05, "loss": 0.1886, "step": 17303, "teacher_loss": 0.1836071014404297 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3913388252258301, "learning_rate": 1.497047377232311e-05, "loss": 0.2028, "step": 17304, "teacher_loss": 0.18187090754508972 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3458684980869293, "learning_rate": 1.4968202527320868e-05, "loss": 0.198, "step": 17305, "teacher_loss": 0.18154026567935944 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5905870795249939, "learning_rate": 1.4965931283047643e-05, "loss": 0.2405, "step": 17306, "teacher_loss": 0.20164187252521515 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.4692143201828003, "learning_rate": 1.4963660039555515e-05, "loss": 0.2561, "step": 17307, "teacher_loss": 0.23243489861488342 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5120751857757568, "learning_rate": 1.4961388796896556e-05, "loss": 0.2353, "step": 17308, "teacher_loss": 0.20450431108474731 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5066673755645752, "learning_rate": 1.4959117555122826e-05, "loss": 0.3804, "step": 17309, "teacher_loss": 0.36641961336135864 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6234026551246643, "learning_rate": 1.4956846314286418e-05, "loss": 0.3004, "step": 17310, "teacher_loss": 0.2645578980445862 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6348604559898376, "learning_rate": 1.4954575074439392e-05, "loss": 0.2721, "step": 17311, "teacher_loss": 0.23182141780853271 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.38520652055740356, "learning_rate": 1.4952303835633817e-05, "loss": 0.2117, "step": 17312, "teacher_loss": 0.19240404665470123 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.30941373109817505, "learning_rate": 1.4950032597921779e-05, "loss": 0.1894, "step": 17313, "teacher_loss": 0.1760375052690506 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5903949737548828, "learning_rate": 1.4947761361355344e-05, "loss": 0.2302, "step": 17314, "teacher_loss": 0.19021350145339966 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.4451650381088257, "learning_rate": 1.494549012598658e-05, "loss": 0.2436, "step": 17315, "teacher_loss": 0.2211552858352661 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3629075586795807, "learning_rate": 1.4943218891867567e-05, "loss": 0.2752, "step": 17316, "teacher_loss": 0.26550936698913574 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.8448565006256104, "learning_rate": 1.4940947659050374e-05, "loss": 0.305, "step": 17317, "teacher_loss": 0.2449950873851776 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.7690628170967102, "learning_rate": 1.4938676427587073e-05, "loss": 0.3355, "step": 17318, "teacher_loss": 0.2872878909111023 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 1.2261710166931152, "learning_rate": 1.4936405197529739e-05, "loss": 0.3367, "step": 17319, "teacher_loss": 0.23790243268013 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.22403885424137115, "learning_rate": 1.4934133968930441e-05, "loss": 0.1844, "step": 17320, "teacher_loss": 0.1800086498260498 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.17598478496074677, "learning_rate": 1.4931862741841253e-05, "loss": 0.1978, "step": 17321, "teacher_loss": 0.20019902288913727 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6885719895362854, "learning_rate": 1.4929591516314251e-05, "loss": 0.256, "step": 17322, "teacher_loss": 0.20796188712120056 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.22574710845947266, "learning_rate": 1.492732029240151e-05, "loss": 0.1682, "step": 17323, "teacher_loss": 0.16186118125915527 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.18088002502918243, "learning_rate": 1.4925049070155091e-05, "loss": 0.167, "step": 17324, "teacher_loss": 0.1654605120420456 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.34204718470573425, "learning_rate": 1.4922777849627068e-05, "loss": 0.1736, "step": 17325, "teacher_loss": 0.15493807196617126 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6257104277610779, "learning_rate": 1.4920506630869523e-05, "loss": 0.2052, "step": 17326, "teacher_loss": 0.15847787261009216 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.31872308254241943, "learning_rate": 1.491823541393452e-05, "loss": 0.2294, "step": 17327, "teacher_loss": 0.21951383352279663 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5625432729721069, "learning_rate": 1.4915964198874133e-05, "loss": 0.3538, "step": 17328, "teacher_loss": 0.3305727243423462 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6109493970870972, "learning_rate": 1.4913692985740438e-05, "loss": 0.2188, "step": 17329, "teacher_loss": 0.17526112496852875 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.8130006790161133, "learning_rate": 1.4911421774585503e-05, "loss": 0.3233, "step": 17330, "teacher_loss": 0.2689291536808014 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.2950381338596344, "learning_rate": 1.4909150565461397e-05, "loss": 0.1835, "step": 17331, "teacher_loss": 0.17115579545497894 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.2552592158317566, "learning_rate": 1.4906879358420201e-05, "loss": 0.1449, "step": 17332, "teacher_loss": 0.13260522484779358 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.9623233079910278, "learning_rate": 1.4904608153513986e-05, "loss": 0.3492, "step": 17333, "teacher_loss": 0.28105485439300537 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.12002383917570114, "learning_rate": 1.4902336950794808e-05, "loss": 0.1455, "step": 17334, "teacher_loss": 0.1483183205127716 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.32650214433670044, "learning_rate": 1.4900065750314762e-05, "loss": 0.2309, "step": 17335, "teacher_loss": 0.22024744749069214 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 1.1242852210998535, "learning_rate": 1.4897794552125906e-05, "loss": 0.5022, "step": 17336, "teacher_loss": 0.43306225538253784 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.3050174415111542, "learning_rate": 1.4895523356280312e-05, "loss": 0.1474, "step": 17337, "teacher_loss": 0.12990710139274597 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.1475551426410675, "learning_rate": 1.4893252162830055e-05, "loss": 0.236, "step": 17338, "teacher_loss": 0.24583858251571655 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.25043943524360657, "learning_rate": 1.4890980971827206e-05, "loss": 0.218, "step": 17339, "teacher_loss": 0.2144230157136917 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6438193321228027, "learning_rate": 1.4888709783323835e-05, "loss": 0.4636, "step": 17340, "teacher_loss": 0.4435848295688629 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.544432520866394, "learning_rate": 1.4886438597372018e-05, "loss": 0.2158, "step": 17341, "teacher_loss": 0.17924290895462036 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.35039812326431274, "learning_rate": 1.4884167414023823e-05, "loss": 0.2581, "step": 17342, "teacher_loss": 0.24789577722549438 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.42001771926879883, "learning_rate": 1.4881896233331319e-05, "loss": 0.2221, "step": 17343, "teacher_loss": 0.20007112622261047 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.6026032567024231, "learning_rate": 1.4879625055346584e-05, "loss": 0.2893, "step": 17344, "teacher_loss": 0.2545427083969116 }, { "compression_loss": 0.0, "epoch": 3.13, "label_loss": 0.5349169969558716, "learning_rate": 1.4877353880121684e-05, "loss": 0.3016, "step": 17345, "teacher_loss": 0.27568987011909485 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.6087794303894043, "learning_rate": 1.4875082707708696e-05, "loss": 0.2252, "step": 17346, "teacher_loss": 0.18255363404750824 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.7299664616584778, "learning_rate": 1.4872811538159675e-05, "loss": 0.2888, "step": 17347, "teacher_loss": 0.2397913634777069 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5684302449226379, "learning_rate": 1.4870540371526718e-05, "loss": 0.2399, "step": 17348, "teacher_loss": 0.2033928781747818 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.14928099513053894, "learning_rate": 1.4868269207861877e-05, "loss": 0.1681, "step": 17349, "teacher_loss": 0.17017295956611633 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.323599249124527, "learning_rate": 1.4865998047217222e-05, "loss": 0.2584, "step": 17350, "teacher_loss": 0.2511252164840698 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.2226908951997757, "learning_rate": 1.4863726889644838e-05, "loss": 0.1554, "step": 17351, "teacher_loss": 0.14787116646766663 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.27588048577308655, "learning_rate": 1.4861455735196787e-05, "loss": 0.1827, "step": 17352, "teacher_loss": 0.17239932715892792 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.22517824172973633, "learning_rate": 1.4859184583925133e-05, "loss": 0.1901, "step": 17353, "teacher_loss": 0.18622267246246338 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.36028891801834106, "learning_rate": 1.4856913435881963e-05, "loss": 0.2088, "step": 17354, "teacher_loss": 0.19196957349777222 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.608284056186676, "learning_rate": 1.4854642291119335e-05, "loss": 0.2621, "step": 17355, "teacher_loss": 0.22361181676387787 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.3511553406715393, "learning_rate": 1.4852371149689322e-05, "loss": 0.2394, "step": 17356, "teacher_loss": 0.22698359191417694 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 1.0278294086456299, "learning_rate": 1.4850100011644e-05, "loss": 0.3565, "step": 17357, "teacher_loss": 0.28185874223709106 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.33907994627952576, "learning_rate": 1.4847828877035437e-05, "loss": 0.2345, "step": 17358, "teacher_loss": 0.2228931188583374 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.9033491611480713, "learning_rate": 1.4845557745915692e-05, "loss": 0.3607, "step": 17359, "teacher_loss": 0.30040591955184937 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.7437979578971863, "learning_rate": 1.4843286618336856e-05, "loss": 0.2709, "step": 17360, "teacher_loss": 0.21839208900928497 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.546278715133667, "learning_rate": 1.4841015494350983e-05, "loss": 0.2817, "step": 17361, "teacher_loss": 0.2523575723171234 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.4615083336830139, "learning_rate": 1.4838744374010142e-05, "loss": 0.4003, "step": 17362, "teacher_loss": 0.393510103225708 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 1.232541799545288, "learning_rate": 1.483647325736642e-05, "loss": 0.4144, "step": 17363, "teacher_loss": 0.32349854707717896 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.4634174406528473, "learning_rate": 1.4834202144471872e-05, "loss": 0.2168, "step": 17364, "teacher_loss": 0.18938776850700378 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5026025176048279, "learning_rate": 1.483193103537857e-05, "loss": 0.2513, "step": 17365, "teacher_loss": 0.22332298755645752 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.6822613477706909, "learning_rate": 1.4829659930138585e-05, "loss": 0.2522, "step": 17366, "teacher_loss": 0.20437918603420258 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.10683953762054443, "learning_rate": 1.4827388828803989e-05, "loss": 0.1552, "step": 17367, "teacher_loss": 0.16059842705726624 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.209193617105484, "learning_rate": 1.482511773142685e-05, "loss": 0.1378, "step": 17368, "teacher_loss": 0.1298552006483078 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5235681533813477, "learning_rate": 1.4822846638059234e-05, "loss": 0.2145, "step": 17369, "teacher_loss": 0.18014344573020935 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.45085757970809937, "learning_rate": 1.4820575548753215e-05, "loss": 0.28, "step": 17370, "teacher_loss": 0.26097390055656433 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.24813106656074524, "learning_rate": 1.4818304463560865e-05, "loss": 0.1768, "step": 17371, "teacher_loss": 0.16886979341506958 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.3081740140914917, "learning_rate": 1.481603338253424e-05, "loss": 0.1931, "step": 17372, "teacher_loss": 0.18032169342041016 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.6924455165863037, "learning_rate": 1.4813762305725426e-05, "loss": 0.2884, "step": 17373, "teacher_loss": 0.24348080158233643 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.8001669049263, "learning_rate": 1.4811491233186482e-05, "loss": 0.3038, "step": 17374, "teacher_loss": 0.24869704246520996 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 1.205256700515747, "learning_rate": 1.4809220164969475e-05, "loss": 0.3118, "step": 17375, "teacher_loss": 0.21257489919662476 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.14333710074424744, "learning_rate": 1.4806949101126482e-05, "loss": 0.2143, "step": 17376, "teacher_loss": 0.222197026014328 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.10029410570859909, "learning_rate": 1.4804678041709566e-05, "loss": 0.1404, "step": 17377, "teacher_loss": 0.14483875036239624 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.16031135618686676, "learning_rate": 1.4802406986770795e-05, "loss": 0.1612, "step": 17378, "teacher_loss": 0.1613508015871048 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.12132205069065094, "learning_rate": 1.4800135936362243e-05, "loss": 0.1602, "step": 17379, "teacher_loss": 0.16454055905342102 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.4132627248764038, "learning_rate": 1.4797864890535973e-05, "loss": 0.4087, "step": 17380, "teacher_loss": 0.4081794023513794 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.9257434010505676, "learning_rate": 1.4795593849344052e-05, "loss": 0.2569, "step": 17381, "teacher_loss": 0.18255430459976196 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.18237711489200592, "learning_rate": 1.4793322812838555e-05, "loss": 0.2015, "step": 17382, "teacher_loss": 0.20359155535697937 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.2116706669330597, "learning_rate": 1.479105178107155e-05, "loss": 0.2096, "step": 17383, "teacher_loss": 0.20936527848243713 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.41488876938819885, "learning_rate": 1.478878075409509e-05, "loss": 0.3753, "step": 17384, "teacher_loss": 0.3709355294704437 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.6006801724433899, "learning_rate": 1.4786509731961264e-05, "loss": 0.3384, "step": 17385, "teacher_loss": 0.3093082010746002 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.6671277284622192, "learning_rate": 1.4784238714722129e-05, "loss": 0.4112, "step": 17386, "teacher_loss": 0.38274455070495605 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5413326621055603, "learning_rate": 1.478196770242975e-05, "loss": 0.1904, "step": 17387, "teacher_loss": 0.1514175832271576 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.28974029421806335, "learning_rate": 1.4779696695136197e-05, "loss": 0.2304, "step": 17388, "teacher_loss": 0.22377179563045502 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.884414553642273, "learning_rate": 1.477742569289354e-05, "loss": 0.3112, "step": 17389, "teacher_loss": 0.24747012555599213 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.16819430887699127, "learning_rate": 1.4775154695753845e-05, "loss": 0.1979, "step": 17390, "teacher_loss": 0.20117659866809845 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.4513154625892639, "learning_rate": 1.4772883703769177e-05, "loss": 0.2656, "step": 17391, "teacher_loss": 0.24493777751922607 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5327795743942261, "learning_rate": 1.4770612716991606e-05, "loss": 0.3886, "step": 17392, "teacher_loss": 0.3725517988204956 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5828357338905334, "learning_rate": 1.4768341735473198e-05, "loss": 0.2096, "step": 17393, "teacher_loss": 0.16809405386447906 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.37763193249702454, "learning_rate": 1.4766070759266016e-05, "loss": 0.2846, "step": 17394, "teacher_loss": 0.2742539644241333 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.48016589879989624, "learning_rate": 1.4763799788422135e-05, "loss": 0.2002, "step": 17395, "teacher_loss": 0.16908231377601624 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.5175419449806213, "learning_rate": 1.4761528822993618e-05, "loss": 0.4087, "step": 17396, "teacher_loss": 0.3966619074344635 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.274973064661026, "learning_rate": 1.4759257863032522e-05, "loss": 0.1866, "step": 17397, "teacher_loss": 0.1767333447933197 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.75236576795578, "learning_rate": 1.475698690859093e-05, "loss": 0.2922, "step": 17398, "teacher_loss": 0.24108746647834778 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.41506046056747437, "learning_rate": 1.4754715959720897e-05, "loss": 0.1797, "step": 17399, "teacher_loss": 0.15354523062705994 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.4944053888320923, "learning_rate": 1.4752445016474489e-05, "loss": 0.2465, "step": 17400, "teacher_loss": 0.21899157762527466 }, { "compression_loss": 0.0, "epoch": 3.14, "label_loss": 0.3569111227989197, "learning_rate": 1.4750174078903776e-05, "loss": 0.213, "step": 17401, "teacher_loss": 0.1970204859972 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5005359649658203, "learning_rate": 1.4747903147060823e-05, "loss": 0.2421, "step": 17402, "teacher_loss": 0.2134018838405609 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5222065448760986, "learning_rate": 1.4745632220997692e-05, "loss": 0.2522, "step": 17403, "teacher_loss": 0.22216098010540009 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.6102386713027954, "learning_rate": 1.4743361300766455e-05, "loss": 0.2705, "step": 17404, "teacher_loss": 0.23276478052139282 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.20437079668045044, "learning_rate": 1.4741090386419173e-05, "loss": 0.203, "step": 17405, "teacher_loss": 0.20280733704566956 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.31127116084098816, "learning_rate": 1.4738819478007918e-05, "loss": 0.2524, "step": 17406, "teacher_loss": 0.24581119418144226 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.32153379917144775, "learning_rate": 1.4736548575584737e-05, "loss": 0.1917, "step": 17407, "teacher_loss": 0.17727014422416687 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.40570133924484253, "learning_rate": 1.4734277679201716e-05, "loss": 0.215, "step": 17408, "teacher_loss": 0.19385957717895508 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.16944542527198792, "learning_rate": 1.4732006788910912e-05, "loss": 0.1599, "step": 17409, "teacher_loss": 0.15888236463069916 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.7532769441604614, "learning_rate": 1.4729735904764383e-05, "loss": 0.4933, "step": 17410, "teacher_loss": 0.4644352197647095 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.13085927069187164, "learning_rate": 1.4727465026814204e-05, "loss": 0.1981, "step": 17411, "teacher_loss": 0.20560771226882935 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.20340105891227722, "learning_rate": 1.4725194155112432e-05, "loss": 0.1799, "step": 17412, "teacher_loss": 0.17723365128040314 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4465812146663666, "learning_rate": 1.4722923289711133e-05, "loss": 0.2701, "step": 17413, "teacher_loss": 0.25051164627075195 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.3256910443305969, "learning_rate": 1.4720652430662375e-05, "loss": 0.1804, "step": 17414, "teacher_loss": 0.1642308533191681 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.26035094261169434, "learning_rate": 1.4718381578018218e-05, "loss": 0.1924, "step": 17415, "teacher_loss": 0.18489119410514832 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.3798878788948059, "learning_rate": 1.4716110731830727e-05, "loss": 0.2, "step": 17416, "teacher_loss": 0.18005529046058655 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.548895001411438, "learning_rate": 1.4713839892151968e-05, "loss": 0.2037, "step": 17417, "teacher_loss": 0.16538025438785553 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5879232883453369, "learning_rate": 1.4711569059034e-05, "loss": 0.4327, "step": 17418, "teacher_loss": 0.4154396951198578 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.16994859278202057, "learning_rate": 1.4709298232528887e-05, "loss": 0.1228, "step": 17419, "teacher_loss": 0.11754976958036423 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.6238703727722168, "learning_rate": 1.4707027412688698e-05, "loss": 0.22, "step": 17420, "teacher_loss": 0.1751675307750702 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.32937946915626526, "learning_rate": 1.4704756599565494e-05, "loss": 0.1813, "step": 17421, "teacher_loss": 0.1648046374320984 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4378325939178467, "learning_rate": 1.4702485793211328e-05, "loss": 0.1926, "step": 17422, "teacher_loss": 0.16530011594295502 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.3265809714794159, "learning_rate": 1.470021499367828e-05, "loss": 0.1882, "step": 17423, "teacher_loss": 0.1728629171848297 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5963238477706909, "learning_rate": 1.4697944201018398e-05, "loss": 0.2664, "step": 17424, "teacher_loss": 0.22971251606941223 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.38765954971313477, "learning_rate": 1.4695673415283747e-05, "loss": 0.2882, "step": 17425, "teacher_loss": 0.2770967483520508 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.29905790090560913, "learning_rate": 1.4693402636526398e-05, "loss": 0.2809, "step": 17426, "teacher_loss": 0.2788505554199219 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.30916622281074524, "learning_rate": 1.4691131864798405e-05, "loss": 0.2316, "step": 17427, "teacher_loss": 0.22302895784378052 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.9052622318267822, "learning_rate": 1.4688861100151833e-05, "loss": 0.3378, "step": 17428, "teacher_loss": 0.2747696042060852 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.527613639831543, "learning_rate": 1.4686590342638739e-05, "loss": 0.3534, "step": 17429, "teacher_loss": 0.33403724431991577 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.3878132700920105, "learning_rate": 1.4684319592311193e-05, "loss": 0.1779, "step": 17430, "teacher_loss": 0.15452352166175842 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.30898457765579224, "learning_rate": 1.4682048849221258e-05, "loss": 0.2685, "step": 17431, "teacher_loss": 0.26400619745254517 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4095154106616974, "learning_rate": 1.4679778113420976e-05, "loss": 0.2569, "step": 17432, "teacher_loss": 0.2399575114250183 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4363976716995239, "learning_rate": 1.4677507384962433e-05, "loss": 0.2646, "step": 17433, "teacher_loss": 0.24546974897384644 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5808266997337341, "learning_rate": 1.4675236663897675e-05, "loss": 0.2284, "step": 17434, "teacher_loss": 0.18919017910957336 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.508952796459198, "learning_rate": 1.4672965950278763e-05, "loss": 0.2901, "step": 17435, "teacher_loss": 0.26581472158432007 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4463152289390564, "learning_rate": 1.4670695244157767e-05, "loss": 0.2683, "step": 17436, "teacher_loss": 0.24847060441970825 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.8172603845596313, "learning_rate": 1.466842454558674e-05, "loss": 0.2527, "step": 17437, "teacher_loss": 0.19002526998519897 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5451173186302185, "learning_rate": 1.466615385461774e-05, "loss": 0.2468, "step": 17438, "teacher_loss": 0.2136991024017334 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.37743890285491943, "learning_rate": 1.4663883171302836e-05, "loss": 0.2211, "step": 17439, "teacher_loss": 0.20369601249694824 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4452858567237854, "learning_rate": 1.4661612495694082e-05, "loss": 0.1903, "step": 17440, "teacher_loss": 0.16195917129516602 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5688092112541199, "learning_rate": 1.4659341827843537e-05, "loss": 0.272, "step": 17441, "teacher_loss": 0.23905879259109497 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4404653310775757, "learning_rate": 1.4657071167803266e-05, "loss": 0.2191, "step": 17442, "teacher_loss": 0.1944507658481598 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.7133147716522217, "learning_rate": 1.4654800515625322e-05, "loss": 0.2844, "step": 17443, "teacher_loss": 0.2367367148399353 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.15977270901203156, "learning_rate": 1.4652529871361766e-05, "loss": 0.2054, "step": 17444, "teacher_loss": 0.2104422003030777 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.9102276563644409, "learning_rate": 1.4650259235064662e-05, "loss": 0.4153, "step": 17445, "teacher_loss": 0.360293447971344 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.8373477458953857, "learning_rate": 1.4647988606786068e-05, "loss": 0.2971, "step": 17446, "teacher_loss": 0.2370269000530243 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.316379189491272, "learning_rate": 1.464571798657803e-05, "loss": 0.1559, "step": 17447, "teacher_loss": 0.13809096813201904 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5214146971702576, "learning_rate": 1.4643447374492625e-05, "loss": 0.3668, "step": 17448, "teacher_loss": 0.3495672941207886 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4202248454093933, "learning_rate": 1.46411767705819e-05, "loss": 0.191, "step": 17449, "teacher_loss": 0.1654840111732483 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.4848083555698395, "learning_rate": 1.4638906174897917e-05, "loss": 0.2988, "step": 17450, "teacher_loss": 0.2780914306640625 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.8171603679656982, "learning_rate": 1.4636635587492729e-05, "loss": 0.2549, "step": 17451, "teacher_loss": 0.19246980547904968 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.38159215450286865, "learning_rate": 1.46343650084184e-05, "loss": 0.1923, "step": 17452, "teacher_loss": 0.17128071188926697 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.5327211618423462, "learning_rate": 1.4632094437726988e-05, "loss": 0.2124, "step": 17453, "teacher_loss": 0.176824152469635 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.1735510230064392, "learning_rate": 1.462982387547054e-05, "loss": 0.2063, "step": 17454, "teacher_loss": 0.20999057590961456 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.28142133355140686, "learning_rate": 1.4627553321701128e-05, "loss": 0.1695, "step": 17455, "teacher_loss": 0.15701445937156677 }, { "compression_loss": 0.0, "epoch": 3.15, "label_loss": 0.20482710003852844, "learning_rate": 1.4625282776470803e-05, "loss": 0.1656, "step": 17456, "teacher_loss": 0.16128851473331451 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.19681161642074585, "learning_rate": 1.462301223983161e-05, "loss": 0.2101, "step": 17457, "teacher_loss": 0.21153885126113892 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5013963580131531, "learning_rate": 1.4620741711835627e-05, "loss": 0.2437, "step": 17458, "teacher_loss": 0.21507784724235535 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.621864914894104, "learning_rate": 1.4618471192534895e-05, "loss": 0.5705, "step": 17459, "teacher_loss": 0.5648020505905151 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2799007296562195, "learning_rate": 1.4616200681981472e-05, "loss": 0.216, "step": 17460, "teacher_loss": 0.20892545580863953 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.8843612670898438, "learning_rate": 1.4613930180227421e-05, "loss": 0.1911, "step": 17461, "teacher_loss": 0.11408155411481857 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.14197197556495667, "learning_rate": 1.461165968732479e-05, "loss": 0.1769, "step": 17462, "teacher_loss": 0.1807783544063568 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.31700652837753296, "learning_rate": 1.460938920332564e-05, "loss": 0.1736, "step": 17463, "teacher_loss": 0.15770840644836426 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3048824071884155, "learning_rate": 1.4607118728282025e-05, "loss": 0.1667, "step": 17464, "teacher_loss": 0.15134331583976746 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3354547917842865, "learning_rate": 1.4604848262245999e-05, "loss": 0.2603, "step": 17465, "teacher_loss": 0.2519644498825073 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.1901552826166153, "learning_rate": 1.4602577805269615e-05, "loss": 0.1651, "step": 17466, "teacher_loss": 0.16234713792800903 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.38609468936920166, "learning_rate": 1.4600307357404933e-05, "loss": 0.1979, "step": 17467, "teacher_loss": 0.1770011931657791 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.18647032976150513, "learning_rate": 1.4598036918704006e-05, "loss": 0.2291, "step": 17468, "teacher_loss": 0.23385076224803925 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5232111215591431, "learning_rate": 1.4595766489218892e-05, "loss": 0.3357, "step": 17469, "teacher_loss": 0.3148711621761322 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.8115173578262329, "learning_rate": 1.4593496069001629e-05, "loss": 0.2884, "step": 17470, "teacher_loss": 0.23026251792907715 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.8417997360229492, "learning_rate": 1.4591225658104292e-05, "loss": 0.3887, "step": 17471, "teacher_loss": 0.3383626341819763 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.965733528137207, "learning_rate": 1.4588955256578922e-05, "loss": 0.3231, "step": 17472, "teacher_loss": 0.2516665756702423 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3409142792224884, "learning_rate": 1.4586684864477572e-05, "loss": 0.3346, "step": 17473, "teacher_loss": 0.3339143991470337 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3837571442127228, "learning_rate": 1.45844144818523e-05, "loss": 0.2486, "step": 17474, "teacher_loss": 0.23362372815608978 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.7640320062637329, "learning_rate": 1.4582144108755162e-05, "loss": 0.5399, "step": 17475, "teacher_loss": 0.5149547457695007 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.16650408506393433, "learning_rate": 1.4579873745238202e-05, "loss": 0.1921, "step": 17476, "teacher_loss": 0.1949939727783203 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.6649729013442993, "learning_rate": 1.4577603391353481e-05, "loss": 0.2945, "step": 17477, "teacher_loss": 0.2533126175403595 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.8058868646621704, "learning_rate": 1.4575333047153046e-05, "loss": 0.3139, "step": 17478, "teacher_loss": 0.2591917812824249 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5767667889595032, "learning_rate": 1.457306271268895e-05, "loss": 0.3302, "step": 17479, "teacher_loss": 0.30282142758369446 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.4302701950073242, "learning_rate": 1.457079238801325e-05, "loss": 0.2949, "step": 17480, "teacher_loss": 0.2799092233181 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2463553547859192, "learning_rate": 1.4568522073177993e-05, "loss": 0.1922, "step": 17481, "teacher_loss": 0.1862202286720276 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.7751959562301636, "learning_rate": 1.4566251768235225e-05, "loss": 0.281, "step": 17482, "teacher_loss": 0.22610455751419067 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.505419909954071, "learning_rate": 1.456398147323701e-05, "loss": 0.288, "step": 17483, "teacher_loss": 0.2638220191001892 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.6560220718383789, "learning_rate": 1.4561711188235393e-05, "loss": 0.2451, "step": 17484, "teacher_loss": 0.1994813233613968 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.4274773895740509, "learning_rate": 1.4559440913282416e-05, "loss": 0.2408, "step": 17485, "teacher_loss": 0.22002321481704712 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.39207181334495544, "learning_rate": 1.4557170648430148e-05, "loss": 0.2943, "step": 17486, "teacher_loss": 0.2834526300430298 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5216990113258362, "learning_rate": 1.4554900393730625e-05, "loss": 0.2427, "step": 17487, "teacher_loss": 0.21164673566818237 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.6370932459831238, "learning_rate": 1.45526301492359e-05, "loss": 0.2455, "step": 17488, "teacher_loss": 0.20193777978420258 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3933643698692322, "learning_rate": 1.4550359914998027e-05, "loss": 0.2547, "step": 17489, "teacher_loss": 0.2392462193965912 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.620413064956665, "learning_rate": 1.4548089691069054e-05, "loss": 0.3088, "step": 17490, "teacher_loss": 0.274222195148468 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 1.4505033493041992, "learning_rate": 1.4545819477501028e-05, "loss": 0.4155, "step": 17491, "teacher_loss": 0.3004941940307617 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.6978291869163513, "learning_rate": 1.4543549274345997e-05, "loss": 0.3205, "step": 17492, "teacher_loss": 0.2786010801792145 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3535919487476349, "learning_rate": 1.4541279081656016e-05, "loss": 0.5062, "step": 17493, "teacher_loss": 0.5232028365135193 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.664519190788269, "learning_rate": 1.4539008899483133e-05, "loss": 0.288, "step": 17494, "teacher_loss": 0.2461758404970169 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.19022926688194275, "learning_rate": 1.4536738727879384e-05, "loss": 0.1568, "step": 17495, "teacher_loss": 0.15306758880615234 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.493743896484375, "learning_rate": 1.4534468566896838e-05, "loss": 0.2899, "step": 17496, "teacher_loss": 0.2673032879829407 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2214827686548233, "learning_rate": 1.4532198416587528e-05, "loss": 0.1983, "step": 17497, "teacher_loss": 0.19575023651123047 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.4366607069969177, "learning_rate": 1.4529928277003502e-05, "loss": 0.2351, "step": 17498, "teacher_loss": 0.2126588672399521 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5405682921409607, "learning_rate": 1.4527658148196815e-05, "loss": 0.2872, "step": 17499, "teacher_loss": 0.25903475284576416 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2243812382221222, "learning_rate": 1.4525388030219512e-05, "loss": 0.2279, "step": 17500, "teacher_loss": 0.22830086946487427 }, { "epoch": 3.16, "eval_exact_match": 79.46073793755913, "eval_f1": 87.098412229066, "step": 17500 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3075556755065918, "learning_rate": 1.4523117923123633e-05, "loss": 0.2043, "step": 17501, "teacher_loss": 0.19281215965747833 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2748276889324188, "learning_rate": 1.4520847826961235e-05, "loss": 0.244, "step": 17502, "teacher_loss": 0.24052734673023224 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2654651403427124, "learning_rate": 1.4518577741784359e-05, "loss": 0.2046, "step": 17503, "teacher_loss": 0.19788596034049988 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 1.288184404373169, "learning_rate": 1.4516307667645048e-05, "loss": 0.6536, "step": 17504, "teacher_loss": 0.583102822303772 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.7251483798027039, "learning_rate": 1.4514037604595356e-05, "loss": 0.2568, "step": 17505, "teacher_loss": 0.20476871728897095 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.8096426725387573, "learning_rate": 1.451176755268733e-05, "loss": 0.3424, "step": 17506, "teacher_loss": 0.2904762029647827 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.3818186819553375, "learning_rate": 1.4509497511972997e-05, "loss": 0.2263, "step": 17507, "teacher_loss": 0.20907238125801086 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.26641252636909485, "learning_rate": 1.4507227482504427e-05, "loss": 0.2059, "step": 17508, "teacher_loss": 0.19913128018379211 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.39385098218917847, "learning_rate": 1.450495746433365e-05, "loss": 0.2183, "step": 17509, "teacher_loss": 0.19881710410118103 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.5063592195510864, "learning_rate": 1.4502687457512714e-05, "loss": 0.2333, "step": 17510, "teacher_loss": 0.2030005156993866 }, { "compression_loss": 0.0, "epoch": 3.16, "label_loss": 0.2908146381378174, "learning_rate": 1.4500417462093662e-05, "loss": 0.2455, "step": 17511, "teacher_loss": 0.24045629799365997 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3965708613395691, "learning_rate": 1.449814747812854e-05, "loss": 0.1726, "step": 17512, "teacher_loss": 0.14776010811328888 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.1032509133219719, "learning_rate": 1.4495877505669393e-05, "loss": 0.1683, "step": 17513, "teacher_loss": 0.1755698323249817 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3869970142841339, "learning_rate": 1.449360754476826e-05, "loss": 0.352, "step": 17514, "teacher_loss": 0.3481558561325073 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3167208433151245, "learning_rate": 1.4491337595477192e-05, "loss": 0.1842, "step": 17515, "teacher_loss": 0.16949915885925293 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3707008361816406, "learning_rate": 1.4489067657848226e-05, "loss": 0.2464, "step": 17516, "teacher_loss": 0.23258526623249054 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.40697360038757324, "learning_rate": 1.4486797731933404e-05, "loss": 0.2671, "step": 17517, "teacher_loss": 0.251510888338089 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.6458220481872559, "learning_rate": 1.4484527817784776e-05, "loss": 0.2548, "step": 17518, "teacher_loss": 0.211335688829422 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5201655626296997, "learning_rate": 1.4482257915454383e-05, "loss": 0.284, "step": 17519, "teacher_loss": 0.2577097415924072 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.9687808156013489, "learning_rate": 1.4479988024994252e-05, "loss": 0.3117, "step": 17520, "teacher_loss": 0.23872385919094086 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.2458811104297638, "learning_rate": 1.4477718146456446e-05, "loss": 0.2148, "step": 17521, "teacher_loss": 0.21139711141586304 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.6077238321304321, "learning_rate": 1.4475448279892996e-05, "loss": 0.2542, "step": 17522, "teacher_loss": 0.21488723158836365 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5509481430053711, "learning_rate": 1.447317842535594e-05, "loss": 0.3242, "step": 17523, "teacher_loss": 0.299039751291275 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.15414343774318695, "learning_rate": 1.4470908582897328e-05, "loss": 0.1858, "step": 17524, "teacher_loss": 0.1893196403980255 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 1.639272689819336, "learning_rate": 1.4468638752569193e-05, "loss": 0.4238, "step": 17525, "teacher_loss": 0.2887035608291626 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.36097022891044617, "learning_rate": 1.4466368934423578e-05, "loss": 0.1981, "step": 17526, "teacher_loss": 0.17999424040317535 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5599234104156494, "learning_rate": 1.4464099128512525e-05, "loss": 0.1892, "step": 17527, "teacher_loss": 0.14800825715065002 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.49424654245376587, "learning_rate": 1.4461829334888073e-05, "loss": 0.3032, "step": 17528, "teacher_loss": 0.28192079067230225 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5125178694725037, "learning_rate": 1.4459559553602256e-05, "loss": 0.2355, "step": 17529, "teacher_loss": 0.20472539961338043 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.7114384174346924, "learning_rate": 1.4457289784707122e-05, "loss": 0.2821, "step": 17530, "teacher_loss": 0.23443858325481415 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.33134832978248596, "learning_rate": 1.445502002825471e-05, "loss": 0.2293, "step": 17531, "teacher_loss": 0.2179253101348877 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5935012698173523, "learning_rate": 1.4452750284297052e-05, "loss": 0.356, "step": 17532, "teacher_loss": 0.3296297490596771 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.30534982681274414, "learning_rate": 1.4450480552886184e-05, "loss": 0.3293, "step": 17533, "teacher_loss": 0.3319437503814697 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.9275591373443604, "learning_rate": 1.4448210834074155e-05, "loss": 0.5499, "step": 17534, "teacher_loss": 0.507900595664978 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.30951058864593506, "learning_rate": 1.4445941127912995e-05, "loss": 0.2661, "step": 17535, "teacher_loss": 0.2612661123275757 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.6395387649536133, "learning_rate": 1.4443671434454743e-05, "loss": 0.2865, "step": 17536, "teacher_loss": 0.24729005992412567 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.22077274322509766, "learning_rate": 1.4441401753751438e-05, "loss": 0.2074, "step": 17537, "teacher_loss": 0.20587016642093658 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.35192614793777466, "learning_rate": 1.4439132085855117e-05, "loss": 0.178, "step": 17538, "teacher_loss": 0.15865057706832886 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.7866694331169128, "learning_rate": 1.4436862430817813e-05, "loss": 0.2815, "step": 17539, "teacher_loss": 0.22539003193378448 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.23134806752204895, "learning_rate": 1.4434592788691568e-05, "loss": 0.1736, "step": 17540, "teacher_loss": 0.16715869307518005 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.6995489597320557, "learning_rate": 1.4432323159528415e-05, "loss": 0.2038, "step": 17541, "teacher_loss": 0.14872127771377563 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5319263935089111, "learning_rate": 1.4430053543380388e-05, "loss": 0.3348, "step": 17542, "teacher_loss": 0.3129148781299591 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.7178807258605957, "learning_rate": 1.4427783940299526e-05, "loss": 0.3609, "step": 17543, "teacher_loss": 0.32126104831695557 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.8869133591651917, "learning_rate": 1.4425514350337866e-05, "loss": 0.2862, "step": 17544, "teacher_loss": 0.21946462988853455 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.31641507148742676, "learning_rate": 1.442324477354743e-05, "loss": 0.1934, "step": 17545, "teacher_loss": 0.17970064282417297 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.6466572284698486, "learning_rate": 1.4420975209980272e-05, "loss": 0.2663, "step": 17546, "teacher_loss": 0.22409173846244812 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.282548725605011, "learning_rate": 1.4418705659688414e-05, "loss": 0.1863, "step": 17547, "teacher_loss": 0.17564553022384644 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3805087208747864, "learning_rate": 1.4416436122723888e-05, "loss": 0.187, "step": 17548, "teacher_loss": 0.1654852032661438 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.20888382196426392, "learning_rate": 1.4414166599138737e-05, "loss": 0.1874, "step": 17549, "teacher_loss": 0.18498674035072327 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.7838221788406372, "learning_rate": 1.4411897088984987e-05, "loss": 0.4258, "step": 17550, "teacher_loss": 0.38604599237442017 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.614762008190155, "learning_rate": 1.440962759231467e-05, "loss": 0.2088, "step": 17551, "teacher_loss": 0.1636539101600647 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3760119676589966, "learning_rate": 1.4407358109179827e-05, "loss": 0.2224, "step": 17552, "teacher_loss": 0.20533594489097595 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.30171364545822144, "learning_rate": 1.4405088639632486e-05, "loss": 0.2676, "step": 17553, "teacher_loss": 0.2638113498687744 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3653731942176819, "learning_rate": 1.4402819183724683e-05, "loss": 0.3816, "step": 17554, "teacher_loss": 0.3833754062652588 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.9574127197265625, "learning_rate": 1.4400549741508434e-05, "loss": 0.3138, "step": 17555, "teacher_loss": 0.24224044382572174 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.466407835483551, "learning_rate": 1.4398280313035794e-05, "loss": 0.237, "step": 17556, "teacher_loss": 0.2115379273891449 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3493289649486542, "learning_rate": 1.4396010898358778e-05, "loss": 0.2048, "step": 17557, "teacher_loss": 0.188791424036026 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.27235856652259827, "learning_rate": 1.4393741497529416e-05, "loss": 0.2044, "step": 17558, "teacher_loss": 0.19688169658184052 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.2390129715204239, "learning_rate": 1.439147211059975e-05, "loss": 0.1695, "step": 17559, "teacher_loss": 0.1617770791053772 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3681508004665375, "learning_rate": 1.4389202737621801e-05, "loss": 0.1585, "step": 17560, "teacher_loss": 0.1352112889289856 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5449030995368958, "learning_rate": 1.4386933378647602e-05, "loss": 0.3457, "step": 17561, "teacher_loss": 0.3235843777656555 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5737286806106567, "learning_rate": 1.4384664033729185e-05, "loss": 0.2354, "step": 17562, "teacher_loss": 0.1977914422750473 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.26882851123809814, "learning_rate": 1.4382394702918575e-05, "loss": 0.2597, "step": 17563, "teacher_loss": 0.2586757242679596 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.5868334174156189, "learning_rate": 1.43801253862678e-05, "loss": 0.2298, "step": 17564, "teacher_loss": 0.19010649621486664 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.3118050694465637, "learning_rate": 1.4377856083828897e-05, "loss": 0.1804, "step": 17565, "teacher_loss": 0.16577717661857605 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.7272975444793701, "learning_rate": 1.4375586795653887e-05, "loss": 0.3331, "step": 17566, "teacher_loss": 0.2893384099006653 }, { "compression_loss": 0.0, "epoch": 3.17, "label_loss": 0.18259891867637634, "learning_rate": 1.4373317521794796e-05, "loss": 0.2626, "step": 17567, "teacher_loss": 0.27145156264305115 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.40485966205596924, "learning_rate": 1.437104826230366e-05, "loss": 0.1812, "step": 17568, "teacher_loss": 0.1563625931739807 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.36247414350509644, "learning_rate": 1.4368779017232504e-05, "loss": 0.2012, "step": 17569, "teacher_loss": 0.18328312039375305 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.3005625307559967, "learning_rate": 1.4366509786633342e-05, "loss": 0.2165, "step": 17570, "teacher_loss": 0.20714882016181946 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.547737717628479, "learning_rate": 1.4364240570558224e-05, "loss": 0.3317, "step": 17571, "teacher_loss": 0.3076656758785248 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5777987241744995, "learning_rate": 1.4361971369059158e-05, "loss": 0.2132, "step": 17572, "teacher_loss": 0.17271798849105835 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.2663493752479553, "learning_rate": 1.4359702182188176e-05, "loss": 0.1766, "step": 17573, "teacher_loss": 0.16660578548908234 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.4218781590461731, "learning_rate": 1.4357433009997299e-05, "loss": 0.2216, "step": 17574, "teacher_loss": 0.1993720978498459 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.21224823594093323, "learning_rate": 1.435516385253856e-05, "loss": 0.2113, "step": 17575, "teacher_loss": 0.21118026971817017 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.21006067097187042, "learning_rate": 1.4352894709863983e-05, "loss": 0.1483, "step": 17576, "teacher_loss": 0.14138223230838776 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.48695528507232666, "learning_rate": 1.4350625582025584e-05, "loss": 0.2328, "step": 17577, "teacher_loss": 0.2045508623123169 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.6032968759536743, "learning_rate": 1.4348356469075399e-05, "loss": 0.2474, "step": 17578, "teacher_loss": 0.20787540078163147 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5241185426712036, "learning_rate": 1.4346087371065448e-05, "loss": 0.2145, "step": 17579, "teacher_loss": 0.1801140457391739 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.4550755023956299, "learning_rate": 1.4343818288047744e-05, "loss": 0.2798, "step": 17580, "teacher_loss": 0.2603676915168762 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.49509015679359436, "learning_rate": 1.4341549220074329e-05, "loss": 0.2559, "step": 17581, "teacher_loss": 0.2293020635843277 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5839067697525024, "learning_rate": 1.4339280167197213e-05, "loss": 0.3084, "step": 17582, "teacher_loss": 0.27779239416122437 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.7676784992218018, "learning_rate": 1.4337011129468419e-05, "loss": 0.4795, "step": 17583, "teacher_loss": 0.44750893115997314 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.6214165687561035, "learning_rate": 1.4334742106939975e-05, "loss": 0.238, "step": 17584, "teacher_loss": 0.19544601440429688 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5743802785873413, "learning_rate": 1.43324730996639e-05, "loss": 0.4628, "step": 17585, "teacher_loss": 0.45037519931793213 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.19376444816589355, "learning_rate": 1.4330204107692212e-05, "loss": 0.1461, "step": 17586, "teacher_loss": 0.14078941941261292 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.31987154483795166, "learning_rate": 1.4327935131076939e-05, "loss": 0.2716, "step": 17587, "teacher_loss": 0.2662258744239807 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 1.3952951431274414, "learning_rate": 1.4325666169870099e-05, "loss": 0.6551, "step": 17588, "teacher_loss": 0.572895884513855 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5303844213485718, "learning_rate": 1.4323397224123708e-05, "loss": 0.277, "step": 17589, "teacher_loss": 0.24882817268371582 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.39242875576019287, "learning_rate": 1.4321128293889793e-05, "loss": 0.2433, "step": 17590, "teacher_loss": 0.22669702768325806 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.3252655267715454, "learning_rate": 1.4318859379220371e-05, "loss": 0.2031, "step": 17591, "teacher_loss": 0.18953226506710052 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.7132079601287842, "learning_rate": 1.4316590480167459e-05, "loss": 0.3375, "step": 17592, "teacher_loss": 0.29573991894721985 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.47425180673599243, "learning_rate": 1.431432159678308e-05, "loss": 0.3001, "step": 17593, "teacher_loss": 0.28070133924484253 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.8338819742202759, "learning_rate": 1.4312052729119256e-05, "loss": 0.7873, "step": 17594, "teacher_loss": 0.782118558883667 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.9233233332633972, "learning_rate": 1.4309783877227996e-05, "loss": 0.2628, "step": 17595, "teacher_loss": 0.18941757082939148 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.25967979431152344, "learning_rate": 1.430751504116132e-05, "loss": 0.2334, "step": 17596, "teacher_loss": 0.23048532009124756 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.554546058177948, "learning_rate": 1.430524622097125e-05, "loss": 0.2532, "step": 17597, "teacher_loss": 0.2197268158197403 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.33481258153915405, "learning_rate": 1.4302977416709802e-05, "loss": 0.2784, "step": 17598, "teacher_loss": 0.27212274074554443 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.3703487515449524, "learning_rate": 1.4300708628428989e-05, "loss": 0.1704, "step": 17599, "teacher_loss": 0.14821605384349823 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.10718933492898941, "learning_rate": 1.4298439856180832e-05, "loss": 0.1775, "step": 17600, "teacher_loss": 0.1852729320526123 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5222270488739014, "learning_rate": 1.4296171100017348e-05, "loss": 0.2145, "step": 17601, "teacher_loss": 0.18033930659294128 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.8295646905899048, "learning_rate": 1.4293902359990547e-05, "loss": 0.2802, "step": 17602, "teacher_loss": 0.21916356682777405 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.34876590967178345, "learning_rate": 1.429163363615245e-05, "loss": 0.1961, "step": 17603, "teacher_loss": 0.17908209562301636 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.6965045928955078, "learning_rate": 1.4289364928555073e-05, "loss": 0.279, "step": 17604, "teacher_loss": 0.23265808820724487 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.2956174314022064, "learning_rate": 1.428709623725042e-05, "loss": 0.2945, "step": 17605, "teacher_loss": 0.29439499974250793 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 1.3945338726043701, "learning_rate": 1.4284827562290521e-05, "loss": 0.4184, "step": 17606, "teacher_loss": 0.30990782380104065 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.2730444371700287, "learning_rate": 1.428255890372738e-05, "loss": 0.1876, "step": 17607, "teacher_loss": 0.17813752591609955 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.07789207249879837, "learning_rate": 1.4280290261613002e-05, "loss": 0.1384, "step": 17608, "teacher_loss": 0.14511734247207642 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.17575177550315857, "learning_rate": 1.4278021635999424e-05, "loss": 0.1828, "step": 17609, "teacher_loss": 0.18362730741500854 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.4916270971298218, "learning_rate": 1.427575302693864e-05, "loss": 0.2387, "step": 17610, "teacher_loss": 0.210578054189682 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.9475731253623962, "learning_rate": 1.4273484434482665e-05, "loss": 0.3271, "step": 17611, "teacher_loss": 0.25814807415008545 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.19031646847724915, "learning_rate": 1.4271215858683519e-05, "loss": 0.185, "step": 17612, "teacher_loss": 0.18441042304039001 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.3296540379524231, "learning_rate": 1.4268947299593206e-05, "loss": 0.2503, "step": 17613, "teacher_loss": 0.2414407730102539 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.5212024450302124, "learning_rate": 1.4266678757263743e-05, "loss": 0.2743, "step": 17614, "teacher_loss": 0.24687375128269196 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.2577836811542511, "learning_rate": 1.4264410231747128e-05, "loss": 0.182, "step": 17615, "teacher_loss": 0.17355065047740936 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.4391128718852997, "learning_rate": 1.4262141723095387e-05, "loss": 0.248, "step": 17616, "teacher_loss": 0.22681915760040283 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.4718478322029114, "learning_rate": 1.4259873231360527e-05, "loss": 0.1972, "step": 17617, "teacher_loss": 0.1667015552520752 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.15968924760818481, "learning_rate": 1.4257604756594548e-05, "loss": 0.1436, "step": 17618, "teacher_loss": 0.1418251395225525 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.6067183017730713, "learning_rate": 1.4255336298849474e-05, "loss": 0.3592, "step": 17619, "teacher_loss": 0.3316514194011688 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.3493698239326477, "learning_rate": 1.4253067858177301e-05, "loss": 0.1859, "step": 17620, "teacher_loss": 0.16778475046157837 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 1.3777087926864624, "learning_rate": 1.4250799434630042e-05, "loss": 0.3961, "step": 17621, "teacher_loss": 0.28703343868255615 }, { "compression_loss": 0.0, "epoch": 3.18, "label_loss": 0.37302398681640625, "learning_rate": 1.4248531028259708e-05, "loss": 0.1679, "step": 17622, "teacher_loss": 0.14505496621131897 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4962156116962433, "learning_rate": 1.4246262639118304e-05, "loss": 0.2602, "step": 17623, "teacher_loss": 0.23398357629776 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4226142168045044, "learning_rate": 1.4243994267257836e-05, "loss": 0.2567, "step": 17624, "teacher_loss": 0.2382708191871643 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3812740743160248, "learning_rate": 1.4241725912730315e-05, "loss": 0.2138, "step": 17625, "teacher_loss": 0.19524195790290833 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5385949015617371, "learning_rate": 1.4239457575587747e-05, "loss": 0.2702, "step": 17626, "teacher_loss": 0.24038901925086975 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3025643229484558, "learning_rate": 1.4237189255882131e-05, "loss": 0.2356, "step": 17627, "teacher_loss": 0.22810712456703186 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.21577215194702148, "learning_rate": 1.4234920953665482e-05, "loss": 0.212, "step": 17628, "teacher_loss": 0.21161997318267822 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.37343868613243103, "learning_rate": 1.4232652668989805e-05, "loss": 0.2554, "step": 17629, "teacher_loss": 0.24230524897575378 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.632673978805542, "learning_rate": 1.4230384401907092e-05, "loss": 0.3659, "step": 17630, "teacher_loss": 0.33627569675445557 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.7718628644943237, "learning_rate": 1.4228116152469368e-05, "loss": 0.2987, "step": 17631, "teacher_loss": 0.24612393975257874 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3600156903266907, "learning_rate": 1.4225847920728622e-05, "loss": 0.2183, "step": 17632, "teacher_loss": 0.20254400372505188 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.45248568058013916, "learning_rate": 1.4223579706736857e-05, "loss": 0.3043, "step": 17633, "teacher_loss": 0.28782743215560913 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.33973008394241333, "learning_rate": 1.4221311510546089e-05, "loss": 0.1713, "step": 17634, "teacher_loss": 0.15261170268058777 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5740082263946533, "learning_rate": 1.4219043332208312e-05, "loss": 0.3057, "step": 17635, "teacher_loss": 0.27593904733657837 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.6180772185325623, "learning_rate": 1.4216775171775531e-05, "loss": 0.3007, "step": 17636, "teacher_loss": 0.26542431116104126 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4716870188713074, "learning_rate": 1.421450702929974e-05, "loss": 0.1933, "step": 17637, "teacher_loss": 0.1623629480600357 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5338704586029053, "learning_rate": 1.4212238904832956e-05, "loss": 0.1825, "step": 17638, "teacher_loss": 0.14342273771762848 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5109022855758667, "learning_rate": 1.4209970798427167e-05, "loss": 0.309, "step": 17639, "teacher_loss": 0.2865491807460785 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.7189458608627319, "learning_rate": 1.420770271013438e-05, "loss": 0.3553, "step": 17640, "teacher_loss": 0.31493479013442993 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.44520998001098633, "learning_rate": 1.4205434640006595e-05, "loss": 0.3036, "step": 17641, "teacher_loss": 0.2878129482269287 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.31332480907440186, "learning_rate": 1.4203166588095816e-05, "loss": 0.2407, "step": 17642, "teacher_loss": 0.23264554142951965 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.8516744375228882, "learning_rate": 1.4200898554454028e-05, "loss": 0.2043, "step": 17643, "teacher_loss": 0.1324230283498764 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5234546661376953, "learning_rate": 1.4198630539133249e-05, "loss": 0.2783, "step": 17644, "teacher_loss": 0.2510383725166321 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 1.0129578113555908, "learning_rate": 1.4196362542185469e-05, "loss": 0.2908, "step": 17645, "teacher_loss": 0.2105954885482788 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.41638368368148804, "learning_rate": 1.419409456366268e-05, "loss": 0.1926, "step": 17646, "teacher_loss": 0.16770394146442413 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3812369108200073, "learning_rate": 1.4191826603616891e-05, "loss": 0.2008, "step": 17647, "teacher_loss": 0.1807435303926468 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4451167583465576, "learning_rate": 1.4189558662100094e-05, "loss": 0.1753, "step": 17648, "teacher_loss": 0.14527684450149536 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3233838677406311, "learning_rate": 1.4187290739164285e-05, "loss": 0.1695, "step": 17649, "teacher_loss": 0.15235596895217896 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 1.3505933284759521, "learning_rate": 1.4185022834861466e-05, "loss": 0.3868, "step": 17650, "teacher_loss": 0.2797505259513855 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3997369408607483, "learning_rate": 1.4182754949243629e-05, "loss": 0.2755, "step": 17651, "teacher_loss": 0.26170945167541504 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3941057324409485, "learning_rate": 1.4180487082362767e-05, "loss": 0.2674, "step": 17652, "teacher_loss": 0.2533155679702759 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.14867646992206573, "learning_rate": 1.4178219234270885e-05, "loss": 0.1983, "step": 17653, "teacher_loss": 0.20384220778942108 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.26068776845932007, "learning_rate": 1.4175951405019973e-05, "loss": 0.1861, "step": 17654, "teacher_loss": 0.17776955664157867 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3619043231010437, "learning_rate": 1.4173683594662016e-05, "loss": 0.1683, "step": 17655, "teacher_loss": 0.14680679142475128 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3401820659637451, "learning_rate": 1.4171415803249027e-05, "loss": 0.17, "step": 17656, "teacher_loss": 0.1510607898235321 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.497144877910614, "learning_rate": 1.4169148030832984e-05, "loss": 0.1915, "step": 17657, "teacher_loss": 0.15756356716156006 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5746393203735352, "learning_rate": 1.4166880277465888e-05, "loss": 0.5828, "step": 17658, "teacher_loss": 0.5836811661720276 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3819441497325897, "learning_rate": 1.4164612543199725e-05, "loss": 0.176, "step": 17659, "teacher_loss": 0.1530633270740509 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.24425393342971802, "learning_rate": 1.4162344828086496e-05, "loss": 0.2048, "step": 17660, "teacher_loss": 0.20042669773101807 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5154309868812561, "learning_rate": 1.4160077132178188e-05, "loss": 0.2846, "step": 17661, "teacher_loss": 0.25900042057037354 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.49648517370224, "learning_rate": 1.4157809455526789e-05, "loss": 0.2329, "step": 17662, "teacher_loss": 0.20363697409629822 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.7446349263191223, "learning_rate": 1.4155541798184299e-05, "loss": 0.2716, "step": 17663, "teacher_loss": 0.2190054953098297 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.27723339200019836, "learning_rate": 1.4153274160202702e-05, "loss": 0.1999, "step": 17664, "teacher_loss": 0.19125187397003174 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.6717166900634766, "learning_rate": 1.4151006541633989e-05, "loss": 0.4072, "step": 17665, "teacher_loss": 0.3778409957885742 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3500524163246155, "learning_rate": 1.4148738942530152e-05, "loss": 0.3734, "step": 17666, "teacher_loss": 0.37594538927078247 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5047045350074768, "learning_rate": 1.4146471362943182e-05, "loss": 0.223, "step": 17667, "teacher_loss": 0.19164644181728363 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4416535496711731, "learning_rate": 1.4144203802925054e-05, "loss": 0.2412, "step": 17668, "teacher_loss": 0.21894004940986633 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.6555887460708618, "learning_rate": 1.4141936262527778e-05, "loss": 0.2562, "step": 17669, "teacher_loss": 0.21176809072494507 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.3796350359916687, "learning_rate": 1.4139668741803329e-05, "loss": 0.1752, "step": 17670, "teacher_loss": 0.15246069431304932 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.5755200982093811, "learning_rate": 1.4137401240803692e-05, "loss": 0.3264, "step": 17671, "teacher_loss": 0.2986907958984375 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.301070898771286, "learning_rate": 1.4135133759580861e-05, "loss": 0.176, "step": 17672, "teacher_loss": 0.16208702325820923 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.9147008657455444, "learning_rate": 1.4132866298186821e-05, "loss": 0.3147, "step": 17673, "teacher_loss": 0.24803461134433746 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.4807908535003662, "learning_rate": 1.4130598856673552e-05, "loss": 0.2223, "step": 17674, "teacher_loss": 0.1935236006975174 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.883996844291687, "learning_rate": 1.4128331435093049e-05, "loss": 0.3523, "step": 17675, "teacher_loss": 0.2932104170322418 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.2951236963272095, "learning_rate": 1.4126064033497293e-05, "loss": 0.213, "step": 17676, "teacher_loss": 0.20385511219501495 }, { "compression_loss": 0.0, "epoch": 3.19, "label_loss": 0.6488451957702637, "learning_rate": 1.412379665193827e-05, "loss": 0.3624, "step": 17677, "teacher_loss": 0.3306090533733368 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.38633355498313904, "learning_rate": 1.4121529290467956e-05, "loss": 0.1792, "step": 17678, "teacher_loss": 0.15614831447601318 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.30094248056411743, "learning_rate": 1.411926194913835e-05, "loss": 0.1903, "step": 17679, "teacher_loss": 0.17803940176963806 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.16600218415260315, "learning_rate": 1.4116994628001424e-05, "loss": 0.2045, "step": 17680, "teacher_loss": 0.20878452062606812 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.19084596633911133, "learning_rate": 1.411472732710916e-05, "loss": 0.2012, "step": 17681, "teacher_loss": 0.2023848295211792 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.20804446935653687, "learning_rate": 1.4112460046513547e-05, "loss": 0.2054, "step": 17682, "teacher_loss": 0.20508253574371338 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.2727295756340027, "learning_rate": 1.4110192786266564e-05, "loss": 0.2159, "step": 17683, "teacher_loss": 0.20959827303886414 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 1.2126994132995605, "learning_rate": 1.4107925546420191e-05, "loss": 0.4428, "step": 17684, "teacher_loss": 0.35727572441101074 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.6570178270339966, "learning_rate": 1.4105658327026413e-05, "loss": 0.2816, "step": 17685, "teacher_loss": 0.23986339569091797 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.18651720881462097, "learning_rate": 1.4103391128137208e-05, "loss": 0.1791, "step": 17686, "teacher_loss": 0.1782565861940384 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.8153842687606812, "learning_rate": 1.4101123949804553e-05, "loss": 0.4564, "step": 17687, "teacher_loss": 0.4164618253707886 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.26899033784866333, "learning_rate": 1.4098856792080434e-05, "loss": 0.229, "step": 17688, "teacher_loss": 0.22460463643074036 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.17461831867694855, "learning_rate": 1.4096589655016827e-05, "loss": 0.206, "step": 17689, "teacher_loss": 0.20951685309410095 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.23320603370666504, "learning_rate": 1.4094322538665707e-05, "loss": 0.1781, "step": 17690, "teacher_loss": 0.17200851440429688 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.47878435254096985, "learning_rate": 1.4092055443079059e-05, "loss": 0.2868, "step": 17691, "teacher_loss": 0.265491783618927 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4098367691040039, "learning_rate": 1.4089788368308862e-05, "loss": 0.2362, "step": 17692, "teacher_loss": 0.21688339114189148 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4018549621105194, "learning_rate": 1.4087521314407081e-05, "loss": 0.2049, "step": 17693, "teacher_loss": 0.18304800987243652 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.400320827960968, "learning_rate": 1.4085254281425707e-05, "loss": 0.2331, "step": 17694, "teacher_loss": 0.21457022428512573 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.8185330033302307, "learning_rate": 1.4082987269416708e-05, "loss": 0.378, "step": 17695, "teacher_loss": 0.32909512519836426 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.6196879148483276, "learning_rate": 1.4080720278432056e-05, "loss": 0.2412, "step": 17696, "teacher_loss": 0.19913756847381592 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.5211464762687683, "learning_rate": 1.4078453308523737e-05, "loss": 0.2043, "step": 17697, "teacher_loss": 0.16904941201210022 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.497321218252182, "learning_rate": 1.4076186359743721e-05, "loss": 0.2597, "step": 17698, "teacher_loss": 0.2333253026008606 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.397977352142334, "learning_rate": 1.4073919432143981e-05, "loss": 0.1895, "step": 17699, "teacher_loss": 0.16636788845062256 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4134765863418579, "learning_rate": 1.4071652525776488e-05, "loss": 0.2006, "step": 17700, "teacher_loss": 0.1769149899482727 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4349774718284607, "learning_rate": 1.4069385640693226e-05, "loss": 0.2345, "step": 17701, "teacher_loss": 0.21224355697631836 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.30599090456962585, "learning_rate": 1.406711877694616e-05, "loss": 0.2445, "step": 17702, "teacher_loss": 0.23763877153396606 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.37175849080085754, "learning_rate": 1.4064851934587256e-05, "loss": 0.2657, "step": 17703, "teacher_loss": 0.2539531886577606 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4812345504760742, "learning_rate": 1.4062585113668503e-05, "loss": 0.258, "step": 17704, "teacher_loss": 0.23319700360298157 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 1.0152655839920044, "learning_rate": 1.4060318314241858e-05, "loss": 0.4713, "step": 17705, "teacher_loss": 0.4109044075012207 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.6553852558135986, "learning_rate": 1.4058051536359297e-05, "loss": 0.2487, "step": 17706, "teacher_loss": 0.20351681113243103 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4203108549118042, "learning_rate": 1.4055784780072792e-05, "loss": 0.2351, "step": 17707, "teacher_loss": 0.21454772353172302 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 1.3231407403945923, "learning_rate": 1.405351804543431e-05, "loss": 0.3969, "step": 17708, "teacher_loss": 0.29393795132637024 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.7056772112846375, "learning_rate": 1.4051251332495819e-05, "loss": 0.4094, "step": 17709, "teacher_loss": 0.3764837384223938 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.6474820375442505, "learning_rate": 1.4048984641309293e-05, "loss": 0.2457, "step": 17710, "teacher_loss": 0.20108479261398315 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.39592161774635315, "learning_rate": 1.4046717971926699e-05, "loss": 0.1769, "step": 17711, "teacher_loss": 0.15252313017845154 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.5343132615089417, "learning_rate": 1.4044451324399999e-05, "loss": 0.3181, "step": 17712, "teacher_loss": 0.29402533173561096 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.20495331287384033, "learning_rate": 1.4042184698781169e-05, "loss": 0.2235, "step": 17713, "teacher_loss": 0.2255142778158188 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.34046465158462524, "learning_rate": 1.4039918095122173e-05, "loss": 0.24, "step": 17714, "teacher_loss": 0.22879615426063538 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.1492111086845398, "learning_rate": 1.4037651513474973e-05, "loss": 0.2264, "step": 17715, "teacher_loss": 0.23500242829322815 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.3030222952365875, "learning_rate": 1.4035384953891541e-05, "loss": 0.2291, "step": 17716, "teacher_loss": 0.22091852128505707 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.21485331654548645, "learning_rate": 1.4033118416423844e-05, "loss": 0.1885, "step": 17717, "teacher_loss": 0.1855505108833313 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.2831752300262451, "learning_rate": 1.4030851901123833e-05, "loss": 0.1632, "step": 17718, "teacher_loss": 0.14986774325370789 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.23836159706115723, "learning_rate": 1.4028585408043491e-05, "loss": 0.2117, "step": 17719, "teacher_loss": 0.20869216322898865 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.3062734603881836, "learning_rate": 1.4026318937234772e-05, "loss": 0.197, "step": 17720, "teacher_loss": 0.18488062918186188 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.3281233608722687, "learning_rate": 1.4024052488749639e-05, "loss": 0.1921, "step": 17721, "teacher_loss": 0.177039235830307 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.059197455644607544, "learning_rate": 1.4021786062640053e-05, "loss": 0.1656, "step": 17722, "teacher_loss": 0.17742754518985748 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.1570141762495041, "learning_rate": 1.4019519658957984e-05, "loss": 0.1593, "step": 17723, "teacher_loss": 0.15955600142478943 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.404154896736145, "learning_rate": 1.401725327775539e-05, "loss": 0.2089, "step": 17724, "teacher_loss": 0.1872521936893463 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.3694135844707489, "learning_rate": 1.4014986919084228e-05, "loss": 0.234, "step": 17725, "teacher_loss": 0.21898458898067474 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.1793111264705658, "learning_rate": 1.4012720582996466e-05, "loss": 0.1449, "step": 17726, "teacher_loss": 0.14107060432434082 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.34231704473495483, "learning_rate": 1.4010454269544064e-05, "loss": 0.2774, "step": 17727, "teacher_loss": 0.2701775133609772 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.35584425926208496, "learning_rate": 1.400818797877897e-05, "loss": 0.2467, "step": 17728, "teacher_loss": 0.23459625244140625 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.26902374625205994, "learning_rate": 1.4005921710753159e-05, "loss": 0.1591, "step": 17729, "teacher_loss": 0.14692333340644836 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.5736459493637085, "learning_rate": 1.4003655465518582e-05, "loss": 0.1924, "step": 17730, "teacher_loss": 0.15006959438323975 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.4083263576030731, "learning_rate": 1.400138924312719e-05, "loss": 0.2416, "step": 17731, "teacher_loss": 0.22303420305252075 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.44486677646636963, "learning_rate": 1.399912304363096e-05, "loss": 0.2068, "step": 17732, "teacher_loss": 0.18037262558937073 }, { "compression_loss": 0.0, "epoch": 3.2, "label_loss": 0.3760298490524292, "learning_rate": 1.3996856867081834e-05, "loss": 0.2479, "step": 17733, "teacher_loss": 0.23361876606941223 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.23757338523864746, "learning_rate": 1.3994590713531768e-05, "loss": 0.1829, "step": 17734, "teacher_loss": 0.17678220570087433 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.2864382863044739, "learning_rate": 1.3992324583032727e-05, "loss": 0.362, "step": 17735, "teacher_loss": 0.37041598558425903 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.5763384103775024, "learning_rate": 1.3990058475636663e-05, "loss": 0.2051, "step": 17736, "teacher_loss": 0.16379867494106293 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.4633319079875946, "learning_rate": 1.3987792391395526e-05, "loss": 0.2256, "step": 17737, "teacher_loss": 0.19915470480918884 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.24617871642112732, "learning_rate": 1.3985526330361277e-05, "loss": 0.2065, "step": 17738, "teacher_loss": 0.2020697295665741 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7810106873512268, "learning_rate": 1.3983260292585869e-05, "loss": 0.2663, "step": 17739, "teacher_loss": 0.20915773510932922 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7676599025726318, "learning_rate": 1.3980994278121256e-05, "loss": 0.4112, "step": 17740, "teacher_loss": 0.3715493381023407 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.2399691641330719, "learning_rate": 1.3978728287019381e-05, "loss": 0.2957, "step": 17741, "teacher_loss": 0.30185675621032715 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.9170626401901245, "learning_rate": 1.3976462319332214e-05, "loss": 0.3016, "step": 17742, "teacher_loss": 0.23324422538280487 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.8479702472686768, "learning_rate": 1.3974196375111693e-05, "loss": 0.435, "step": 17743, "teacher_loss": 0.3890647292137146 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.34014949202537537, "learning_rate": 1.3971930454409772e-05, "loss": 0.1435, "step": 17744, "teacher_loss": 0.12165423482656479 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.4077708423137665, "learning_rate": 1.3969664557278405e-05, "loss": 0.2113, "step": 17745, "teacher_loss": 0.18946444988250732 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.16778525710105896, "learning_rate": 1.3967398683769543e-05, "loss": 0.1694, "step": 17746, "teacher_loss": 0.16960649192333221 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.19429773092269897, "learning_rate": 1.3965132833935126e-05, "loss": 0.1507, "step": 17747, "teacher_loss": 0.145864337682724 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.5520732998847961, "learning_rate": 1.3962867007827118e-05, "loss": 0.3056, "step": 17748, "teacher_loss": 0.27824074029922485 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.4759901762008667, "learning_rate": 1.3960601205497456e-05, "loss": 0.5577, "step": 17749, "teacher_loss": 0.566731333732605 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.23135754466056824, "learning_rate": 1.3958335426998091e-05, "loss": 0.235, "step": 17750, "teacher_loss": 0.23542466759681702 }, { "epoch": 3.21, "eval_exact_match": 79.90539262062441, "eval_f1": 87.3797160961695, "step": 17750 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.82684326171875, "learning_rate": 1.3956069672380974e-05, "loss": 0.8044, "step": 17751, "teacher_loss": 0.8018832206726074 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.39377492666244507, "learning_rate": 1.3953803941698054e-05, "loss": 0.25, "step": 17752, "teacher_loss": 0.23407380282878876 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.33119285106658936, "learning_rate": 1.3951538235001262e-05, "loss": 0.1821, "step": 17753, "teacher_loss": 0.16549348831176758 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7300830483436584, "learning_rate": 1.3949272552342564e-05, "loss": 0.3055, "step": 17754, "teacher_loss": 0.2582813501358032 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7702221870422363, "learning_rate": 1.3947006893773894e-05, "loss": 0.2558, "step": 17755, "teacher_loss": 0.19865018129348755 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.07892671972513199, "learning_rate": 1.3944741259347193e-05, "loss": 0.1785, "step": 17756, "teacher_loss": 0.18961171805858612 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.6018067002296448, "learning_rate": 1.3942475649114421e-05, "loss": 0.38, "step": 17757, "teacher_loss": 0.3554080128669739 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.5098625421524048, "learning_rate": 1.3940210063127509e-05, "loss": 0.2096, "step": 17758, "teacher_loss": 0.17625929415225983 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.25803834199905396, "learning_rate": 1.3937944501438398e-05, "loss": 0.2406, "step": 17759, "teacher_loss": 0.23868940770626068 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.21180912852287292, "learning_rate": 1.3935678964099037e-05, "loss": 0.2184, "step": 17760, "teacher_loss": 0.2191643863916397 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.1667906641960144, "learning_rate": 1.393341345116137e-05, "loss": 0.1503, "step": 17761, "teacher_loss": 0.1484241783618927 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.9333224892616272, "learning_rate": 1.3931147962677332e-05, "loss": 0.4977, "step": 17762, "teacher_loss": 0.44930300116539 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.4633907675743103, "learning_rate": 1.3928882498698864e-05, "loss": 0.3167, "step": 17763, "teacher_loss": 0.3004424571990967 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.6299740076065063, "learning_rate": 1.3926617059277913e-05, "loss": 0.529, "step": 17764, "teacher_loss": 0.5177338719367981 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7085591554641724, "learning_rate": 1.3924351644466418e-05, "loss": 0.2092, "step": 17765, "teacher_loss": 0.1536863148212433 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.44883373379707336, "learning_rate": 1.3922086254316305e-05, "loss": 0.2284, "step": 17766, "teacher_loss": 0.20394131541252136 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.4773971438407898, "learning_rate": 1.391982088887953e-05, "loss": 0.2131, "step": 17767, "teacher_loss": 0.18375900387763977 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.27856025099754333, "learning_rate": 1.3917555548208023e-05, "loss": 0.2092, "step": 17768, "teacher_loss": 0.2014828473329544 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.37856942415237427, "learning_rate": 1.3915290232353719e-05, "loss": 0.2301, "step": 17769, "teacher_loss": 0.21359002590179443 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.29520073533058167, "learning_rate": 1.391302494136856e-05, "loss": 0.2402, "step": 17770, "teacher_loss": 0.2340729981660843 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.3753064274787903, "learning_rate": 1.3910759675304478e-05, "loss": 0.2574, "step": 17771, "teacher_loss": 0.24427062273025513 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.13529081642627716, "learning_rate": 1.3908494434213411e-05, "loss": 0.1777, "step": 17772, "teacher_loss": 0.1824117749929428 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.8264325857162476, "learning_rate": 1.3906229218147296e-05, "loss": 0.3665, "step": 17773, "teacher_loss": 0.3153989911079407 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.6421117782592773, "learning_rate": 1.3903964027158066e-05, "loss": 0.381, "step": 17774, "teacher_loss": 0.3519960641860962 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.37415218353271484, "learning_rate": 1.390169886129765e-05, "loss": 0.1854, "step": 17775, "teacher_loss": 0.16437961161136627 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7798163890838623, "learning_rate": 1.389943372061799e-05, "loss": 0.3087, "step": 17776, "teacher_loss": 0.25636786222457886 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.6778844594955444, "learning_rate": 1.389716860517102e-05, "loss": 0.4921, "step": 17777, "teacher_loss": 0.47141796350479126 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7336118221282959, "learning_rate": 1.3894903515008657e-05, "loss": 0.2345, "step": 17778, "teacher_loss": 0.17899875342845917 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.5994202494621277, "learning_rate": 1.3892638450182852e-05, "loss": 0.204, "step": 17779, "teacher_loss": 0.16002686321735382 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.20934957265853882, "learning_rate": 1.3890373410745525e-05, "loss": 0.1766, "step": 17780, "teacher_loss": 0.17296954989433289 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.25799915194511414, "learning_rate": 1.388810839674861e-05, "loss": 0.148, "step": 17781, "teacher_loss": 0.13580778241157532 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.7298446893692017, "learning_rate": 1.3885843408244032e-05, "loss": 0.3167, "step": 17782, "teacher_loss": 0.27077868580818176 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.1925753951072693, "learning_rate": 1.3883578445283726e-05, "loss": 0.1501, "step": 17783, "teacher_loss": 0.14532917737960815 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.13934828341007233, "learning_rate": 1.3881313507919618e-05, "loss": 0.1928, "step": 17784, "teacher_loss": 0.1987382173538208 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.5044597387313843, "learning_rate": 1.3879048596203637e-05, "loss": 0.246, "step": 17785, "teacher_loss": 0.21726396679878235 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.33396244049072266, "learning_rate": 1.3876783710187711e-05, "loss": 0.2013, "step": 17786, "teacher_loss": 0.18651892244815826 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.26629796624183655, "learning_rate": 1.3874518849923769e-05, "loss": 0.1879, "step": 17787, "teacher_loss": 0.17923521995544434 }, { "compression_loss": 0.0, "epoch": 3.21, "label_loss": 0.9082004427909851, "learning_rate": 1.3872254015463733e-05, "loss": 0.2493, "step": 17788, "teacher_loss": 0.1761389523744583 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3509395718574524, "learning_rate": 1.3869989206859533e-05, "loss": 0.2243, "step": 17789, "teacher_loss": 0.21019186079502106 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.5572032928466797, "learning_rate": 1.3867724424163097e-05, "loss": 0.2458, "step": 17790, "teacher_loss": 0.2111891657114029 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3161216676235199, "learning_rate": 1.3865459667426335e-05, "loss": 0.2275, "step": 17791, "teacher_loss": 0.2176235467195511 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.36654701828956604, "learning_rate": 1.386319493670119e-05, "loss": 0.2477, "step": 17792, "teacher_loss": 0.23445579409599304 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.37016183137893677, "learning_rate": 1.3860930232039575e-05, "loss": 0.1863, "step": 17793, "teacher_loss": 0.16586801409721375 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2387593686580658, "learning_rate": 1.3858665553493411e-05, "loss": 0.2156, "step": 17794, "teacher_loss": 0.21303579211235046 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 1.050684928894043, "learning_rate": 1.3856400901114627e-05, "loss": 0.4766, "step": 17795, "teacher_loss": 0.4128328561782837 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2775558531284332, "learning_rate": 1.385413627495514e-05, "loss": 0.1774, "step": 17796, "teacher_loss": 0.1662633717060089 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3065398931503296, "learning_rate": 1.3851871675066873e-05, "loss": 0.3035, "step": 17797, "teacher_loss": 0.30317962169647217 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.18469856679439545, "learning_rate": 1.3849607101501748e-05, "loss": 0.1597, "step": 17798, "teacher_loss": 0.1569458693265915 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.6691349744796753, "learning_rate": 1.3847342554311681e-05, "loss": 0.2884, "step": 17799, "teacher_loss": 0.2460532784461975 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.17319554090499878, "learning_rate": 1.3845078033548592e-05, "loss": 0.1951, "step": 17800, "teacher_loss": 0.19758352637290955 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.9470968246459961, "learning_rate": 1.3842813539264405e-05, "loss": 0.3105, "step": 17801, "teacher_loss": 0.23980209231376648 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.6728264093399048, "learning_rate": 1.3840549071511036e-05, "loss": 0.2987, "step": 17802, "teacher_loss": 0.2570805251598358 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.6264507174491882, "learning_rate": 1.3838284630340398e-05, "loss": 0.2158, "step": 17803, "teacher_loss": 0.17012710869312286 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.5300797820091248, "learning_rate": 1.3836020215804407e-05, "loss": 0.3452, "step": 17804, "teacher_loss": 0.3246801495552063 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.496523916721344, "learning_rate": 1.3833755827954985e-05, "loss": 0.2112, "step": 17805, "teacher_loss": 0.17947755753993988 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.40019291639328003, "learning_rate": 1.3831491466844047e-05, "loss": 0.2001, "step": 17806, "teacher_loss": 0.17790161073207855 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2955458462238312, "learning_rate": 1.3829227132523502e-05, "loss": 0.2167, "step": 17807, "teacher_loss": 0.20796501636505127 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4099975824356079, "learning_rate": 1.3826962825045272e-05, "loss": 0.2128, "step": 17808, "teacher_loss": 0.19083893299102783 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3059951663017273, "learning_rate": 1.3824698544461268e-05, "loss": 0.1935, "step": 17809, "teacher_loss": 0.18097171187400818 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4173629879951477, "learning_rate": 1.3822434290823399e-05, "loss": 0.2493, "step": 17810, "teacher_loss": 0.23062361776828766 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2537233829498291, "learning_rate": 1.3820170064183584e-05, "loss": 0.1709, "step": 17811, "teacher_loss": 0.16165202856063843 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.22788269817829132, "learning_rate": 1.3817905864593733e-05, "loss": 0.1496, "step": 17812, "teacher_loss": 0.14094901084899902 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4514191746711731, "learning_rate": 1.3815641692105753e-05, "loss": 0.2601, "step": 17813, "teacher_loss": 0.23888617753982544 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.38667869567871094, "learning_rate": 1.3813377546771561e-05, "loss": 0.288, "step": 17814, "teacher_loss": 0.2770565152168274 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4020824432373047, "learning_rate": 1.3811113428643067e-05, "loss": 0.3178, "step": 17815, "teacher_loss": 0.3084867000579834 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.25023317337036133, "learning_rate": 1.380884933777217e-05, "loss": 0.2005, "step": 17816, "teacher_loss": 0.19499355554580688 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.37997233867645264, "learning_rate": 1.3806585274210794e-05, "loss": 0.2496, "step": 17817, "teacher_loss": 0.23506735265254974 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.7976357936859131, "learning_rate": 1.3804321238010837e-05, "loss": 0.3665, "step": 17818, "teacher_loss": 0.3185747265815735 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.7212059497833252, "learning_rate": 1.3802057229224206e-05, "loss": 0.3755, "step": 17819, "teacher_loss": 0.3371182978153229 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4705544114112854, "learning_rate": 1.3799793247902814e-05, "loss": 0.1691, "step": 17820, "teacher_loss": 0.13560077548027039 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.38171130418777466, "learning_rate": 1.3797529294098564e-05, "loss": 0.2134, "step": 17821, "teacher_loss": 0.19466093182563782 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.31826701760292053, "learning_rate": 1.379526536786336e-05, "loss": 0.2296, "step": 17822, "teacher_loss": 0.2197890281677246 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3092745542526245, "learning_rate": 1.3793001469249112e-05, "loss": 0.1459, "step": 17823, "teacher_loss": 0.12775088846683502 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.3573278486728668, "learning_rate": 1.3790737598307722e-05, "loss": 0.1503, "step": 17824, "teacher_loss": 0.1273033618927002 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 1.042050838470459, "learning_rate": 1.3788473755091097e-05, "loss": 0.3295, "step": 17825, "teacher_loss": 0.2503780722618103 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.7646037340164185, "learning_rate": 1.3786209939651124e-05, "loss": 0.3309, "step": 17826, "teacher_loss": 0.2826780676841736 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.6198161840438843, "learning_rate": 1.378394615203973e-05, "loss": 0.2434, "step": 17827, "teacher_loss": 0.20158883929252625 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.8396542072296143, "learning_rate": 1.3781682392308801e-05, "loss": 0.3676, "step": 17828, "teacher_loss": 0.31516358256340027 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4323810338973999, "learning_rate": 1.3779418660510237e-05, "loss": 0.2077, "step": 17829, "teacher_loss": 0.18271008133888245 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.7173413038253784, "learning_rate": 1.377715495669595e-05, "loss": 0.2327, "step": 17830, "teacher_loss": 0.17882099747657776 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4369521737098694, "learning_rate": 1.3774891280917831e-05, "loss": 0.2385, "step": 17831, "teacher_loss": 0.21650457382202148 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.5975555181503296, "learning_rate": 1.377262763322778e-05, "loss": 0.2396, "step": 17832, "teacher_loss": 0.19982370734214783 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4877334237098694, "learning_rate": 1.3770364013677701e-05, "loss": 0.2167, "step": 17833, "teacher_loss": 0.18654392659664154 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.496481329202652, "learning_rate": 1.376810042231949e-05, "loss": 0.1752, "step": 17834, "teacher_loss": 0.13955262303352356 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.4529290497303009, "learning_rate": 1.3765836859205036e-05, "loss": 0.2489, "step": 17835, "teacher_loss": 0.22623030841350555 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.258700966835022, "learning_rate": 1.3763573324386247e-05, "loss": 0.2224, "step": 17836, "teacher_loss": 0.21837686002254486 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.37753772735595703, "learning_rate": 1.3761309817915017e-05, "loss": 0.204, "step": 17837, "teacher_loss": 0.184768408536911 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 1.0870299339294434, "learning_rate": 1.3759046339843233e-05, "loss": 0.2382, "step": 17838, "teacher_loss": 0.14393766224384308 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.5531726479530334, "learning_rate": 1.37567828902228e-05, "loss": 0.2982, "step": 17839, "teacher_loss": 0.26991429924964905 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.38269758224487305, "learning_rate": 1.3754519469105612e-05, "loss": 0.2805, "step": 17840, "teacher_loss": 0.2691340744495392 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2569577097892761, "learning_rate": 1.3752256076543549e-05, "loss": 0.288, "step": 17841, "teacher_loss": 0.29143622517585754 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.2525979280471802, "learning_rate": 1.374999271258852e-05, "loss": 0.2363, "step": 17842, "teacher_loss": 0.23447683453559875 }, { "compression_loss": 0.0, "epoch": 3.22, "label_loss": 0.5004053115844727, "learning_rate": 1.374772937729241e-05, "loss": 0.2449, "step": 17843, "teacher_loss": 0.21653254330158234 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.21672698855400085, "learning_rate": 1.374546607070711e-05, "loss": 0.1447, "step": 17844, "teacher_loss": 0.13671648502349854 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.38864701986312866, "learning_rate": 1.3743202792884509e-05, "loss": 0.3385, "step": 17845, "teacher_loss": 0.33290863037109375 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.377738893032074, "learning_rate": 1.3740939543876504e-05, "loss": 0.2155, "step": 17846, "teacher_loss": 0.19750091433525085 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.5593780279159546, "learning_rate": 1.3738676323734978e-05, "loss": 0.2921, "step": 17847, "teacher_loss": 0.2623681426048279 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.280231773853302, "learning_rate": 1.373641313251182e-05, "loss": 0.1934, "step": 17848, "teacher_loss": 0.18378078937530518 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.3979759216308594, "learning_rate": 1.3734149970258925e-05, "loss": 0.2608, "step": 17849, "teacher_loss": 0.2455916404724121 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.361625611782074, "learning_rate": 1.3731886837028177e-05, "loss": 0.2266, "step": 17850, "teacher_loss": 0.2116299420595169 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.24727371335029602, "learning_rate": 1.3729623732871452e-05, "loss": 0.2299, "step": 17851, "teacher_loss": 0.22796601057052612 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.4785187542438507, "learning_rate": 1.3727360657840658e-05, "loss": 0.2486, "step": 17852, "teacher_loss": 0.22303897142410278 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.42806005477905273, "learning_rate": 1.3725097611987664e-05, "loss": 0.2152, "step": 17853, "teacher_loss": 0.1915723830461502 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.7730212211608887, "learning_rate": 1.3722834595364356e-05, "loss": 0.3504, "step": 17854, "teacher_loss": 0.3034707307815552 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2793616056442261, "learning_rate": 1.3720571608022626e-05, "loss": 0.1912, "step": 17855, "teacher_loss": 0.1813506782054901 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.35565051436424255, "learning_rate": 1.3718308650014352e-05, "loss": 0.2278, "step": 17856, "teacher_loss": 0.21363089978694916 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.22848287224769592, "learning_rate": 1.3716045721391415e-05, "loss": 0.2843, "step": 17857, "teacher_loss": 0.29054874181747437 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.6709418296813965, "learning_rate": 1.3713782822205703e-05, "loss": 0.2619, "step": 17858, "teacher_loss": 0.21644198894500732 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2802004814147949, "learning_rate": 1.3711519952509096e-05, "loss": 0.3008, "step": 17859, "teacher_loss": 0.3030715584754944 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.9301304221153259, "learning_rate": 1.3709257112353469e-05, "loss": 0.3686, "step": 17860, "teacher_loss": 0.3061832785606384 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.27422329783439636, "learning_rate": 1.3706994301790708e-05, "loss": 0.1946, "step": 17861, "teacher_loss": 0.18570773303508759 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.29091331362724304, "learning_rate": 1.3704731520872693e-05, "loss": 0.2294, "step": 17862, "teacher_loss": 0.2225547432899475 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.6820839643478394, "learning_rate": 1.3702468769651297e-05, "loss": 0.2574, "step": 17863, "teacher_loss": 0.21026533842086792 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.39962050318717957, "learning_rate": 1.3700206048178407e-05, "loss": 0.219, "step": 17864, "teacher_loss": 0.19894824922084808 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2082759141921997, "learning_rate": 1.3697943356505897e-05, "loss": 0.1868, "step": 17865, "teacher_loss": 0.18443885445594788 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.4345414638519287, "learning_rate": 1.369568069468564e-05, "loss": 0.175, "step": 17866, "teacher_loss": 0.14616265892982483 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2150755226612091, "learning_rate": 1.3693418062769508e-05, "loss": 0.1746, "step": 17867, "teacher_loss": 0.17014577984809875 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.22658689320087433, "learning_rate": 1.3691155460809388e-05, "loss": 0.1702, "step": 17868, "teacher_loss": 0.16391071677207947 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.8484402298927307, "learning_rate": 1.3688892888857149e-05, "loss": 0.3013, "step": 17869, "teacher_loss": 0.24049615859985352 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.690902829170227, "learning_rate": 1.3686630346964662e-05, "loss": 0.308, "step": 17870, "teacher_loss": 0.26549142599105835 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.30712515115737915, "learning_rate": 1.3684367835183807e-05, "loss": 0.1609, "step": 17871, "teacher_loss": 0.14463184773921967 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.3385729193687439, "learning_rate": 1.3682105353566453e-05, "loss": 0.2131, "step": 17872, "teacher_loss": 0.1991869956254959 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.6309759616851807, "learning_rate": 1.3679842902164467e-05, "loss": 0.2309, "step": 17873, "teacher_loss": 0.1864413321018219 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2998085021972656, "learning_rate": 1.367758048102973e-05, "loss": 0.1517, "step": 17874, "teacher_loss": 0.13527345657348633 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.5139168500900269, "learning_rate": 1.3675318090214112e-05, "loss": 0.2419, "step": 17875, "teacher_loss": 0.2117028683423996 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.9159693717956543, "learning_rate": 1.3673055729769467e-05, "loss": 0.3943, "step": 17876, "teacher_loss": 0.3363143503665924 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.7316271066665649, "learning_rate": 1.3670793399747686e-05, "loss": 0.2156, "step": 17877, "teacher_loss": 0.15831011533737183 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.23709836602210999, "learning_rate": 1.3668531100200624e-05, "loss": 0.1891, "step": 17878, "teacher_loss": 0.18372409045696259 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.45341965556144714, "learning_rate": 1.3666268831180144e-05, "loss": 0.2838, "step": 17879, "teacher_loss": 0.2649998664855957 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.29396146535873413, "learning_rate": 1.3664006592738133e-05, "loss": 0.208, "step": 17880, "teacher_loss": 0.19839657843112946 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.5531530380249023, "learning_rate": 1.3661744384926442e-05, "loss": 0.2741, "step": 17881, "teacher_loss": 0.2431270182132721 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.29309001564979553, "learning_rate": 1.3659482207796936e-05, "loss": 0.2432, "step": 17882, "teacher_loss": 0.23762616515159607 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2827276289463043, "learning_rate": 1.3657220061401486e-05, "loss": 0.2173, "step": 17883, "teacher_loss": 0.21004685759544373 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.41173237562179565, "learning_rate": 1.3654957945791955e-05, "loss": 0.2126, "step": 17884, "teacher_loss": 0.19047416746616364 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.290057897567749, "learning_rate": 1.3652695861020206e-05, "loss": 0.2198, "step": 17885, "teacher_loss": 0.21198277175426483 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.36265161633491516, "learning_rate": 1.36504338071381e-05, "loss": 0.2294, "step": 17886, "teacher_loss": 0.21459153294563293 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.3917011618614197, "learning_rate": 1.36481717841975e-05, "loss": 0.2348, "step": 17887, "teacher_loss": 0.21741682291030884 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.5426660776138306, "learning_rate": 1.3645909792250274e-05, "loss": 0.3107, "step": 17888, "teacher_loss": 0.2848764955997467 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.3115384578704834, "learning_rate": 1.3643647831348267e-05, "loss": 0.2121, "step": 17889, "teacher_loss": 0.20105823874473572 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.3003392219543457, "learning_rate": 1.3641385901543356e-05, "loss": 0.2289, "step": 17890, "teacher_loss": 0.22092482447624207 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.9843560457229614, "learning_rate": 1.3639124002887392e-05, "loss": 0.3292, "step": 17891, "teacher_loss": 0.25640881061553955 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.4312601089477539, "learning_rate": 1.363686213543223e-05, "loss": 0.2768, "step": 17892, "teacher_loss": 0.2596362233161926 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.18807637691497803, "learning_rate": 1.3634600299229735e-05, "loss": 0.2414, "step": 17893, "teacher_loss": 0.2472846657037735 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.11939650028944016, "learning_rate": 1.3632338494331764e-05, "loss": 0.1595, "step": 17894, "teacher_loss": 0.16391196846961975 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.8226872682571411, "learning_rate": 1.3630076720790165e-05, "loss": 0.3156, "step": 17895, "teacher_loss": 0.259303480386734 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 1.1016690731048584, "learning_rate": 1.3627814978656804e-05, "loss": 0.254, "step": 17896, "teacher_loss": 0.15981265902519226 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.19216234982013702, "learning_rate": 1.362555326798353e-05, "loss": 0.1663, "step": 17897, "teacher_loss": 0.16339480876922607 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.26410144567489624, "learning_rate": 1.3623291588822198e-05, "loss": 0.1442, "step": 17898, "teacher_loss": 0.1308397501707077 }, { "compression_loss": 0.0, "epoch": 3.23, "label_loss": 0.2545037269592285, "learning_rate": 1.3621029941224666e-05, "loss": 0.1808, "step": 17899, "teacher_loss": 0.17255869507789612 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.22529344260692596, "learning_rate": 1.3618768325242784e-05, "loss": 0.1762, "step": 17900, "teacher_loss": 0.17073184251785278 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5310543775558472, "learning_rate": 1.3616506740928393e-05, "loss": 0.3608, "step": 17901, "teacher_loss": 0.34183478355407715 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.20028069615364075, "learning_rate": 1.3614245188333367e-05, "loss": 0.2475, "step": 17902, "teacher_loss": 0.25274956226348877 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.6437517404556274, "learning_rate": 1.3611983667509538e-05, "loss": 0.3608, "step": 17903, "teacher_loss": 0.32940834760665894 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3183338940143585, "learning_rate": 1.3609722178508758e-05, "loss": 0.2328, "step": 17904, "teacher_loss": 0.2233031541109085 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.2668170928955078, "learning_rate": 1.3607460721382888e-05, "loss": 0.184, "step": 17905, "teacher_loss": 0.17485041916370392 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.6202265024185181, "learning_rate": 1.3605199296183766e-05, "loss": 0.2493, "step": 17906, "teacher_loss": 0.20804864168167114 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.30104145407676697, "learning_rate": 1.3602937902963242e-05, "loss": 0.2255, "step": 17907, "teacher_loss": 0.21705962717533112 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.23740994930267334, "learning_rate": 1.3600676541773162e-05, "loss": 0.1984, "step": 17908, "teacher_loss": 0.19404630362987518 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.6643080115318298, "learning_rate": 1.3598415212665375e-05, "loss": 0.4363, "step": 17909, "teacher_loss": 0.41095784306526184 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.8403022289276123, "learning_rate": 1.3596153915691724e-05, "loss": 0.2469, "step": 17910, "teacher_loss": 0.18102183938026428 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.1629820466041565, "learning_rate": 1.3593892650904052e-05, "loss": 0.1935, "step": 17911, "teacher_loss": 0.1969369351863861 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.4275708496570587, "learning_rate": 1.3591631418354211e-05, "loss": 0.2382, "step": 17912, "teacher_loss": 0.21713140606880188 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.7296366095542908, "learning_rate": 1.358937021809404e-05, "loss": 0.299, "step": 17913, "teacher_loss": 0.2511311173439026 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.46660614013671875, "learning_rate": 1.3587109050175373e-05, "loss": 0.2337, "step": 17914, "teacher_loss": 0.20785781741142273 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.19109860062599182, "learning_rate": 1.3584847914650067e-05, "loss": 0.2088, "step": 17915, "teacher_loss": 0.2108122706413269 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.29618990421295166, "learning_rate": 1.3582586811569954e-05, "loss": 0.2189, "step": 17916, "teacher_loss": 0.2102924883365631 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5907838344573975, "learning_rate": 1.3580325740986873e-05, "loss": 0.2859, "step": 17917, "teacher_loss": 0.25204408168792725 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.22742050886154175, "learning_rate": 1.3578064702952668e-05, "loss": 0.222, "step": 17918, "teacher_loss": 0.22135965526103973 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.24711500108242035, "learning_rate": 1.3575803697519177e-05, "loss": 0.1955, "step": 17919, "teacher_loss": 0.18978387117385864 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5944147109985352, "learning_rate": 1.3573542724738233e-05, "loss": 0.5108, "step": 17920, "teacher_loss": 0.5015382170677185 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.41176509857177734, "learning_rate": 1.3571281784661683e-05, "loss": 0.2944, "step": 17921, "teacher_loss": 0.2813221514225006 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3666585385799408, "learning_rate": 1.3569020877341356e-05, "loss": 0.2317, "step": 17922, "teacher_loss": 0.21672730147838593 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.49456626176834106, "learning_rate": 1.3566760002829088e-05, "loss": 0.3047, "step": 17923, "teacher_loss": 0.28362077474594116 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.33030563592910767, "learning_rate": 1.3564499161176718e-05, "loss": 0.2266, "step": 17924, "teacher_loss": 0.21506813168525696 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3109801709651947, "learning_rate": 1.356223835243608e-05, "loss": 0.2203, "step": 17925, "teacher_loss": 0.21024981141090393 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.9230259656906128, "learning_rate": 1.3559977576659e-05, "loss": 0.5323, "step": 17926, "teacher_loss": 0.48883843421936035 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3097284436225891, "learning_rate": 1.3557716833897326e-05, "loss": 0.291, "step": 17927, "teacher_loss": 0.288865327835083 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.2568824887275696, "learning_rate": 1.3555456124202876e-05, "loss": 0.2158, "step": 17928, "teacher_loss": 0.2112794816493988 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.43142515420913696, "learning_rate": 1.3553195447627486e-05, "loss": 0.2053, "step": 17929, "teacher_loss": 0.1802024096250534 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.4834776818752289, "learning_rate": 1.3550934804222983e-05, "loss": 0.1857, "step": 17930, "teacher_loss": 0.15262514352798462 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 1.0416516065597534, "learning_rate": 1.3548674194041205e-05, "loss": 0.3945, "step": 17931, "teacher_loss": 0.3226143717765808 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.47691306471824646, "learning_rate": 1.3546413617133977e-05, "loss": 0.2369, "step": 17932, "teacher_loss": 0.21026135981082916 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3165375590324402, "learning_rate": 1.3544153073553122e-05, "loss": 0.2342, "step": 17933, "teacher_loss": 0.2250521183013916 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.33222872018814087, "learning_rate": 1.3541892563350476e-05, "loss": 0.2721, "step": 17934, "teacher_loss": 0.26539868116378784 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3754420280456543, "learning_rate": 1.3539632086577862e-05, "loss": 0.2705, "step": 17935, "teacher_loss": 0.258821576833725 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.41611868143081665, "learning_rate": 1.3537371643287103e-05, "loss": 0.334, "step": 17936, "teacher_loss": 0.32491499185562134 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5634071826934814, "learning_rate": 1.3535111233530028e-05, "loss": 0.2851, "step": 17937, "teacher_loss": 0.25421175360679626 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.7406641840934753, "learning_rate": 1.3532850857358467e-05, "loss": 0.212, "step": 17938, "teacher_loss": 0.15324822068214417 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.32580795884132385, "learning_rate": 1.3530590514824226e-05, "loss": 0.1646, "step": 17939, "teacher_loss": 0.1467362642288208 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.2023438811302185, "learning_rate": 1.3528330205979149e-05, "loss": 0.1709, "step": 17940, "teacher_loss": 0.1673606038093567 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5118176341056824, "learning_rate": 1.3526069930875044e-05, "loss": 0.3031, "step": 17941, "teacher_loss": 0.27990370988845825 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.8362551927566528, "learning_rate": 1.3523809689563733e-05, "loss": 0.391, "step": 17942, "teacher_loss": 0.34148338437080383 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.35762929916381836, "learning_rate": 1.3521549482097045e-05, "loss": 0.221, "step": 17943, "teacher_loss": 0.20583957433700562 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.28415733575820923, "learning_rate": 1.3519289308526792e-05, "loss": 0.1669, "step": 17944, "teacher_loss": 0.153926283121109 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.7693701982498169, "learning_rate": 1.3517029168904796e-05, "loss": 0.321, "step": 17945, "teacher_loss": 0.27118054032325745 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.5445525050163269, "learning_rate": 1.3514769063282875e-05, "loss": 0.3355, "step": 17946, "teacher_loss": 0.3122883439064026 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.3659040629863739, "learning_rate": 1.3512508991712848e-05, "loss": 0.1529, "step": 17947, "teacher_loss": 0.12917840480804443 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.47576749324798584, "learning_rate": 1.3510248954246532e-05, "loss": 0.1964, "step": 17948, "teacher_loss": 0.1653842329978943 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 1.0991228818893433, "learning_rate": 1.3507988950935733e-05, "loss": 0.4157, "step": 17949, "teacher_loss": 0.33978402614593506 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.7395064830780029, "learning_rate": 1.3505728981832285e-05, "loss": 0.3408, "step": 17950, "teacher_loss": 0.29649513959884644 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.24430668354034424, "learning_rate": 1.3503469046987983e-05, "loss": 0.2416, "step": 17951, "teacher_loss": 0.24131101369857788 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.9090707302093506, "learning_rate": 1.3501209146454651e-05, "loss": 0.4204, "step": 17952, "teacher_loss": 0.366134375333786 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.4414246678352356, "learning_rate": 1.34989492802841e-05, "loss": 0.2309, "step": 17953, "teacher_loss": 0.20747947692871094 }, { "compression_loss": 0.0, "epoch": 3.24, "label_loss": 0.35907846689224243, "learning_rate": 1.3496689448528143e-05, "loss": 0.2822, "step": 17954, "teacher_loss": 0.2736409306526184 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.19814419746398926, "learning_rate": 1.3494429651238585e-05, "loss": 0.1854, "step": 17955, "teacher_loss": 0.1839936226606369 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.2181391417980194, "learning_rate": 1.3492169888467246e-05, "loss": 0.1725, "step": 17956, "teacher_loss": 0.1674099564552307 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.46645861864089966, "learning_rate": 1.348991016026593e-05, "loss": 0.2356, "step": 17957, "teacher_loss": 0.2099272459745407 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4819023609161377, "learning_rate": 1.3487650466686442e-05, "loss": 0.2209, "step": 17958, "teacher_loss": 0.1919083148241043 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5197563171386719, "learning_rate": 1.3485390807780601e-05, "loss": 0.2245, "step": 17959, "teacher_loss": 0.19174908101558685 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.440390408039093, "learning_rate": 1.3483131183600205e-05, "loss": 0.1675, "step": 17960, "teacher_loss": 0.1371985524892807 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.33385708928108215, "learning_rate": 1.348087159419706e-05, "loss": 0.3438, "step": 17961, "teacher_loss": 0.3448812961578369 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.46897149085998535, "learning_rate": 1.347861203962298e-05, "loss": 0.2089, "step": 17962, "teacher_loss": 0.18005365133285522 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3958002030849457, "learning_rate": 1.3476352519929766e-05, "loss": 0.2512, "step": 17963, "teacher_loss": 0.23509825766086578 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4387205243110657, "learning_rate": 1.3474093035169212e-05, "loss": 0.3714, "step": 17964, "teacher_loss": 0.3639586269855499 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.18875867128372192, "learning_rate": 1.3471833585393139e-05, "loss": 0.2247, "step": 17965, "teacher_loss": 0.228724867105484 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4327235817909241, "learning_rate": 1.3469574170653337e-05, "loss": 0.2545, "step": 17966, "teacher_loss": 0.23472946882247925 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.17957806587219238, "learning_rate": 1.3467314791001608e-05, "loss": 0.1873, "step": 17967, "teacher_loss": 0.18815457820892334 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.8084374666213989, "learning_rate": 1.3465055446489757e-05, "loss": 0.5776, "step": 17968, "teacher_loss": 0.5519437193870544 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.25457704067230225, "learning_rate": 1.3462796137169584e-05, "loss": 0.1875, "step": 17969, "teacher_loss": 0.1800491213798523 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 1.3587530851364136, "learning_rate": 1.3460536863092888e-05, "loss": 0.455, "step": 17970, "teacher_loss": 0.354530394077301 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.8940027952194214, "learning_rate": 1.3458277624311461e-05, "loss": 0.3136, "step": 17971, "teacher_loss": 0.24912425875663757 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.841683030128479, "learning_rate": 1.3456018420877111e-05, "loss": 0.3592, "step": 17972, "teacher_loss": 0.3055430054664612 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.24170561134815216, "learning_rate": 1.345375925284163e-05, "loss": 0.1943, "step": 17973, "teacher_loss": 0.18905138969421387 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.24589915573596954, "learning_rate": 1.3451500120256806e-05, "loss": 0.2806, "step": 17974, "teacher_loss": 0.284446120262146 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5890187621116638, "learning_rate": 1.344924102317445e-05, "loss": 0.2192, "step": 17975, "teacher_loss": 0.17814594507217407 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.43649035692214966, "learning_rate": 1.3446981961646346e-05, "loss": 0.2342, "step": 17976, "teacher_loss": 0.21174272894859314 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.42006468772888184, "learning_rate": 1.3444722935724287e-05, "loss": 0.2096, "step": 17977, "teacher_loss": 0.18619731068611145 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.8843041062355042, "learning_rate": 1.3442463945460069e-05, "loss": 0.3347, "step": 17978, "teacher_loss": 0.2736354470252991 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5869308114051819, "learning_rate": 1.3440204990905483e-05, "loss": 0.3299, "step": 17979, "teacher_loss": 0.3013291656970978 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.6212499737739563, "learning_rate": 1.3437946072112317e-05, "loss": 0.2545, "step": 17980, "teacher_loss": 0.21378956735134125 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.8992911577224731, "learning_rate": 1.343568718913237e-05, "loss": 0.3675, "step": 17981, "teacher_loss": 0.30844783782958984 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3091692328453064, "learning_rate": 1.3433428342017422e-05, "loss": 0.2503, "step": 17982, "teacher_loss": 0.24377751350402832 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5269281268119812, "learning_rate": 1.3431169530819264e-05, "loss": 0.3066, "step": 17983, "teacher_loss": 0.2821333408355713 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5792193412780762, "learning_rate": 1.3428910755589688e-05, "loss": 0.2335, "step": 17984, "teacher_loss": 0.19503188133239746 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4550713896751404, "learning_rate": 1.3426652016380479e-05, "loss": 0.2089, "step": 17985, "teacher_loss": 0.18155571818351746 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5396783351898193, "learning_rate": 1.3424393313243418e-05, "loss": 0.2623, "step": 17986, "teacher_loss": 0.23149323463439941 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.42046624422073364, "learning_rate": 1.3422134646230298e-05, "loss": 0.202, "step": 17987, "teacher_loss": 0.17768613994121552 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5487052202224731, "learning_rate": 1.3419876015392904e-05, "loss": 0.4206, "step": 17988, "teacher_loss": 0.4063110053539276 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.26007553935050964, "learning_rate": 1.341761742078301e-05, "loss": 0.1729, "step": 17989, "teacher_loss": 0.16318368911743164 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3510127067565918, "learning_rate": 1.3415358862452403e-05, "loss": 0.1929, "step": 17990, "teacher_loss": 0.17536193132400513 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.20279119908809662, "learning_rate": 1.3413100340452869e-05, "loss": 0.1953, "step": 17991, "teacher_loss": 0.1944923996925354 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5365443825721741, "learning_rate": 1.3410841854836185e-05, "loss": 0.208, "step": 17992, "teacher_loss": 0.1714564710855484 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.7745158076286316, "learning_rate": 1.340858340565413e-05, "loss": 0.2982, "step": 17993, "teacher_loss": 0.24531300365924835 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4842342734336853, "learning_rate": 1.340632499295849e-05, "loss": 0.2627, "step": 17994, "teacher_loss": 0.23804882168769836 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.33197221159935, "learning_rate": 1.340406661680104e-05, "loss": 0.2458, "step": 17995, "teacher_loss": 0.23621074855327606 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.9307555556297302, "learning_rate": 1.3401808277233555e-05, "loss": 0.2615, "step": 17996, "teacher_loss": 0.18708816170692444 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.6056849956512451, "learning_rate": 1.3399549974307816e-05, "loss": 0.2572, "step": 17997, "teacher_loss": 0.21846500039100647 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3170185089111328, "learning_rate": 1.3397291708075602e-05, "loss": 0.3039, "step": 17998, "teacher_loss": 0.30240270495414734 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4331493377685547, "learning_rate": 1.3395033478588675e-05, "loss": 0.3011, "step": 17999, "teacher_loss": 0.28642538189888 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.355377197265625, "learning_rate": 1.3392775285898827e-05, "loss": 0.1787, "step": 18000, "teacher_loss": 0.15907607972621918 }, { "epoch": 3.25, "eval_exact_match": 80.0, "eval_f1": 87.35321118500602, "step": 18000 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.21056440472602844, "learning_rate": 1.339051713005782e-05, "loss": 0.2549, "step": 18001, "teacher_loss": 0.25985103845596313 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.19201895594596863, "learning_rate": 1.3388259011117424e-05, "loss": 0.1825, "step": 18002, "teacher_loss": 0.18148615956306458 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.10029195249080658, "learning_rate": 1.3386000929129425e-05, "loss": 0.1726, "step": 18003, "teacher_loss": 0.1805967390537262 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.4660463035106659, "learning_rate": 1.3383742884145584e-05, "loss": 0.217, "step": 18004, "teacher_loss": 0.18936866521835327 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.27851828932762146, "learning_rate": 1.3381484876217669e-05, "loss": 0.1678, "step": 18005, "teacher_loss": 0.15549078583717346 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.5409957766532898, "learning_rate": 1.3379226905397456e-05, "loss": 0.217, "step": 18006, "teacher_loss": 0.18100020289421082 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3592095375061035, "learning_rate": 1.337696897173671e-05, "loss": 0.289, "step": 18007, "teacher_loss": 0.28115957975387573 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.3841264843940735, "learning_rate": 1.3374711075287198e-05, "loss": 0.2631, "step": 18008, "teacher_loss": 0.2496688961982727 }, { "compression_loss": 0.0, "epoch": 3.25, "label_loss": 0.34814420342445374, "learning_rate": 1.337245321610069e-05, "loss": 0.2301, "step": 18009, "teacher_loss": 0.21702076494693756 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.46644219756126404, "learning_rate": 1.3370195394228952e-05, "loss": 0.2608, "step": 18010, "teacher_loss": 0.2379295378923416 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.1959199607372284, "learning_rate": 1.3367937609723749e-05, "loss": 0.1964, "step": 18011, "teacher_loss": 0.19647538661956787 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5666648149490356, "learning_rate": 1.3365679862636833e-05, "loss": 0.4057, "step": 18012, "teacher_loss": 0.3877698481082916 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6380306482315063, "learning_rate": 1.3363422153019988e-05, "loss": 0.2272, "step": 18013, "teacher_loss": 0.18155093491077423 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.3806297779083252, "learning_rate": 1.3361164480924965e-05, "loss": 0.2581, "step": 18014, "teacher_loss": 0.24453939497470856 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.2186947762966156, "learning_rate": 1.3358906846403522e-05, "loss": 0.2245, "step": 18015, "teacher_loss": 0.2251451015472412 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.39550304412841797, "learning_rate": 1.3356649249507428e-05, "loss": 0.2833, "step": 18016, "teacher_loss": 0.2707808315753937 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 1.2079740762710571, "learning_rate": 1.3354391690288439e-05, "loss": 0.2558, "step": 18017, "teacher_loss": 0.15005674958229065 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4492696523666382, "learning_rate": 1.3352134168798311e-05, "loss": 0.2227, "step": 18018, "teacher_loss": 0.19756457209587097 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.2646927833557129, "learning_rate": 1.3349876685088811e-05, "loss": 0.2221, "step": 18019, "teacher_loss": 0.21738159656524658 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4837474226951599, "learning_rate": 1.3347619239211692e-05, "loss": 0.2339, "step": 18020, "teacher_loss": 0.2061486840248108 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.3486769199371338, "learning_rate": 1.3345361831218704e-05, "loss": 0.1858, "step": 18021, "teacher_loss": 0.1677398681640625 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.7297282218933105, "learning_rate": 1.3343104461161613e-05, "loss": 0.2969, "step": 18022, "teacher_loss": 0.2487574815750122 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5716339945793152, "learning_rate": 1.3340847129092173e-05, "loss": 0.4358, "step": 18023, "teacher_loss": 0.42072850465774536 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5458360910415649, "learning_rate": 1.3338589835062123e-05, "loss": 0.3033, "step": 18024, "teacher_loss": 0.2763892412185669 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.22596397995948792, "learning_rate": 1.3336332579123238e-05, "loss": 0.2007, "step": 18025, "teacher_loss": 0.19794821739196777 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.9249753952026367, "learning_rate": 1.3334075361327254e-05, "loss": 0.3378, "step": 18026, "teacher_loss": 0.2726061940193176 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.451119065284729, "learning_rate": 1.333181818172592e-05, "loss": 0.2331, "step": 18027, "teacher_loss": 0.20892536640167236 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.35619962215423584, "learning_rate": 1.3329561040371007e-05, "loss": 0.2347, "step": 18028, "teacher_loss": 0.22119031846523285 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6137403249740601, "learning_rate": 1.3327303937314247e-05, "loss": 0.2654, "step": 18029, "teacher_loss": 0.2267158329486847 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6352120637893677, "learning_rate": 1.3325046872607387e-05, "loss": 0.2759, "step": 18030, "teacher_loss": 0.23595969378948212 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5605087280273438, "learning_rate": 1.3322789846302185e-05, "loss": 0.2235, "step": 18031, "teacher_loss": 0.1860453486442566 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.28998863697052, "learning_rate": 1.3320532858450382e-05, "loss": 0.1719, "step": 18032, "teacher_loss": 0.15875372290611267 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5532456636428833, "learning_rate": 1.3318275909103727e-05, "loss": 0.23, "step": 18033, "teacher_loss": 0.19406282901763916 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.3889275789260864, "learning_rate": 1.3316018998313962e-05, "loss": 0.193, "step": 18034, "teacher_loss": 0.17122645676136017 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.2096666395664215, "learning_rate": 1.3313762126132832e-05, "loss": 0.2132, "step": 18035, "teacher_loss": 0.21355712413787842 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4629286229610443, "learning_rate": 1.3311505292612085e-05, "loss": 0.345, "step": 18036, "teacher_loss": 0.33191320300102234 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.2919420003890991, "learning_rate": 1.3309248497803451e-05, "loss": 0.1719, "step": 18037, "teacher_loss": 0.1585262417793274 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5541933178901672, "learning_rate": 1.3306991741758689e-05, "loss": 0.6016, "step": 18038, "teacher_loss": 0.606877326965332 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.33001285791397095, "learning_rate": 1.3304735024529526e-05, "loss": 0.2032, "step": 18039, "teacher_loss": 0.18911322951316833 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.14874063432216644, "learning_rate": 1.3302478346167703e-05, "loss": 0.1585, "step": 18040, "teacher_loss": 0.15957880020141602 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6319202780723572, "learning_rate": 1.3300221706724966e-05, "loss": 0.229, "step": 18041, "teacher_loss": 0.1842707246541977 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.634954035282135, "learning_rate": 1.329796510625305e-05, "loss": 0.4518, "step": 18042, "teacher_loss": 0.43140947818756104 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.32245922088623047, "learning_rate": 1.3295708544803688e-05, "loss": 0.2035, "step": 18043, "teacher_loss": 0.19022925198078156 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.270362913608551, "learning_rate": 1.329345202242862e-05, "loss": 0.1868, "step": 18044, "teacher_loss": 0.17756280303001404 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4343642294406891, "learning_rate": 1.3291195539179584e-05, "loss": 0.2687, "step": 18045, "teacher_loss": 0.2502707242965698 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.42184072732925415, "learning_rate": 1.3288939095108306e-05, "loss": 0.1719, "step": 18046, "teacher_loss": 0.14414390921592712 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.37064820528030396, "learning_rate": 1.3286682690266527e-05, "loss": 0.1962, "step": 18047, "teacher_loss": 0.1768346130847931 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.40810659527778625, "learning_rate": 1.3284426324705981e-05, "loss": 0.1833, "step": 18048, "teacher_loss": 0.15836849808692932 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.9537204504013062, "learning_rate": 1.3282169998478388e-05, "loss": 0.2607, "step": 18049, "teacher_loss": 0.18374550342559814 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6317575573921204, "learning_rate": 1.3279913711635493e-05, "loss": 0.2875, "step": 18050, "teacher_loss": 0.24921450018882751 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4654926359653473, "learning_rate": 1.3277657464229018e-05, "loss": 0.2092, "step": 18051, "teacher_loss": 0.1807556450366974 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.9254947900772095, "learning_rate": 1.3275401256310694e-05, "loss": 0.3353, "step": 18052, "teacher_loss": 0.2697446048259735 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.711561918258667, "learning_rate": 1.3273145087932243e-05, "loss": 0.4599, "step": 18053, "teacher_loss": 0.4319247007369995 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.39307230710983276, "learning_rate": 1.3270888959145402e-05, "loss": 0.2834, "step": 18054, "teacher_loss": 0.27126815915107727 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.1702650785446167, "learning_rate": 1.3268632870001895e-05, "loss": 0.1807, "step": 18055, "teacher_loss": 0.1819145381450653 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.8414362668991089, "learning_rate": 1.3266376820553442e-05, "loss": 0.309, "step": 18056, "teacher_loss": 0.2498057782649994 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.8354657888412476, "learning_rate": 1.3264120810851773e-05, "loss": 0.2957, "step": 18057, "teacher_loss": 0.2356717586517334 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.5256901979446411, "learning_rate": 1.3261864840948609e-05, "loss": 0.2341, "step": 18058, "teacher_loss": 0.2017158716917038 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.3564150333404541, "learning_rate": 1.325960891089567e-05, "loss": 0.2669, "step": 18059, "teacher_loss": 0.2569480836391449 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.28577864170074463, "learning_rate": 1.3257353020744685e-05, "loss": 0.1658, "step": 18060, "teacher_loss": 0.15241412818431854 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.9722816944122314, "learning_rate": 1.3255097170547371e-05, "loss": 0.3675, "step": 18061, "teacher_loss": 0.30028021335601807 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.6382484436035156, "learning_rate": 1.3252841360355441e-05, "loss": 0.2562, "step": 18062, "teacher_loss": 0.2137523889541626 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.41152137517929077, "learning_rate": 1.3250585590220627e-05, "loss": 0.2049, "step": 18063, "teacher_loss": 0.18191435933113098 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.4244527816772461, "learning_rate": 1.3248329860194639e-05, "loss": 0.1799, "step": 18064, "teacher_loss": 0.15269345045089722 }, { "compression_loss": 0.0, "epoch": 3.26, "label_loss": 0.3407462239265442, "learning_rate": 1.3246074170329192e-05, "loss": 0.2657, "step": 18065, "teacher_loss": 0.25739943981170654 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.39456138014793396, "learning_rate": 1.3243818520676009e-05, "loss": 0.2051, "step": 18066, "teacher_loss": 0.184077650308609 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.39879655838012695, "learning_rate": 1.3241562911286803e-05, "loss": 0.2359, "step": 18067, "teacher_loss": 0.21774812042713165 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.3525258004665375, "learning_rate": 1.3239307342213282e-05, "loss": 0.2211, "step": 18068, "teacher_loss": 0.20652933418750763 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.4380994439125061, "learning_rate": 1.323705181350717e-05, "loss": 0.177, "step": 18069, "teacher_loss": 0.1479840874671936 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5991427898406982, "learning_rate": 1.3234796325220174e-05, "loss": 0.2933, "step": 18070, "teacher_loss": 0.25932633876800537 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.38682258129119873, "learning_rate": 1.3232540877404e-05, "loss": 0.1937, "step": 18071, "teacher_loss": 0.17227378487586975 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.4379361867904663, "learning_rate": 1.323028547011037e-05, "loss": 0.2606, "step": 18072, "teacher_loss": 0.24093812704086304 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.43991994857788086, "learning_rate": 1.3228030103390993e-05, "loss": 0.2285, "step": 18073, "teacher_loss": 0.20504266023635864 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.4691389203071594, "learning_rate": 1.3225774777297569e-05, "loss": 0.2483, "step": 18074, "teacher_loss": 0.2237095832824707 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.37046217918395996, "learning_rate": 1.3223519491881806e-05, "loss": 0.3211, "step": 18075, "teacher_loss": 0.31562644243240356 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.3690827488899231, "learning_rate": 1.3221264247195417e-05, "loss": 0.1889, "step": 18076, "teacher_loss": 0.16891750693321228 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.18976879119873047, "learning_rate": 1.3219009043290107e-05, "loss": 0.2171, "step": 18077, "teacher_loss": 0.22014687955379486 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5652008652687073, "learning_rate": 1.3216753880217577e-05, "loss": 0.2329, "step": 18078, "teacher_loss": 0.19600054621696472 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.261024534702301, "learning_rate": 1.3214498758029537e-05, "loss": 0.2203, "step": 18079, "teacher_loss": 0.21579015254974365 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.32064616680145264, "learning_rate": 1.3212243676777686e-05, "loss": 0.2259, "step": 18080, "teacher_loss": 0.21533358097076416 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.6935408115386963, "learning_rate": 1.3209988636513728e-05, "loss": 0.2802, "step": 18081, "teacher_loss": 0.23425620794296265 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2539622485637665, "learning_rate": 1.3207733637289364e-05, "loss": 0.1828, "step": 18082, "teacher_loss": 0.1749408096075058 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.3459761142730713, "learning_rate": 1.3205478679156295e-05, "loss": 0.2594, "step": 18083, "teacher_loss": 0.2497342824935913 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.41030436754226685, "learning_rate": 1.3203223762166216e-05, "loss": 0.1779, "step": 18084, "teacher_loss": 0.15209606289863586 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2116227149963379, "learning_rate": 1.3200968886370833e-05, "loss": 0.2152, "step": 18085, "teacher_loss": 0.21561098098754883 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.24857468903064728, "learning_rate": 1.3198714051821843e-05, "loss": 0.2373, "step": 18086, "teacher_loss": 0.2360607236623764 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.4978814721107483, "learning_rate": 1.3196459258570928e-05, "loss": 0.306, "step": 18087, "teacher_loss": 0.2846578359603882 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.8321453332901001, "learning_rate": 1.3194204506669806e-05, "loss": 0.2858, "step": 18088, "teacher_loss": 0.22508911788463593 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.45448440313339233, "learning_rate": 1.3191949796170156e-05, "loss": 0.2183, "step": 18089, "teacher_loss": 0.19203674793243408 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5713562965393066, "learning_rate": 1.3189695127123675e-05, "loss": 0.257, "step": 18090, "teacher_loss": 0.222085103392601 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 1.038316011428833, "learning_rate": 1.318744049958206e-05, "loss": 0.3771, "step": 18091, "teacher_loss": 0.30360138416290283 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2662854790687561, "learning_rate": 1.3185185913596999e-05, "loss": 0.2041, "step": 18092, "teacher_loss": 0.19720464944839478 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2641443610191345, "learning_rate": 1.3182931369220181e-05, "loss": 0.2097, "step": 18093, "teacher_loss": 0.20367072522640228 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.24942860007286072, "learning_rate": 1.3180676866503304e-05, "loss": 0.2164, "step": 18094, "teacher_loss": 0.21272876858711243 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.7187564969062805, "learning_rate": 1.317842240549805e-05, "loss": 0.2844, "step": 18095, "teacher_loss": 0.2361488789319992 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.539593517780304, "learning_rate": 1.3176167986256111e-05, "loss": 0.2592, "step": 18096, "teacher_loss": 0.228027805685997 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.19185474514961243, "learning_rate": 1.3173913608829164e-05, "loss": 0.203, "step": 18097, "teacher_loss": 0.2042843997478485 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.6059845089912415, "learning_rate": 1.3171659273268913e-05, "loss": 0.406, "step": 18098, "teacher_loss": 0.3837599754333496 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.35299962759017944, "learning_rate": 1.3169404979627028e-05, "loss": 0.2136, "step": 18099, "teacher_loss": 0.1980898529291153 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.18739306926727295, "learning_rate": 1.3167150727955199e-05, "loss": 0.1745, "step": 18100, "teacher_loss": 0.1730639934539795 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.7790427207946777, "learning_rate": 1.3164896518305107e-05, "loss": 0.2996, "step": 18101, "teacher_loss": 0.24631893634796143 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.26252225041389465, "learning_rate": 1.3162642350728439e-05, "loss": 0.2028, "step": 18102, "teacher_loss": 0.19619476795196533 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5420483946800232, "learning_rate": 1.316038822527687e-05, "loss": 0.2238, "step": 18103, "teacher_loss": 0.1884036362171173 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2817770838737488, "learning_rate": 1.3158134142002085e-05, "loss": 0.1499, "step": 18104, "teacher_loss": 0.13524103164672852 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.6387237310409546, "learning_rate": 1.3155880100955764e-05, "loss": 0.2261, "step": 18105, "teacher_loss": 0.1802579164505005 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.17957574129104614, "learning_rate": 1.3153626102189578e-05, "loss": 0.1716, "step": 18106, "teacher_loss": 0.1706923395395279 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.17800748348236084, "learning_rate": 1.3151372145755215e-05, "loss": 0.2106, "step": 18107, "teacher_loss": 0.2142239212989807 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.590187132358551, "learning_rate": 1.3149118231704345e-05, "loss": 0.2794, "step": 18108, "teacher_loss": 0.24484902620315552 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.38905492424964905, "learning_rate": 1.3146864360088642e-05, "loss": 0.1588, "step": 18109, "teacher_loss": 0.13320091366767883 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.2505139112472534, "learning_rate": 1.3144610530959784e-05, "loss": 0.2292, "step": 18110, "teacher_loss": 0.22679060697555542 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.3953123390674591, "learning_rate": 1.314235674436945e-05, "loss": 0.2419, "step": 18111, "teacher_loss": 0.22482754290103912 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.19311776757240295, "learning_rate": 1.3140103000369296e-05, "loss": 0.1627, "step": 18112, "teacher_loss": 0.15927964448928833 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.6625838279724121, "learning_rate": 1.3137849299011015e-05, "loss": 0.3446, "step": 18113, "teacher_loss": 0.3092654347419739 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.46393102407455444, "learning_rate": 1.313559564034626e-05, "loss": 0.3801, "step": 18114, "teacher_loss": 0.3708367645740509 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.40326282382011414, "learning_rate": 1.313334202442671e-05, "loss": 0.2832, "step": 18115, "teacher_loss": 0.26991456747055054 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.40717148780822754, "learning_rate": 1.3131088451304026e-05, "loss": 0.3281, "step": 18116, "teacher_loss": 0.31928497552871704 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.4894641041755676, "learning_rate": 1.3128834921029885e-05, "loss": 0.1978, "step": 18117, "teacher_loss": 0.16542458534240723 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5823531150817871, "learning_rate": 1.3126581433655948e-05, "loss": 0.2515, "step": 18118, "teacher_loss": 0.21468724310398102 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5762792825698853, "learning_rate": 1.312432798923388e-05, "loss": 0.2399, "step": 18119, "teacher_loss": 0.202561616897583 }, { "compression_loss": 0.0, "epoch": 3.27, "label_loss": 0.5645594596862793, "learning_rate": 1.312207458781535e-05, "loss": 0.3304, "step": 18120, "teacher_loss": 0.3044322729110718 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3128772974014282, "learning_rate": 1.3119821229452023e-05, "loss": 0.1724, "step": 18121, "teacher_loss": 0.15684494376182556 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3212696313858032, "learning_rate": 1.311756791419555e-05, "loss": 0.1912, "step": 18122, "teacher_loss": 0.17673750221729279 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.28454113006591797, "learning_rate": 1.3115314642097606e-05, "loss": 0.2528, "step": 18123, "teacher_loss": 0.2492837905883789 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5250230431556702, "learning_rate": 1.3113061413209848e-05, "loss": 0.2436, "step": 18124, "teacher_loss": 0.212283656001091 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3426131010055542, "learning_rate": 1.3110808227583924e-05, "loss": 0.2314, "step": 18125, "teacher_loss": 0.21906372904777527 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.2884938716888428, "learning_rate": 1.3108555085271517e-05, "loss": 0.3581, "step": 18126, "teacher_loss": 0.36588048934936523 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.1889309287071228, "learning_rate": 1.3106301986324266e-05, "loss": 0.2166, "step": 18127, "teacher_loss": 0.21964265406131744 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4153388738632202, "learning_rate": 1.310404893079383e-05, "loss": 0.2687, "step": 18128, "teacher_loss": 0.2523530423641205 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4441133141517639, "learning_rate": 1.3101795918731869e-05, "loss": 0.2803, "step": 18129, "teacher_loss": 0.2621369957923889 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3501591682434082, "learning_rate": 1.309954295019004e-05, "loss": 0.1674, "step": 18130, "teacher_loss": 0.1470591425895691 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.8556177020072937, "learning_rate": 1.3097290025219986e-05, "loss": 0.3238, "step": 18131, "teacher_loss": 0.26474615931510925 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4137946367263794, "learning_rate": 1.3095037143873375e-05, "loss": 0.2505, "step": 18132, "teacher_loss": 0.23240438103675842 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.2501292824745178, "learning_rate": 1.3092784306201847e-05, "loss": 0.2498, "step": 18133, "teacher_loss": 0.24974240362644196 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3543018698692322, "learning_rate": 1.3090531512257055e-05, "loss": 0.2388, "step": 18134, "teacher_loss": 0.22598996758460999 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5683243274688721, "learning_rate": 1.3088278762090653e-05, "loss": 0.233, "step": 18135, "teacher_loss": 0.19579479098320007 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.503743052482605, "learning_rate": 1.3086026055754293e-05, "loss": 0.2408, "step": 18136, "teacher_loss": 0.2115417867898941 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.2990487217903137, "learning_rate": 1.3083773393299611e-05, "loss": 0.1696, "step": 18137, "teacher_loss": 0.1552715301513672 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.46824532747268677, "learning_rate": 1.3081520774778258e-05, "loss": 0.1855, "step": 18138, "teacher_loss": 0.15413513779640198 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.25309228897094727, "learning_rate": 1.3079268200241885e-05, "loss": 0.1865, "step": 18139, "teacher_loss": 0.17913636565208435 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5562655925750732, "learning_rate": 1.3077015669742132e-05, "loss": 0.3, "step": 18140, "teacher_loss": 0.2715657651424408 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.6625136137008667, "learning_rate": 1.3074763183330643e-05, "loss": 0.2776, "step": 18141, "teacher_loss": 0.23487672209739685 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4585106372833252, "learning_rate": 1.3072510741059064e-05, "loss": 0.2499, "step": 18142, "teacher_loss": 0.2267199456691742 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.2499270737171173, "learning_rate": 1.3070258342979035e-05, "loss": 0.2552, "step": 18143, "teacher_loss": 0.2557613253593445 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.9610921740531921, "learning_rate": 1.3068005989142195e-05, "loss": 0.2526, "step": 18144, "teacher_loss": 0.17383532226085663 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.22878341376781464, "learning_rate": 1.3065753679600186e-05, "loss": 0.1976, "step": 18145, "teacher_loss": 0.1941266804933548 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.09850089251995087, "learning_rate": 1.3063501414404651e-05, "loss": 0.1395, "step": 18146, "teacher_loss": 0.14404058456420898 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.32071763277053833, "learning_rate": 1.3061249193607214e-05, "loss": 0.188, "step": 18147, "teacher_loss": 0.17330355942249298 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.655463695526123, "learning_rate": 1.3058997017259526e-05, "loss": 0.4159, "step": 18148, "teacher_loss": 0.38927972316741943 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.523260772228241, "learning_rate": 1.3056744885413216e-05, "loss": 0.2105, "step": 18149, "teacher_loss": 0.17572741210460663 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 1.012594223022461, "learning_rate": 1.3054492798119915e-05, "loss": 0.4024, "step": 18150, "teacher_loss": 0.3345867395401001 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.23949134349822998, "learning_rate": 1.3052240755431268e-05, "loss": 0.2107, "step": 18151, "teacher_loss": 0.20754443109035492 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.37489616870880127, "learning_rate": 1.3049988757398898e-05, "loss": 0.2046, "step": 18152, "teacher_loss": 0.1856551170349121 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.614890456199646, "learning_rate": 1.3047736804074439e-05, "loss": 0.1975, "step": 18153, "teacher_loss": 0.1511019915342331 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.6322044134140015, "learning_rate": 1.304548489550952e-05, "loss": 0.283, "step": 18154, "teacher_loss": 0.24421021342277527 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5281765460968018, "learning_rate": 1.3043233031755778e-05, "loss": 0.2514, "step": 18155, "teacher_loss": 0.22064149379730225 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.481297105550766, "learning_rate": 1.3040981212864832e-05, "loss": 0.2876, "step": 18156, "teacher_loss": 0.2661042809486389 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.23492732644081116, "learning_rate": 1.3038729438888311e-05, "loss": 0.2415, "step": 18157, "teacher_loss": 0.2422710657119751 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.2689550220966339, "learning_rate": 1.3036477709877849e-05, "loss": 0.209, "step": 18158, "teacher_loss": 0.20228593051433563 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4091396927833557, "learning_rate": 1.3034226025885067e-05, "loss": 0.3432, "step": 18159, "teacher_loss": 0.33587849140167236 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 1.1813099384307861, "learning_rate": 1.3031974386961578e-05, "loss": 0.3229, "step": 18160, "teacher_loss": 0.22749581933021545 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5107371211051941, "learning_rate": 1.302972279315903e-05, "loss": 0.3707, "step": 18161, "teacher_loss": 0.3551744222640991 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3317374885082245, "learning_rate": 1.3027471244529023e-05, "loss": 0.1907, "step": 18162, "teacher_loss": 0.175077423453331 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.8840545415878296, "learning_rate": 1.3025219741123185e-05, "loss": 0.3057, "step": 18163, "teacher_loss": 0.24141347408294678 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.510932981967926, "learning_rate": 1.3022968282993143e-05, "loss": 0.1835, "step": 18164, "teacher_loss": 0.14714449644088745 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.285107284784317, "learning_rate": 1.3020716870190507e-05, "loss": 0.1627, "step": 18165, "teacher_loss": 0.14914417266845703 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5745794177055359, "learning_rate": 1.3018465502766899e-05, "loss": 0.2214, "step": 18166, "teacher_loss": 0.18211030960083008 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.5230456590652466, "learning_rate": 1.3016214180773937e-05, "loss": 0.2142, "step": 18167, "teacher_loss": 0.1799221783876419 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.40999191999435425, "learning_rate": 1.3013962904263237e-05, "loss": 0.286, "step": 18168, "teacher_loss": 0.2722364664077759 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.6786731481552124, "learning_rate": 1.3011711673286411e-05, "loss": 0.2253, "step": 18169, "teacher_loss": 0.1748993843793869 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.20154336094856262, "learning_rate": 1.3009460487895078e-05, "loss": 0.3155, "step": 18170, "teacher_loss": 0.32811373472213745 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.21616467833518982, "learning_rate": 1.3007209348140849e-05, "loss": 0.1604, "step": 18171, "teacher_loss": 0.15423047542572021 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.4753044545650482, "learning_rate": 1.3004958254075329e-05, "loss": 0.23, "step": 18172, "teacher_loss": 0.20273320376873016 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.28975003957748413, "learning_rate": 1.3002707205750142e-05, "loss": 0.3469, "step": 18173, "teacher_loss": 0.35327431559562683 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.3585415482521057, "learning_rate": 1.3000456203216887e-05, "loss": 0.2133, "step": 18174, "teacher_loss": 0.19721579551696777 }, { "compression_loss": 0.0, "epoch": 3.28, "label_loss": 0.8126368522644043, "learning_rate": 1.2998205246527171e-05, "loss": 0.6232, "step": 18175, "teacher_loss": 0.6021868586540222 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4043880105018616, "learning_rate": 1.2995954335732615e-05, "loss": 0.3485, "step": 18176, "teacher_loss": 0.3422878384590149 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.2915797233581543, "learning_rate": 1.2993703470884818e-05, "loss": 0.1848, "step": 18177, "teacher_loss": 0.17288590967655182 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.25596755743026733, "learning_rate": 1.2991452652035383e-05, "loss": 0.1618, "step": 18178, "teacher_loss": 0.15136650204658508 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.3866546154022217, "learning_rate": 1.2989201879235912e-05, "loss": 0.2673, "step": 18179, "teacher_loss": 0.25404810905456543 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.667992115020752, "learning_rate": 1.2986951152538019e-05, "loss": 0.3579, "step": 18180, "teacher_loss": 0.32343828678131104 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.12965041399002075, "learning_rate": 1.2984700471993299e-05, "loss": 0.213, "step": 18181, "teacher_loss": 0.22230014204978943 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4179753065109253, "learning_rate": 1.2982449837653354e-05, "loss": 0.2423, "step": 18182, "teacher_loss": 0.22282329201698303 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.5086942911148071, "learning_rate": 1.2980199249569785e-05, "loss": 0.218, "step": 18183, "teacher_loss": 0.18568812310695648 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.9095141887664795, "learning_rate": 1.2977948707794196e-05, "loss": 0.3201, "step": 18184, "teacher_loss": 0.25462767481803894 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.5382750034332275, "learning_rate": 1.2975698212378173e-05, "loss": 0.3276, "step": 18185, "teacher_loss": 0.30420225858688354 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.5130690336227417, "learning_rate": 1.297344776337333e-05, "loss": 0.2472, "step": 18186, "teacher_loss": 0.2176552563905716 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.6596359610557556, "learning_rate": 1.2971197360831249e-05, "loss": 0.476, "step": 18187, "teacher_loss": 0.4556193947792053 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.3123575448989868, "learning_rate": 1.2968947004803526e-05, "loss": 0.1933, "step": 18188, "teacher_loss": 0.18007208406925201 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.9957927465438843, "learning_rate": 1.2966696695341764e-05, "loss": 0.3092, "step": 18189, "teacher_loss": 0.23295804858207703 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.29672303795814514, "learning_rate": 1.296444643249755e-05, "loss": 0.1879, "step": 18190, "teacher_loss": 0.17577509582042694 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.6289877891540527, "learning_rate": 1.2962196216322474e-05, "loss": 0.2154, "step": 18191, "teacher_loss": 0.1694399118423462 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.12931294739246368, "learning_rate": 1.295994604686813e-05, "loss": 0.1475, "step": 18192, "teacher_loss": 0.14950107038021088 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4452768564224243, "learning_rate": 1.295769592418611e-05, "loss": 0.3163, "step": 18193, "teacher_loss": 0.30198732018470764 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.2548239231109619, "learning_rate": 1.2955445848327994e-05, "loss": 0.2077, "step": 18194, "teacher_loss": 0.2024780809879303 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.41416066884994507, "learning_rate": 1.2953195819345378e-05, "loss": 0.2907, "step": 18195, "teacher_loss": 0.27695387601852417 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.29538631439208984, "learning_rate": 1.2950945837289849e-05, "loss": 0.1894, "step": 18196, "teacher_loss": 0.1775709092617035 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.47110238671302795, "learning_rate": 1.2948695902212978e-05, "loss": 0.2123, "step": 18197, "teacher_loss": 0.1835511028766632 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.5581475496292114, "learning_rate": 1.2946446014166371e-05, "loss": 0.2312, "step": 18198, "teacher_loss": 0.19492009282112122 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.39313197135925293, "learning_rate": 1.2944196173201596e-05, "loss": 0.2535, "step": 18199, "teacher_loss": 0.23802423477172852 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.1593824028968811, "learning_rate": 1.294194637937024e-05, "loss": 0.1766, "step": 18200, "teacher_loss": 0.17854812741279602 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.33885809779167175, "learning_rate": 1.2939696632723877e-05, "loss": 0.2406, "step": 18201, "teacher_loss": 0.22966915369033813 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4542231857776642, "learning_rate": 1.29374469333141e-05, "loss": 0.2598, "step": 18202, "teacher_loss": 0.23821038007736206 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.6547744274139404, "learning_rate": 1.293519728119248e-05, "loss": 0.2471, "step": 18203, "teacher_loss": 0.20182767510414124 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.2911924421787262, "learning_rate": 1.293294767641059e-05, "loss": 0.173, "step": 18204, "teacher_loss": 0.15986818075180054 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.2568390667438507, "learning_rate": 1.2930698119020017e-05, "loss": 0.1736, "step": 18205, "teacher_loss": 0.16432306170463562 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4197365641593933, "learning_rate": 1.2928448609072335e-05, "loss": 0.2282, "step": 18206, "teacher_loss": 0.20687752962112427 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.23294541239738464, "learning_rate": 1.292619914661911e-05, "loss": 0.1677, "step": 18207, "teacher_loss": 0.16040048003196716 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.19515679776668549, "learning_rate": 1.2923949731711925e-05, "loss": 0.1699, "step": 18208, "teacher_loss": 0.16708455979824066 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.25613880157470703, "learning_rate": 1.292170036440235e-05, "loss": 0.2256, "step": 18209, "teacher_loss": 0.22223928570747375 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.32962068915367126, "learning_rate": 1.2919451044741945e-05, "loss": 0.1718, "step": 18210, "teacher_loss": 0.15424056351184845 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.37333303689956665, "learning_rate": 1.29172017727823e-05, "loss": 0.203, "step": 18211, "teacher_loss": 0.1840239018201828 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.31904950737953186, "learning_rate": 1.291495254857497e-05, "loss": 0.2049, "step": 18212, "teacher_loss": 0.19222334027290344 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.2840752899646759, "learning_rate": 1.2912703372171524e-05, "loss": 0.1784, "step": 18213, "teacher_loss": 0.16664424538612366 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.5742809772491455, "learning_rate": 1.2910454243623537e-05, "loss": 0.2592, "step": 18214, "teacher_loss": 0.2242123931646347 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.20950889587402344, "learning_rate": 1.2908205162982568e-05, "loss": 0.2758, "step": 18215, "teacher_loss": 0.2831608057022095 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.28280192613601685, "learning_rate": 1.2905956130300179e-05, "loss": 0.1847, "step": 18216, "teacher_loss": 0.17380356788635254 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.22982732951641083, "learning_rate": 1.290370714562794e-05, "loss": 0.1681, "step": 18217, "teacher_loss": 0.16121140122413635 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.3928210735321045, "learning_rate": 1.2901458209017413e-05, "loss": 0.2426, "step": 18218, "teacher_loss": 0.22593817114830017 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4701545834541321, "learning_rate": 1.2899209320520159e-05, "loss": 0.251, "step": 18219, "teacher_loss": 0.22662797570228577 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.14719830453395844, "learning_rate": 1.2896960480187727e-05, "loss": 0.243, "step": 18220, "teacher_loss": 0.25367259979248047 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.3538978397846222, "learning_rate": 1.2894711688071698e-05, "loss": 0.3386, "step": 18221, "teacher_loss": 0.3369472622871399 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.28509244322776794, "learning_rate": 1.2892462944223613e-05, "loss": 0.1747, "step": 18222, "teacher_loss": 0.1623988151550293 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.8829365968704224, "learning_rate": 1.2890214248695032e-05, "loss": 0.3098, "step": 18223, "teacher_loss": 0.24607492983341217 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.3881641626358032, "learning_rate": 1.2887965601537515e-05, "loss": 0.2366, "step": 18224, "teacher_loss": 0.2197500765323639 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.44385743141174316, "learning_rate": 1.2885717002802616e-05, "loss": 0.2897, "step": 18225, "teacher_loss": 0.2725197374820709 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.738135576248169, "learning_rate": 1.288346845254188e-05, "loss": 0.2768, "step": 18226, "teacher_loss": 0.22550088167190552 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.30323755741119385, "learning_rate": 1.2881219950806875e-05, "loss": 0.211, "step": 18227, "teacher_loss": 0.20074941217899323 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.9332331418991089, "learning_rate": 1.2878971497649142e-05, "loss": 0.2761, "step": 18228, "teacher_loss": 0.2030966579914093 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.4225686192512512, "learning_rate": 1.287672309312023e-05, "loss": 0.3973, "step": 18229, "teacher_loss": 0.3944445252418518 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.26022791862487793, "learning_rate": 1.2874474737271695e-05, "loss": 0.2484, "step": 18230, "teacher_loss": 0.2470693588256836 }, { "compression_loss": 0.0, "epoch": 3.29, "label_loss": 0.22164443135261536, "learning_rate": 1.287222643015508e-05, "loss": 0.1394, "step": 18231, "teacher_loss": 0.13028313219547272 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3618132174015045, "learning_rate": 1.2869978171821933e-05, "loss": 0.2839, "step": 18232, "teacher_loss": 0.2752948999404907 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.9462795853614807, "learning_rate": 1.2867729962323803e-05, "loss": 0.3064, "step": 18233, "teacher_loss": 0.23527707159519196 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3360162377357483, "learning_rate": 1.2865481801712235e-05, "loss": 0.1706, "step": 18234, "teacher_loss": 0.15221315622329712 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.28276145458221436, "learning_rate": 1.286323369003876e-05, "loss": 0.2206, "step": 18235, "teacher_loss": 0.21366214752197266 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5406668782234192, "learning_rate": 1.2860985627354939e-05, "loss": 0.2928, "step": 18236, "teacher_loss": 0.26523077487945557 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.42783766984939575, "learning_rate": 1.2858737613712302e-05, "loss": 0.1918, "step": 18237, "teacher_loss": 0.1655229926109314 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.7270276546478271, "learning_rate": 1.2856489649162386e-05, "loss": 0.4084, "step": 18238, "teacher_loss": 0.3730219602584839 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.21659454703330994, "learning_rate": 1.2854241733756741e-05, "loss": 0.1323, "step": 18239, "teacher_loss": 0.12296003103256226 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.7499873042106628, "learning_rate": 1.2851993867546902e-05, "loss": 0.3206, "step": 18240, "teacher_loss": 0.272857129573822 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5469962954521179, "learning_rate": 1.2849746050584402e-05, "loss": 0.342, "step": 18241, "teacher_loss": 0.3191797137260437 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.24198998510837555, "learning_rate": 1.2847498282920773e-05, "loss": 0.3538, "step": 18242, "teacher_loss": 0.36617863178253174 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.6941623687744141, "learning_rate": 1.284525056460756e-05, "loss": 0.4705, "step": 18243, "teacher_loss": 0.44569897651672363 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.8404003381729126, "learning_rate": 1.2843002895696294e-05, "loss": 0.2617, "step": 18244, "teacher_loss": 0.19744521379470825 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4416939914226532, "learning_rate": 1.2840755276238494e-05, "loss": 0.3449, "step": 18245, "teacher_loss": 0.33420050144195557 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.2236153781414032, "learning_rate": 1.2838507706285712e-05, "loss": 0.2365, "step": 18246, "teacher_loss": 0.2379852682352066 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.203488290309906, "learning_rate": 1.2836260185889466e-05, "loss": 0.1725, "step": 18247, "teacher_loss": 0.1690545380115509 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.37065887451171875, "learning_rate": 1.283401271510128e-05, "loss": 0.2925, "step": 18248, "teacher_loss": 0.28385210037231445 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5756394267082214, "learning_rate": 1.2831765293972695e-05, "loss": 0.2472, "step": 18249, "teacher_loss": 0.21070127189159393 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.14653512835502625, "learning_rate": 1.2829517922555233e-05, "loss": 0.1453, "step": 18250, "teacher_loss": 0.14512351155281067 }, { "epoch": 3.3, "eval_exact_match": 80.00946073793756, "eval_f1": 87.31498454197734, "step": 18250 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.35064953565597534, "learning_rate": 1.282727060090041e-05, "loss": 0.2029, "step": 18251, "teacher_loss": 0.186465322971344 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5890791416168213, "learning_rate": 1.2825023329059764e-05, "loss": 0.2389, "step": 18252, "teacher_loss": 0.19997096061706543 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.6370557546615601, "learning_rate": 1.282277610708481e-05, "loss": 0.2301, "step": 18253, "teacher_loss": 0.18492162227630615 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.8886950016021729, "learning_rate": 1.282052893502707e-05, "loss": 0.309, "step": 18254, "teacher_loss": 0.2446364164352417 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.750601053237915, "learning_rate": 1.281828181293807e-05, "loss": 0.319, "step": 18255, "teacher_loss": 0.2710520625114441 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4397768974304199, "learning_rate": 1.281603474086933e-05, "loss": 0.2175, "step": 18256, "teacher_loss": 0.19277824461460114 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5954399108886719, "learning_rate": 1.2813787718872358e-05, "loss": 0.1936, "step": 18257, "teacher_loss": 0.14895348250865936 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4053216576576233, "learning_rate": 1.2811540746998683e-05, "loss": 0.2253, "step": 18258, "teacher_loss": 0.2052481323480606 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.2893517017364502, "learning_rate": 1.2809293825299822e-05, "loss": 0.2267, "step": 18259, "teacher_loss": 0.21978828310966492 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5441051721572876, "learning_rate": 1.2807046953827282e-05, "loss": 0.2301, "step": 18260, "teacher_loss": 0.19517385959625244 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4789500832557678, "learning_rate": 1.2804800132632576e-05, "loss": 0.199, "step": 18261, "teacher_loss": 0.16787350177764893 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4673159122467041, "learning_rate": 1.2802553361767226e-05, "loss": 0.2215, "step": 18262, "teacher_loss": 0.19424059987068176 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 1.1354711055755615, "learning_rate": 1.2800306641282739e-05, "loss": 0.3325, "step": 18263, "teacher_loss": 0.24322986602783203 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.5315986275672913, "learning_rate": 1.2798059971230622e-05, "loss": 0.2459, "step": 18264, "teacher_loss": 0.21417085826396942 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.27937644720077515, "learning_rate": 1.279581335166239e-05, "loss": 0.3183, "step": 18265, "teacher_loss": 0.3226276636123657 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3810928165912628, "learning_rate": 1.2793566782629552e-05, "loss": 0.2505, "step": 18266, "teacher_loss": 0.2359771579504013 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.6993427872657776, "learning_rate": 1.2791320264183607e-05, "loss": 0.2301, "step": 18267, "teacher_loss": 0.17799808084964752 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.1814243048429489, "learning_rate": 1.2789073796376071e-05, "loss": 0.1558, "step": 18268, "teacher_loss": 0.15291723608970642 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.8807456493377686, "learning_rate": 1.2786827379258446e-05, "loss": 0.3443, "step": 18269, "teacher_loss": 0.28465864062309265 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.22938352823257446, "learning_rate": 1.2784581012882227e-05, "loss": 0.188, "step": 18270, "teacher_loss": 0.1834345906972885 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.8353972434997559, "learning_rate": 1.2782334697298931e-05, "loss": 0.254, "step": 18271, "teacher_loss": 0.18939417600631714 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.772862434387207, "learning_rate": 1.2780088432560048e-05, "loss": 0.2997, "step": 18272, "teacher_loss": 0.2471664696931839 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.49294817447662354, "learning_rate": 1.2777842218717075e-05, "loss": 0.2572, "step": 18273, "teacher_loss": 0.23105153441429138 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.2970609962940216, "learning_rate": 1.2775596055821527e-05, "loss": 0.1633, "step": 18274, "teacher_loss": 0.14845848083496094 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.27919793128967285, "learning_rate": 1.277334994392489e-05, "loss": 0.1756, "step": 18275, "teacher_loss": 0.1640544980764389 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 1.4128801822662354, "learning_rate": 1.2771103883078658e-05, "loss": 0.4013, "step": 18276, "teacher_loss": 0.2888622283935547 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.302116334438324, "learning_rate": 1.2768857873334338e-05, "loss": 0.2102, "step": 18277, "teacher_loss": 0.20002323389053345 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.33971232175827026, "learning_rate": 1.2766611914743415e-05, "loss": 0.2179, "step": 18278, "teacher_loss": 0.2043740302324295 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.4176620841026306, "learning_rate": 1.2764366007357382e-05, "loss": 0.1551, "step": 18279, "teacher_loss": 0.12589576840400696 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.559597373008728, "learning_rate": 1.2762120151227737e-05, "loss": 0.2546, "step": 18280, "teacher_loss": 0.22076496481895447 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.6185896396636963, "learning_rate": 1.2759874346405967e-05, "loss": 0.2561, "step": 18281, "teacher_loss": 0.21583634614944458 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3272697329521179, "learning_rate": 1.2757628592943568e-05, "loss": 0.1968, "step": 18282, "teacher_loss": 0.18230679631233215 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3319544792175293, "learning_rate": 1.275538289089201e-05, "loss": 0.3205, "step": 18283, "teacher_loss": 0.31917887926101685 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.1651412844657898, "learning_rate": 1.2753137240302801e-05, "loss": 0.1671, "step": 18284, "teacher_loss": 0.16733206808567047 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.16619503498077393, "learning_rate": 1.2750891641227418e-05, "loss": 0.1995, "step": 18285, "teacher_loss": 0.20316170156002045 }, { "compression_loss": 0.0, "epoch": 3.3, "label_loss": 0.3740761876106262, "learning_rate": 1.2748646093717342e-05, "loss": 0.2256, "step": 18286, "teacher_loss": 0.20914016664028168 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.6385414004325867, "learning_rate": 1.2746400597824066e-05, "loss": 0.2711, "step": 18287, "teacher_loss": 0.23029184341430664 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5312670469284058, "learning_rate": 1.2744155153599065e-05, "loss": 0.2433, "step": 18288, "teacher_loss": 0.21129730343818665 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.13044904172420502, "learning_rate": 1.2741909761093822e-05, "loss": 0.1406, "step": 18289, "teacher_loss": 0.1417396068572998 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.407657265663147, "learning_rate": 1.273966442035982e-05, "loss": 0.2388, "step": 18290, "teacher_loss": 0.22000998258590698 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.22546306252479553, "learning_rate": 1.2737419131448537e-05, "loss": 0.1509, "step": 18291, "teacher_loss": 0.14261513948440552 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.1789315640926361, "learning_rate": 1.2735173894411445e-05, "loss": 0.1487, "step": 18292, "teacher_loss": 0.14535734057426453 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5600394010543823, "learning_rate": 1.273292870930003e-05, "loss": 0.1949, "step": 18293, "teacher_loss": 0.15432533621788025 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.21180686354637146, "learning_rate": 1.2730683576165767e-05, "loss": 0.1712, "step": 18294, "teacher_loss": 0.16667698323726654 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.585758626461029, "learning_rate": 1.2728438495060113e-05, "loss": 0.259, "step": 18295, "teacher_loss": 0.2226904183626175 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.29648149013519287, "learning_rate": 1.2726193466034566e-05, "loss": 0.1632, "step": 18296, "teacher_loss": 0.14843764901161194 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.3029516339302063, "learning_rate": 1.272394848914058e-05, "loss": 0.229, "step": 18297, "teacher_loss": 0.22077150642871857 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.46519458293914795, "learning_rate": 1.2721703564429627e-05, "loss": 0.4411, "step": 18298, "teacher_loss": 0.4384276568889618 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.33070623874664307, "learning_rate": 1.271945869195319e-05, "loss": 0.2246, "step": 18299, "teacher_loss": 0.21280014514923096 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.3734726309776306, "learning_rate": 1.2717213871762723e-05, "loss": 0.2762, "step": 18300, "teacher_loss": 0.2653909921646118 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.6344031691551208, "learning_rate": 1.2714969103909695e-05, "loss": 0.3295, "step": 18301, "teacher_loss": 0.2956312894821167 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.2561222314834595, "learning_rate": 1.271272438844558e-05, "loss": 0.2091, "step": 18302, "teacher_loss": 0.20388305187225342 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.3513815999031067, "learning_rate": 1.2710479725421834e-05, "loss": 0.1924, "step": 18303, "teacher_loss": 0.17474044859409332 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.18251383304595947, "learning_rate": 1.2708235114889925e-05, "loss": 0.1789, "step": 18304, "teacher_loss": 0.17854559421539307 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.779865026473999, "learning_rate": 1.2705990556901311e-05, "loss": 0.3357, "step": 18305, "teacher_loss": 0.2863811254501343 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5663478970527649, "learning_rate": 1.270374605150746e-05, "loss": 0.345, "step": 18306, "teacher_loss": 0.3204531967639923 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.3370470702648163, "learning_rate": 1.2701501598759828e-05, "loss": 0.1923, "step": 18307, "teacher_loss": 0.1762060970067978 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.47871798276901245, "learning_rate": 1.2699257198709863e-05, "loss": 0.2636, "step": 18308, "teacher_loss": 0.2396763563156128 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5074809193611145, "learning_rate": 1.2697012851409045e-05, "loss": 0.2455, "step": 18309, "teacher_loss": 0.21635743975639343 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.3551393747329712, "learning_rate": 1.2694768556908815e-05, "loss": 0.3121, "step": 18310, "teacher_loss": 0.3073699474334717 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.36625397205352783, "learning_rate": 1.2692524315260625e-05, "loss": 0.2513, "step": 18311, "teacher_loss": 0.23851361870765686 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5638904571533203, "learning_rate": 1.2690280126515937e-05, "loss": 0.2424, "step": 18312, "teacher_loss": 0.20671653747558594 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4471657872200012, "learning_rate": 1.2688035990726202e-05, "loss": 0.2748, "step": 18313, "teacher_loss": 0.25560474395751953 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.2671241760253906, "learning_rate": 1.2685791907942866e-05, "loss": 0.1859, "step": 18314, "teacher_loss": 0.17683741450309753 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.6140532493591309, "learning_rate": 1.2683547878217388e-05, "loss": 0.2867, "step": 18315, "teacher_loss": 0.2503645122051239 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.7801412343978882, "learning_rate": 1.268130390160121e-05, "loss": 0.3084, "step": 18316, "teacher_loss": 0.2559375762939453 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5742823481559753, "learning_rate": 1.267905997814578e-05, "loss": 0.2575, "step": 18317, "teacher_loss": 0.2222837507724762 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4269393980503082, "learning_rate": 1.267681610790255e-05, "loss": 0.2846, "step": 18318, "teacher_loss": 0.2688036262989044 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.8354735970497131, "learning_rate": 1.2674572290922963e-05, "loss": 0.6003, "step": 18319, "teacher_loss": 0.5741674900054932 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.48213791847229004, "learning_rate": 1.2672328527258452e-05, "loss": 0.2572, "step": 18320, "teacher_loss": 0.2322022020816803 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5484969615936279, "learning_rate": 1.267008481696048e-05, "loss": 0.3822, "step": 18321, "teacher_loss": 0.3637485206127167 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4675028324127197, "learning_rate": 1.2667841160080474e-05, "loss": 0.391, "step": 18322, "teacher_loss": 0.38248568773269653 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.511128306388855, "learning_rate": 1.2665597556669876e-05, "loss": 0.1702, "step": 18323, "teacher_loss": 0.13231760263442993 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5984614491462708, "learning_rate": 1.2663354006780127e-05, "loss": 0.2211, "step": 18324, "teacher_loss": 0.17914672195911407 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4673621654510498, "learning_rate": 1.2661110510462666e-05, "loss": 0.22, "step": 18325, "teacher_loss": 0.19250322878360748 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.40711402893066406, "learning_rate": 1.2658867067768929e-05, "loss": 0.2386, "step": 18326, "teacher_loss": 0.21986910700798035 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.33419954776763916, "learning_rate": 1.265662367875035e-05, "loss": 0.2598, "step": 18327, "teacher_loss": 0.25149330496788025 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.1730618178844452, "learning_rate": 1.2654380343458366e-05, "loss": 0.2012, "step": 18328, "teacher_loss": 0.20434805750846863 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.633500337600708, "learning_rate": 1.2652137061944407e-05, "loss": 0.3769, "step": 18329, "teacher_loss": 0.34837621450424194 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.37324994802474976, "learning_rate": 1.2649893834259904e-05, "loss": 0.1901, "step": 18330, "teacher_loss": 0.16978952288627625 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4630226492881775, "learning_rate": 1.2647650660456293e-05, "loss": 0.289, "step": 18331, "teacher_loss": 0.26962825655937195 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.7017282247543335, "learning_rate": 1.2645407540585e-05, "loss": 0.2923, "step": 18332, "teacher_loss": 0.24678532779216766 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.41106170415878296, "learning_rate": 1.2643164474697444e-05, "loss": 0.168, "step": 18333, "teacher_loss": 0.14096488058567047 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.24452099204063416, "learning_rate": 1.2640921462845073e-05, "loss": 0.1818, "step": 18334, "teacher_loss": 0.17480576038360596 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5192121267318726, "learning_rate": 1.2638678505079295e-05, "loss": 0.3551, "step": 18335, "teacher_loss": 0.3368752598762512 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.2798868715763092, "learning_rate": 1.2636435601451537e-05, "loss": 0.2378, "step": 18336, "teacher_loss": 0.23314549028873444 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.7776020765304565, "learning_rate": 1.2634192752013225e-05, "loss": 0.2738, "step": 18337, "teacher_loss": 0.21782538294792175 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.2635144591331482, "learning_rate": 1.2631949956815782e-05, "loss": 0.2119, "step": 18338, "teacher_loss": 0.2061435580253601 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.37651556730270386, "learning_rate": 1.2629707215910624e-05, "loss": 0.1881, "step": 18339, "teacher_loss": 0.1671895980834961 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.5983116626739502, "learning_rate": 1.2627464529349175e-05, "loss": 0.2594, "step": 18340, "teacher_loss": 0.22173020243644714 }, { "compression_loss": 0.0, "epoch": 3.31, "label_loss": 0.4211875796318054, "learning_rate": 1.262522189718285e-05, "loss": 0.4726, "step": 18341, "teacher_loss": 0.47836044430732727 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.496562659740448, "learning_rate": 1.2622979319463065e-05, "loss": 0.2551, "step": 18342, "teacher_loss": 0.22828659415245056 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.502152144908905, "learning_rate": 1.2620736796241243e-05, "loss": 0.2828, "step": 18343, "teacher_loss": 0.2584381103515625 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3125140070915222, "learning_rate": 1.2618494327568794e-05, "loss": 0.2428, "step": 18344, "teacher_loss": 0.23502734303474426 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.7602489590644836, "learning_rate": 1.2616251913497126e-05, "loss": 0.3092, "step": 18345, "teacher_loss": 0.2591143250465393 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3397023379802704, "learning_rate": 1.2614009554077652e-05, "loss": 0.2134, "step": 18346, "teacher_loss": 0.1993771195411682 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.43602254986763, "learning_rate": 1.261176724936179e-05, "loss": 0.2482, "step": 18347, "teacher_loss": 0.2273297905921936 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.4830107092857361, "learning_rate": 1.2609524999400943e-05, "loss": 0.1912, "step": 18348, "teacher_loss": 0.15874908864498138 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.7457711696624756, "learning_rate": 1.2607282804246519e-05, "loss": 0.2458, "step": 18349, "teacher_loss": 0.19030043482780457 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.623133659362793, "learning_rate": 1.2605040663949928e-05, "loss": 0.3886, "step": 18350, "teacher_loss": 0.3625381290912628 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.13423186540603638, "learning_rate": 1.2602798578562575e-05, "loss": 0.199, "step": 18351, "teacher_loss": 0.20618563890457153 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3188090920448303, "learning_rate": 1.2600556548135861e-05, "loss": 0.2279, "step": 18352, "teacher_loss": 0.21781525015830994 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.6210907101631165, "learning_rate": 1.2598314572721193e-05, "loss": 0.3078, "step": 18353, "teacher_loss": 0.2730112075805664 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.19715633988380432, "learning_rate": 1.2596072652369973e-05, "loss": 0.1611, "step": 18354, "teacher_loss": 0.1571391522884369 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.6174234747886658, "learning_rate": 1.2593830787133594e-05, "loss": 0.244, "step": 18355, "teacher_loss": 0.20252615213394165 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.5951739549636841, "learning_rate": 1.2591588977063466e-05, "loss": 0.4373, "step": 18356, "teacher_loss": 0.4197431802749634 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.15973827242851257, "learning_rate": 1.2589347222210986e-05, "loss": 0.1897, "step": 18357, "teacher_loss": 0.19298920035362244 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2966060936450958, "learning_rate": 1.2587105522627535e-05, "loss": 0.2353, "step": 18358, "teacher_loss": 0.2285279929637909 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.5642554759979248, "learning_rate": 1.2584863878364533e-05, "loss": 0.2607, "step": 18359, "teacher_loss": 0.22693070769309998 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.40447473526000977, "learning_rate": 1.2582622289473355e-05, "loss": 0.2848, "step": 18360, "teacher_loss": 0.2715034484863281 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.4652654230594635, "learning_rate": 1.2580380756005399e-05, "loss": 0.2666, "step": 18361, "teacher_loss": 0.24455666542053223 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.9684268236160278, "learning_rate": 1.2578139278012059e-05, "loss": 0.2885, "step": 18362, "teacher_loss": 0.21289989352226257 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2056064009666443, "learning_rate": 1.2575897855544725e-05, "loss": 0.1709, "step": 18363, "teacher_loss": 0.16708341240882874 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2496483027935028, "learning_rate": 1.2573656488654785e-05, "loss": 0.1559, "step": 18364, "teacher_loss": 0.1455153524875641 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.18078044056892395, "learning_rate": 1.2571415177393626e-05, "loss": 0.1819, "step": 18365, "teacher_loss": 0.18197625875473022 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.4521269202232361, "learning_rate": 1.2569173921812637e-05, "loss": 0.2203, "step": 18366, "teacher_loss": 0.19458147883415222 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.22433842718601227, "learning_rate": 1.2566932721963206e-05, "loss": 0.1908, "step": 18367, "teacher_loss": 0.18707457184791565 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.6782110929489136, "learning_rate": 1.2564691577896703e-05, "loss": 0.2328, "step": 18368, "teacher_loss": 0.18335847556591034 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.321159303188324, "learning_rate": 1.2562450489664531e-05, "loss": 0.2385, "step": 18369, "teacher_loss": 0.2292879968881607 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.34564781188964844, "learning_rate": 1.2560209457318055e-05, "loss": 0.2855, "step": 18370, "teacher_loss": 0.2788253426551819 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3948368728160858, "learning_rate": 1.2557968480908658e-05, "loss": 0.2728, "step": 18371, "teacher_loss": 0.2592817544937134 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.6856731176376343, "learning_rate": 1.2555727560487732e-05, "loss": 0.2681, "step": 18372, "teacher_loss": 0.22172386944293976 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.47335517406463623, "learning_rate": 1.2553486696106639e-05, "loss": 0.3098, "step": 18373, "teacher_loss": 0.2916069030761719 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.549426257610321, "learning_rate": 1.255124588781676e-05, "loss": 0.231, "step": 18374, "teacher_loss": 0.19566486775875092 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2438855767250061, "learning_rate": 1.2549005135669474e-05, "loss": 0.2144, "step": 18375, "teacher_loss": 0.21109366416931152 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.310373991727829, "learning_rate": 1.254676443971615e-05, "loss": 0.2006, "step": 18376, "teacher_loss": 0.18842291831970215 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2674780786037445, "learning_rate": 1.2544523800008161e-05, "loss": 0.2, "step": 18377, "teacher_loss": 0.19246046245098114 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.48570287227630615, "learning_rate": 1.254228321659688e-05, "loss": 0.3622, "step": 18378, "teacher_loss": 0.3484587073326111 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.472123384475708, "learning_rate": 1.254004268953368e-05, "loss": 0.2141, "step": 18379, "teacher_loss": 0.18545100092887878 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3307039141654968, "learning_rate": 1.253780221886992e-05, "loss": 0.2472, "step": 18380, "teacher_loss": 0.2378869652748108 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2865726351737976, "learning_rate": 1.2535561804656977e-05, "loss": 0.1866, "step": 18381, "teacher_loss": 0.175489604473114 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3651304841041565, "learning_rate": 1.2533321446946216e-05, "loss": 0.1919, "step": 18382, "teacher_loss": 0.1726188063621521 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.6302614212036133, "learning_rate": 1.2531081145788989e-05, "loss": 0.3075, "step": 18383, "teacher_loss": 0.27162978053092957 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3901684880256653, "learning_rate": 1.2528840901236678e-05, "loss": 0.2248, "step": 18384, "teacher_loss": 0.20642103254795074 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.5575323104858398, "learning_rate": 1.2526600713340636e-05, "loss": 0.3089, "step": 18385, "teacher_loss": 0.2813228964805603 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.2238282561302185, "learning_rate": 1.2524360582152221e-05, "loss": 0.1644, "step": 18386, "teacher_loss": 0.157810240983963 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3236406743526459, "learning_rate": 1.2522120507722793e-05, "loss": 0.2446, "step": 18387, "teacher_loss": 0.2358473539352417 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.17705470323562622, "learning_rate": 1.2519880490103718e-05, "loss": 0.1924, "step": 18388, "teacher_loss": 0.1941005289554596 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.4171791970729828, "learning_rate": 1.2517640529346345e-05, "loss": 0.3579, "step": 18389, "teacher_loss": 0.3512924909591675 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3510870933532715, "learning_rate": 1.2515400625502031e-05, "loss": 0.237, "step": 18390, "teacher_loss": 0.224280446767807 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3695766031742096, "learning_rate": 1.2513160778622133e-05, "loss": 0.1601, "step": 18391, "teacher_loss": 0.13681301474571228 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.5040054321289062, "learning_rate": 1.2510920988758006e-05, "loss": 0.1985, "step": 18392, "teacher_loss": 0.16455751657485962 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.3480949401855469, "learning_rate": 1.2508681255960987e-05, "loss": 0.2216, "step": 18393, "teacher_loss": 0.20751655101776123 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.20031729340553284, "learning_rate": 1.2506441580282448e-05, "loss": 0.1818, "step": 18394, "teacher_loss": 0.17977438867092133 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.4729866683483124, "learning_rate": 1.2504201961773727e-05, "loss": 0.2311, "step": 18395, "teacher_loss": 0.20421051979064941 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.33365535736083984, "learning_rate": 1.2501962400486164e-05, "loss": 0.2196, "step": 18396, "teacher_loss": 0.20690679550170898 }, { "compression_loss": 0.0, "epoch": 3.32, "label_loss": 0.9292529821395874, "learning_rate": 1.2499722896471121e-05, "loss": 0.2346, "step": 18397, "teacher_loss": 0.15746843814849854 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6349519491195679, "learning_rate": 1.2497483449779935e-05, "loss": 0.2953, "step": 18398, "teacher_loss": 0.25754842162132263 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.15506920218467712, "learning_rate": 1.2495244060463947e-05, "loss": 0.1953, "step": 18399, "teacher_loss": 0.1997290849685669 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.7448081374168396, "learning_rate": 1.2493004728574505e-05, "loss": 0.4059, "step": 18400, "teacher_loss": 0.36821648478507996 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6886869668960571, "learning_rate": 1.2490765454162949e-05, "loss": 0.6381, "step": 18401, "teacher_loss": 0.6324926614761353 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4537810683250427, "learning_rate": 1.2488526237280615e-05, "loss": 0.6914, "step": 18402, "teacher_loss": 0.7177980542182922 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.8059777021408081, "learning_rate": 1.248628707797885e-05, "loss": 0.3301, "step": 18403, "teacher_loss": 0.2772555351257324 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4921862483024597, "learning_rate": 1.2484047976308984e-05, "loss": 0.2496, "step": 18404, "teacher_loss": 0.22259561717510223 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.31399106979370117, "learning_rate": 1.248180893232235e-05, "loss": 0.1766, "step": 18405, "teacher_loss": 0.16130468249320984 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4702402353286743, "learning_rate": 1.2479569946070293e-05, "loss": 0.243, "step": 18406, "teacher_loss": 0.21779607236385345 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5913668274879456, "learning_rate": 1.2477331017604143e-05, "loss": 0.3171, "step": 18407, "teacher_loss": 0.2866814136505127 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.370329350233078, "learning_rate": 1.2475092146975224e-05, "loss": 0.1781, "step": 18408, "teacher_loss": 0.15668591856956482 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.331412672996521, "learning_rate": 1.247285333423487e-05, "loss": 0.2057, "step": 18409, "teacher_loss": 0.1917145550251007 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6745486259460449, "learning_rate": 1.2470614579434416e-05, "loss": 0.1956, "step": 18410, "teacher_loss": 0.1423964500427246 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.8741778135299683, "learning_rate": 1.2468375882625186e-05, "loss": 0.3204, "step": 18411, "teacher_loss": 0.2589215040206909 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4578501880168915, "learning_rate": 1.2466137243858504e-05, "loss": 0.2694, "step": 18412, "teacher_loss": 0.24844232201576233 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.3624303936958313, "learning_rate": 1.24638986631857e-05, "loss": 0.2023, "step": 18413, "teacher_loss": 0.18451830744743347 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.45224764943122864, "learning_rate": 1.2461660140658098e-05, "loss": 0.1912, "step": 18414, "teacher_loss": 0.16218912601470947 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.39102041721343994, "learning_rate": 1.2459421676327013e-05, "loss": 0.2248, "step": 18415, "teacher_loss": 0.20634929835796356 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.3308330476284027, "learning_rate": 1.2457183270243778e-05, "loss": 0.1937, "step": 18416, "teacher_loss": 0.17841240763664246 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6286229491233826, "learning_rate": 1.2454944922459709e-05, "loss": 0.2835, "step": 18417, "teacher_loss": 0.2451486438512802 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.154030442237854, "learning_rate": 1.245270663302611e-05, "loss": 0.1292, "step": 18418, "teacher_loss": 0.12639518082141876 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5325549840927124, "learning_rate": 1.2450468401994325e-05, "loss": 0.2257, "step": 18419, "teacher_loss": 0.19166049361228943 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.26981422305107117, "learning_rate": 1.2448230229415651e-05, "loss": 0.2088, "step": 18420, "teacher_loss": 0.20201066136360168 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.3914691209793091, "learning_rate": 1.2445992115341399e-05, "loss": 0.199, "step": 18421, "teacher_loss": 0.1775984764099121 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5279285311698914, "learning_rate": 1.2443754059822901e-05, "loss": 0.3394, "step": 18422, "teacher_loss": 0.31845352053642273 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5221810340881348, "learning_rate": 1.2441516062911454e-05, "loss": 0.221, "step": 18423, "teacher_loss": 0.1874893307685852 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6687108278274536, "learning_rate": 1.2439278124658374e-05, "loss": 0.2394, "step": 18424, "teacher_loss": 0.19169974327087402 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.8883898854255676, "learning_rate": 1.2437040245114966e-05, "loss": 0.2697, "step": 18425, "teacher_loss": 0.2009199857711792 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.33791857957839966, "learning_rate": 1.2434802424332546e-05, "loss": 0.1567, "step": 18426, "teacher_loss": 0.13655614852905273 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5126756429672241, "learning_rate": 1.2432564662362414e-05, "loss": 0.2187, "step": 18427, "teacher_loss": 0.18605603277683258 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.17766505479812622, "learning_rate": 1.2430326959255873e-05, "loss": 0.1808, "step": 18428, "teacher_loss": 0.1811705231666565 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.42550814151763916, "learning_rate": 1.2428089315064236e-05, "loss": 0.263, "step": 18429, "teacher_loss": 0.2448895275592804 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.2661648988723755, "learning_rate": 1.24258517298388e-05, "loss": 0.1509, "step": 18430, "teacher_loss": 0.13806448876857758 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6711167097091675, "learning_rate": 1.2423614203630858e-05, "loss": 0.2358, "step": 18431, "teacher_loss": 0.1874634474515915 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.26167038083076477, "learning_rate": 1.2421376736491728e-05, "loss": 0.2641, "step": 18432, "teacher_loss": 0.2644244134426117 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.1798754781484604, "learning_rate": 1.2419139328472693e-05, "loss": 0.1694, "step": 18433, "teacher_loss": 0.16824504733085632 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.652587890625, "learning_rate": 1.2416901979625055e-05, "loss": 0.2305, "step": 18434, "teacher_loss": 0.1835586428642273 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.23921085894107819, "learning_rate": 1.2414664690000113e-05, "loss": 0.1952, "step": 18435, "teacher_loss": 0.1903022974729538 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5190736055374146, "learning_rate": 1.2412427459649156e-05, "loss": 0.1926, "step": 18436, "teacher_loss": 0.15629518032073975 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.3245939016342163, "learning_rate": 1.241019028862348e-05, "loss": 0.2996, "step": 18437, "teacher_loss": 0.2968396842479706 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.368893563747406, "learning_rate": 1.2407953176974377e-05, "loss": 0.232, "step": 18438, "teacher_loss": 0.21683043241500854 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.14916355907917023, "learning_rate": 1.2405716124753136e-05, "loss": 0.1683, "step": 18439, "teacher_loss": 0.1704126000404358 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.5537216663360596, "learning_rate": 1.2403479132011044e-05, "loss": 0.2416, "step": 18440, "teacher_loss": 0.2069188505411148 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.22304297983646393, "learning_rate": 1.240124219879939e-05, "loss": 0.1668, "step": 18441, "teacher_loss": 0.16059334576129913 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.30141621828079224, "learning_rate": 1.2399005325169468e-05, "loss": 0.3025, "step": 18442, "teacher_loss": 0.3026027977466583 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4325682520866394, "learning_rate": 1.2396768511172546e-05, "loss": 0.1961, "step": 18443, "teacher_loss": 0.16981473565101624 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.45541608333587646, "learning_rate": 1.2394531756859927e-05, "loss": 0.3309, "step": 18444, "teacher_loss": 0.317097008228302 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.20820805430412292, "learning_rate": 1.2392295062282878e-05, "loss": 0.2086, "step": 18445, "teacher_loss": 0.2086043357849121 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.2874957025051117, "learning_rate": 1.239005842749268e-05, "loss": 0.1846, "step": 18446, "teacher_loss": 0.17313873767852783 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.7704253196716309, "learning_rate": 1.2387821852540628e-05, "loss": 0.3349, "step": 18447, "teacher_loss": 0.28652411699295044 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.2260725498199463, "learning_rate": 1.2385585337477983e-05, "loss": 0.1645, "step": 18448, "teacher_loss": 0.15767429769039154 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.33926627039909363, "learning_rate": 1.238334888235603e-05, "loss": 0.1951, "step": 18449, "teacher_loss": 0.1791081577539444 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.4083828330039978, "learning_rate": 1.2381112487226038e-05, "loss": 0.2822, "step": 18450, "teacher_loss": 0.2681264877319336 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.6318469047546387, "learning_rate": 1.2378876152139288e-05, "loss": 0.2726, "step": 18451, "teacher_loss": 0.23272892832756042 }, { "compression_loss": 0.0, "epoch": 3.33, "label_loss": 0.7110015749931335, "learning_rate": 1.237663987714705e-05, "loss": 0.2185, "step": 18452, "teacher_loss": 0.16375377774238586 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.39453110098838806, "learning_rate": 1.2374403662300589e-05, "loss": 0.2002, "step": 18453, "teacher_loss": 0.1786387413740158 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.8089221715927124, "learning_rate": 1.2372167507651187e-05, "loss": 0.3536, "step": 18454, "teacher_loss": 0.30305707454681396 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.39054885506629944, "learning_rate": 1.2369931413250104e-05, "loss": 0.1923, "step": 18455, "teacher_loss": 0.1702529788017273 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.10799497365951538, "learning_rate": 1.2367695379148601e-05, "loss": 0.1792, "step": 18456, "teacher_loss": 0.18709462881088257 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.17388957738876343, "learning_rate": 1.2365459405397963e-05, "loss": 0.2188, "step": 18457, "teacher_loss": 0.22379735112190247 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5735845565795898, "learning_rate": 1.2363223492049435e-05, "loss": 0.2928, "step": 18458, "teacher_loss": 0.26162970066070557 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5318232774734497, "learning_rate": 1.2360987639154286e-05, "loss": 0.2859, "step": 18459, "teacher_loss": 0.2586110234260559 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.2984362840652466, "learning_rate": 1.235875184676378e-05, "loss": 0.2299, "step": 18460, "teacher_loss": 0.22227036952972412 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.26036423444747925, "learning_rate": 1.2356516114929176e-05, "loss": 0.262, "step": 18461, "teacher_loss": 0.26213395595550537 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.4930025339126587, "learning_rate": 1.235428044370173e-05, "loss": 0.3238, "step": 18462, "teacher_loss": 0.3049967288970947 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3786955177783966, "learning_rate": 1.2352044833132701e-05, "loss": 0.2032, "step": 18463, "teacher_loss": 0.1837441325187683 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.26752012968063354, "learning_rate": 1.2349809283273348e-05, "loss": 0.1905, "step": 18464, "teacher_loss": 0.18198224902153015 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.9993331432342529, "learning_rate": 1.2347573794174918e-05, "loss": 0.2399, "step": 18465, "teacher_loss": 0.15550720691680908 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.1885949969291687, "learning_rate": 1.2345338365888673e-05, "loss": 0.1855, "step": 18466, "teacher_loss": 0.18512190878391266 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3058115243911743, "learning_rate": 1.2343102998465863e-05, "loss": 0.1693, "step": 18467, "teacher_loss": 0.15412163734436035 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.4665020704269409, "learning_rate": 1.234086769195773e-05, "loss": 0.3233, "step": 18468, "teacher_loss": 0.3074265420436859 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.4196681082248688, "learning_rate": 1.2338632446415526e-05, "loss": 0.2735, "step": 18469, "teacher_loss": 0.2572307586669922 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3531424403190613, "learning_rate": 1.2336397261890506e-05, "loss": 0.1973, "step": 18470, "teacher_loss": 0.17996114492416382 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3543007969856262, "learning_rate": 1.233416213843391e-05, "loss": 0.2157, "step": 18471, "teacher_loss": 0.20034965872764587 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5449449419975281, "learning_rate": 1.233192707609698e-05, "loss": 0.2124, "step": 18472, "teacher_loss": 0.17540481686592102 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.906792163848877, "learning_rate": 1.2329692074930967e-05, "loss": 0.3783, "step": 18473, "teacher_loss": 0.31960436701774597 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.38990023732185364, "learning_rate": 1.2327457134987107e-05, "loss": 0.2613, "step": 18474, "teacher_loss": 0.24697056412696838 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5535478591918945, "learning_rate": 1.2325222256316639e-05, "loss": 0.2907, "step": 18475, "teacher_loss": 0.26148879528045654 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5571458339691162, "learning_rate": 1.232298743897081e-05, "loss": 0.205, "step": 18476, "teacher_loss": 0.16584277153015137 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.19842708110809326, "learning_rate": 1.2320752683000852e-05, "loss": 0.1387, "step": 18477, "teacher_loss": 0.13203155994415283 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.417305052280426, "learning_rate": 1.2318517988457999e-05, "loss": 0.2651, "step": 18478, "teacher_loss": 0.24821697175502777 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.37035760283470154, "learning_rate": 1.2316283355393493e-05, "loss": 0.2122, "step": 18479, "teacher_loss": 0.19458505511283875 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.40091216564178467, "learning_rate": 1.2314048783858566e-05, "loss": 0.2877, "step": 18480, "teacher_loss": 0.27507850527763367 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5844032764434814, "learning_rate": 1.2311814273904437e-05, "loss": 0.2451, "step": 18481, "teacher_loss": 0.2074226438999176 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5006624460220337, "learning_rate": 1.2309579825582357e-05, "loss": 0.2057, "step": 18482, "teacher_loss": 0.17291507124900818 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.33015263080596924, "learning_rate": 1.2307345438943544e-05, "loss": 0.2545, "step": 18483, "teacher_loss": 0.24613966047763824 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5301461219787598, "learning_rate": 1.2305111114039224e-05, "loss": 0.2733, "step": 18484, "teacher_loss": 0.24474181234836578 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.38936296105384827, "learning_rate": 1.2302876850920627e-05, "loss": 0.221, "step": 18485, "teacher_loss": 0.20230278372764587 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 1.1155613660812378, "learning_rate": 1.2300642649638978e-05, "loss": 0.4622, "step": 18486, "teacher_loss": 0.38959842920303345 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5656943321228027, "learning_rate": 1.22984085102455e-05, "loss": 0.3216, "step": 18487, "teacher_loss": 0.2945324778556824 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5215837955474854, "learning_rate": 1.2296174432791415e-05, "loss": 0.2767, "step": 18488, "teacher_loss": 0.24953246116638184 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 1.0448509454727173, "learning_rate": 1.2293940417327945e-05, "loss": 0.4161, "step": 18489, "teacher_loss": 0.3462563455104828 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.9179197549819946, "learning_rate": 1.229170646390631e-05, "loss": 0.4136, "step": 18490, "teacher_loss": 0.3575159013271332 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.2976725995540619, "learning_rate": 1.2289472572577716e-05, "loss": 0.2105, "step": 18491, "teacher_loss": 0.20082589983940125 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3082728981971741, "learning_rate": 1.2287238743393401e-05, "loss": 0.2324, "step": 18492, "teacher_loss": 0.22392529249191284 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.4041733741760254, "learning_rate": 1.2285004976404564e-05, "loss": 0.2867, "step": 18493, "teacher_loss": 0.273611843585968 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3582770824432373, "learning_rate": 1.2282771271662417e-05, "loss": 0.26, "step": 18494, "teacher_loss": 0.2491290271282196 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.4711555242538452, "learning_rate": 1.2280537629218186e-05, "loss": 0.2387, "step": 18495, "teacher_loss": 0.21284595131874084 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.33075985312461853, "learning_rate": 1.2278304049123073e-05, "loss": 0.1746, "step": 18496, "teacher_loss": 0.15727092325687408 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.22954338788986206, "learning_rate": 1.2276070531428284e-05, "loss": 0.3185, "step": 18497, "teacher_loss": 0.32840949296951294 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.3590051531791687, "learning_rate": 1.2273837076185034e-05, "loss": 0.1554, "step": 18498, "teacher_loss": 0.13277405500411987 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.698039174079895, "learning_rate": 1.2271603683444525e-05, "loss": 0.2686, "step": 18499, "teacher_loss": 0.22086824476718903 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.24242046475410461, "learning_rate": 1.2269370353257963e-05, "loss": 0.1952, "step": 18500, "teacher_loss": 0.189897820353508 }, { "epoch": 3.34, "eval_exact_match": 79.81078524124882, "eval_f1": 87.28057577192935, "step": 18500 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.5803496837615967, "learning_rate": 1.2267137085676552e-05, "loss": 0.2751, "step": 18501, "teacher_loss": 0.2411813735961914 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.498191773891449, "learning_rate": 1.2264903880751496e-05, "loss": 0.2751, "step": 18502, "teacher_loss": 0.250287801027298 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.20128510892391205, "learning_rate": 1.226267073853399e-05, "loss": 0.1947, "step": 18503, "teacher_loss": 0.19400885701179504 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.40472954511642456, "learning_rate": 1.2260437659075242e-05, "loss": 0.2185, "step": 18504, "teacher_loss": 0.19781237840652466 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.12580493092536926, "learning_rate": 1.2258204642426445e-05, "loss": 0.2005, "step": 18505, "teacher_loss": 0.20878593623638153 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.7386248707771301, "learning_rate": 1.2255971688638787e-05, "loss": 0.3389, "step": 18506, "teacher_loss": 0.2945179343223572 }, { "compression_loss": 0.0, "epoch": 3.34, "label_loss": 0.2921562194824219, "learning_rate": 1.2253738797763482e-05, "loss": 0.2104, "step": 18507, "teacher_loss": 0.2013486623764038 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.1099204272031784, "learning_rate": 1.2251505969851708e-05, "loss": 0.1199, "step": 18508, "teacher_loss": 0.1210232824087143 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3114200234413147, "learning_rate": 1.2249273204954659e-05, "loss": 0.28, "step": 18509, "teacher_loss": 0.2764671742916107 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.17083880305290222, "learning_rate": 1.2247040503123533e-05, "loss": 0.2225, "step": 18510, "teacher_loss": 0.22820451855659485 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.6430106163024902, "learning_rate": 1.224480786440951e-05, "loss": 0.2524, "step": 18511, "teacher_loss": 0.20896175503730774 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.2527446746826172, "learning_rate": 1.2242575288863785e-05, "loss": 0.2094, "step": 18512, "teacher_loss": 0.2046075463294983 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3001673221588135, "learning_rate": 1.2240342776537538e-05, "loss": 0.2029, "step": 18513, "teacher_loss": 0.1921398639678955 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.27059808373451233, "learning_rate": 1.2238110327481961e-05, "loss": 0.1613, "step": 18514, "teacher_loss": 0.14914755523204803 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.39105039834976196, "learning_rate": 1.2235877941748237e-05, "loss": 0.1626, "step": 18515, "teacher_loss": 0.1371745765209198 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7569479942321777, "learning_rate": 1.2233645619387536e-05, "loss": 0.5371, "step": 18516, "teacher_loss": 0.5126884579658508 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.2586199641227722, "learning_rate": 1.2231413360451054e-05, "loss": 0.1986, "step": 18517, "teacher_loss": 0.1919756382703781 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.4103398323059082, "learning_rate": 1.2229181164989963e-05, "loss": 0.1841, "step": 18518, "teacher_loss": 0.15899735689163208 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.9105491638183594, "learning_rate": 1.2226949033055429e-05, "loss": 0.3037, "step": 18519, "teacher_loss": 0.23623651266098022 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.2622298002243042, "learning_rate": 1.2224716964698653e-05, "loss": 0.1845, "step": 18520, "teacher_loss": 0.17591683566570282 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.334195077419281, "learning_rate": 1.2222484959970793e-05, "loss": 0.2239, "step": 18521, "teacher_loss": 0.21163348853588104 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.48472732305526733, "learning_rate": 1.222025301892302e-05, "loss": 0.1968, "step": 18522, "teacher_loss": 0.1648542284965515 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.47648879885673523, "learning_rate": 1.2218021141606517e-05, "loss": 0.2455, "step": 18523, "teacher_loss": 0.2198607623577118 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7430230379104614, "learning_rate": 1.2215789328072448e-05, "loss": 0.2808, "step": 18524, "teacher_loss": 0.22943472862243652 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5140787363052368, "learning_rate": 1.2213557578371977e-05, "loss": 0.2533, "step": 18525, "teacher_loss": 0.22428598999977112 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.27317333221435547, "learning_rate": 1.2211325892556282e-05, "loss": 0.2559, "step": 18526, "teacher_loss": 0.2539953887462616 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5334306955337524, "learning_rate": 1.2209094270676522e-05, "loss": 0.2712, "step": 18527, "teacher_loss": 0.24201242625713348 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3821364641189575, "learning_rate": 1.2206862712783859e-05, "loss": 0.2809, "step": 18528, "teacher_loss": 0.2696574330329895 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.4276137351989746, "learning_rate": 1.2204631218929463e-05, "loss": 0.2193, "step": 18529, "teacher_loss": 0.19615933299064636 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3350640535354614, "learning_rate": 1.2202399789164499e-05, "loss": 0.1959, "step": 18530, "teacher_loss": 0.18041522800922394 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.548102617263794, "learning_rate": 1.2200168423540114e-05, "loss": 0.1884, "step": 18531, "teacher_loss": 0.1483854353427887 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.23635722696781158, "learning_rate": 1.219793712210747e-05, "loss": 0.2002, "step": 18532, "teacher_loss": 0.19615203142166138 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.24337252974510193, "learning_rate": 1.219570588491773e-05, "loss": 0.1771, "step": 18533, "teacher_loss": 0.16971978545188904 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.38437357544898987, "learning_rate": 1.2193474712022047e-05, "loss": 0.2749, "step": 18534, "teacher_loss": 0.2627674639225006 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.28723418712615967, "learning_rate": 1.219124360347157e-05, "loss": 0.2381, "step": 18535, "teacher_loss": 0.2326916754245758 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7126603126525879, "learning_rate": 1.2189012559317459e-05, "loss": 0.3246, "step": 18536, "teacher_loss": 0.28148210048675537 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5261780023574829, "learning_rate": 1.2186781579610864e-05, "loss": 0.3424, "step": 18537, "teacher_loss": 0.32196518778800964 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5825536251068115, "learning_rate": 1.2184550664402927e-05, "loss": 0.2129, "step": 18538, "teacher_loss": 0.1717773675918579 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.4640715718269348, "learning_rate": 1.2182319813744807e-05, "loss": 0.2388, "step": 18539, "teacher_loss": 0.21379020810127258 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.17457765340805054, "learning_rate": 1.2180089027687649e-05, "loss": 0.189, "step": 18540, "teacher_loss": 0.1905689239501953 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.2991122603416443, "learning_rate": 1.2177858306282585e-05, "loss": 0.1187, "step": 18541, "teacher_loss": 0.09870870411396027 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3548603057861328, "learning_rate": 1.217562764958078e-05, "loss": 0.2726, "step": 18542, "teacher_loss": 0.26343834400177 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7231144905090332, "learning_rate": 1.2173397057633361e-05, "loss": 0.244, "step": 18543, "teacher_loss": 0.1907651126384735 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.20886202156543732, "learning_rate": 1.2171166530491466e-05, "loss": 0.1305, "step": 18544, "teacher_loss": 0.12178203463554382 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5559341907501221, "learning_rate": 1.2168936068206252e-05, "loss": 0.2444, "step": 18545, "teacher_loss": 0.20973248779773712 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5921544432640076, "learning_rate": 1.216670567082884e-05, "loss": 0.2553, "step": 18546, "teacher_loss": 0.2178599238395691 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.6064504384994507, "learning_rate": 1.2164475338410372e-05, "loss": 0.3017, "step": 18547, "teacher_loss": 0.2678424119949341 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7721132040023804, "learning_rate": 1.2162245071001985e-05, "loss": 0.292, "step": 18548, "teacher_loss": 0.23870941996574402 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.29890891909599304, "learning_rate": 1.216001486865481e-05, "loss": 0.2652, "step": 18549, "teacher_loss": 0.26143068075180054 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.2940792441368103, "learning_rate": 1.2157784731419979e-05, "loss": 0.173, "step": 18550, "teacher_loss": 0.1595214456319809 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.30452167987823486, "learning_rate": 1.2155554659348622e-05, "loss": 0.19, "step": 18551, "teacher_loss": 0.1773168444633484 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.731475293636322, "learning_rate": 1.2153324652491873e-05, "loss": 0.2693, "step": 18552, "teacher_loss": 0.217957004904747 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.35454702377319336, "learning_rate": 1.2151094710900854e-05, "loss": 0.2856, "step": 18553, "teacher_loss": 0.27796247601509094 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.3648371994495392, "learning_rate": 1.2148864834626684e-05, "loss": 0.1973, "step": 18554, "teacher_loss": 0.17865684628486633 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7870327830314636, "learning_rate": 1.2146635023720504e-05, "loss": 0.2375, "step": 18555, "teacher_loss": 0.17639723420143127 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.8622509241104126, "learning_rate": 1.2144405278233427e-05, "loss": 0.5382, "step": 18556, "teacher_loss": 0.5021752119064331 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.47104042768478394, "learning_rate": 1.2142175598216569e-05, "loss": 0.2151, "step": 18557, "teacher_loss": 0.18663811683654785 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.40209949016571045, "learning_rate": 1.2139945983721063e-05, "loss": 0.2722, "step": 18558, "teacher_loss": 0.2577453553676605 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.27314847707748413, "learning_rate": 1.2137716434798018e-05, "loss": 0.1867, "step": 18559, "teacher_loss": 0.17710702121257782 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 1.0312864780426025, "learning_rate": 1.2135486951498552e-05, "loss": 0.3745, "step": 18560, "teacher_loss": 0.30156177282333374 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.942995548248291, "learning_rate": 1.2133257533873783e-05, "loss": 0.2822, "step": 18561, "teacher_loss": 0.20877647399902344 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.7195024490356445, "learning_rate": 1.2131028181974827e-05, "loss": 0.2512, "step": 18562, "teacher_loss": 0.19915470480918884 }, { "compression_loss": 0.0, "epoch": 3.35, "label_loss": 0.5158272981643677, "learning_rate": 1.2128798895852787e-05, "loss": 0.3056, "step": 18563, "teacher_loss": 0.28220319747924805 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.17762607336044312, "learning_rate": 1.2126569675558785e-05, "loss": 0.1403, "step": 18564, "teacher_loss": 0.1361183375120163 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.22994890809059143, "learning_rate": 1.2124340521143929e-05, "loss": 0.183, "step": 18565, "teacher_loss": 0.1777379810810089 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.7605133652687073, "learning_rate": 1.212211143265931e-05, "loss": 0.3544, "step": 18566, "teacher_loss": 0.3092246949672699 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4144997000694275, "learning_rate": 1.211988241015606e-05, "loss": 0.2093, "step": 18567, "teacher_loss": 0.18645372986793518 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4535015821456909, "learning_rate": 1.2117653453685269e-05, "loss": 0.207, "step": 18568, "teacher_loss": 0.17958375811576843 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5924638509750366, "learning_rate": 1.2115424563298035e-05, "loss": 0.2092, "step": 18569, "teacher_loss": 0.1665697991847992 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3411828279495239, "learning_rate": 1.2113195739045477e-05, "loss": 0.2037, "step": 18570, "teacher_loss": 0.18839582800865173 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.05584712699055672, "learning_rate": 1.2110966980978682e-05, "loss": 0.2105, "step": 18571, "teacher_loss": 0.22770346701145172 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.8084791898727417, "learning_rate": 1.210873828914875e-05, "loss": 0.2323, "step": 18572, "teacher_loss": 0.16825725138187408 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.6047141551971436, "learning_rate": 1.2106509663606783e-05, "loss": 0.1975, "step": 18573, "teacher_loss": 0.1521998941898346 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5429080724716187, "learning_rate": 1.2104281104403873e-05, "loss": 0.2789, "step": 18574, "teacher_loss": 0.24952450394630432 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.41864681243896484, "learning_rate": 1.2102052611591118e-05, "loss": 0.1989, "step": 18575, "teacher_loss": 0.17443254590034485 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3734470009803772, "learning_rate": 1.2099824185219603e-05, "loss": 0.4059, "step": 18576, "teacher_loss": 0.40952742099761963 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.350409597158432, "learning_rate": 1.2097595825340429e-05, "loss": 0.2692, "step": 18577, "teacher_loss": 0.2601744532585144 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.26680731773376465, "learning_rate": 1.2095367532004685e-05, "loss": 0.1996, "step": 18578, "teacher_loss": 0.19217166304588318 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4685589671134949, "learning_rate": 1.2093139305263442e-05, "loss": 0.2764, "step": 18579, "teacher_loss": 0.2550569176673889 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.7648811340332031, "learning_rate": 1.2090911145167811e-05, "loss": 0.2192, "step": 18580, "teacher_loss": 0.15860819816589355 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.7024475932121277, "learning_rate": 1.2088683051768863e-05, "loss": 0.3673, "step": 18581, "teacher_loss": 0.3300231695175171 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.2347404658794403, "learning_rate": 1.208645502511768e-05, "loss": 0.198, "step": 18582, "teacher_loss": 0.19395726919174194 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.48253703117370605, "learning_rate": 1.2084227065265351e-05, "loss": 0.2555, "step": 18583, "teacher_loss": 0.23032832145690918 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.6801644563674927, "learning_rate": 1.2081999172262955e-05, "loss": 0.3115, "step": 18584, "teacher_loss": 0.2705579400062561 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.7782101631164551, "learning_rate": 1.2079771346161564e-05, "loss": 0.2723, "step": 18585, "teacher_loss": 0.2161393165588379 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.30283600091934204, "learning_rate": 1.2077543587012266e-05, "loss": 0.1836, "step": 18586, "teacher_loss": 0.17035819590091705 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.22044560313224792, "learning_rate": 1.207531589486613e-05, "loss": 0.2006, "step": 18587, "teacher_loss": 0.1984504610300064 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.2638302147388458, "learning_rate": 1.2073088269774227e-05, "loss": 0.1986, "step": 18588, "teacher_loss": 0.19139699637889862 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3105957806110382, "learning_rate": 1.2070860711787641e-05, "loss": 0.1705, "step": 18589, "teacher_loss": 0.154911607503891 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5322176218032837, "learning_rate": 1.2068633220957438e-05, "loss": 0.2322, "step": 18590, "teacher_loss": 0.198826402425766 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3685023784637451, "learning_rate": 1.2066405797334679e-05, "loss": 0.1734, "step": 18591, "teacher_loss": 0.1517159640789032 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.8182448148727417, "learning_rate": 1.2064178440970448e-05, "loss": 0.3095, "step": 18592, "teacher_loss": 0.25293880701065063 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.18817299604415894, "learning_rate": 1.20619511519158e-05, "loss": 0.2222, "step": 18593, "teacher_loss": 0.22597861289978027 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4017627537250519, "learning_rate": 1.2059723930221805e-05, "loss": 0.2513, "step": 18594, "teacher_loss": 0.23456136882305145 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.752432107925415, "learning_rate": 1.2057496775939519e-05, "loss": 0.319, "step": 18595, "teacher_loss": 0.2708294987678528 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.34299203753471375, "learning_rate": 1.2055269689120014e-05, "loss": 0.1983, "step": 18596, "teacher_loss": 0.18218111991882324 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3373622000217438, "learning_rate": 1.2053042669814347e-05, "loss": 0.1795, "step": 18597, "teacher_loss": 0.16193875670433044 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.630958080291748, "learning_rate": 1.2050815718073573e-05, "loss": 0.3137, "step": 18598, "teacher_loss": 0.27845728397369385 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.3826289772987366, "learning_rate": 1.2048588833948755e-05, "loss": 0.2518, "step": 18599, "teacher_loss": 0.23722949624061584 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5850616097450256, "learning_rate": 1.2046362017490947e-05, "loss": 0.6712, "step": 18600, "teacher_loss": 0.6807987093925476 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.47760874032974243, "learning_rate": 1.2044135268751199e-05, "loss": 0.2563, "step": 18601, "teacher_loss": 0.23170697689056396 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.1457206904888153, "learning_rate": 1.2041908587780571e-05, "loss": 0.2071, "step": 18602, "teacher_loss": 0.21389545500278473 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.27983710169792175, "learning_rate": 1.2039681974630111e-05, "loss": 0.2171, "step": 18603, "teacher_loss": 0.2100846767425537 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5690048933029175, "learning_rate": 1.203745542935086e-05, "loss": 0.3344, "step": 18604, "teacher_loss": 0.3083241879940033 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.2317158281803131, "learning_rate": 1.2035228951993885e-05, "loss": 0.165, "step": 18605, "teacher_loss": 0.15754428505897522 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4852762222290039, "learning_rate": 1.2033002542610216e-05, "loss": 0.1732, "step": 18606, "teacher_loss": 0.1384781152009964 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.5166403651237488, "learning_rate": 1.20307762012509e-05, "loss": 0.2531, "step": 18607, "teacher_loss": 0.22379574179649353 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.8595681190490723, "learning_rate": 1.2028549927966987e-05, "loss": 0.2832, "step": 18608, "teacher_loss": 0.219117671251297 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4757612943649292, "learning_rate": 1.2026323722809516e-05, "loss": 0.3655, "step": 18609, "teacher_loss": 0.35328495502471924 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.8425443172454834, "learning_rate": 1.2024097585829522e-05, "loss": 0.3063, "step": 18610, "teacher_loss": 0.24674092233181 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.4537784457206726, "learning_rate": 1.2021871517078052e-05, "loss": 0.2742, "step": 18611, "teacher_loss": 0.25426623225212097 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.6092991828918457, "learning_rate": 1.2019645516606139e-05, "loss": 0.2731, "step": 18612, "teacher_loss": 0.23575352132320404 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.6885086297988892, "learning_rate": 1.2017419584464815e-05, "loss": 0.238, "step": 18613, "teacher_loss": 0.18789339065551758 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.38987648487091064, "learning_rate": 1.2015193720705123e-05, "loss": 0.2065, "step": 18614, "teacher_loss": 0.18611271679401398 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.39823824167251587, "learning_rate": 1.201296792537809e-05, "loss": 0.2441, "step": 18615, "teacher_loss": 0.2269316017627716 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.186028853058815, "learning_rate": 1.2010742198534748e-05, "loss": 0.1884, "step": 18616, "teacher_loss": 0.18868900835514069 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.2704801857471466, "learning_rate": 1.2008516540226115e-05, "loss": 0.1735, "step": 18617, "teacher_loss": 0.16272449493408203 }, { "compression_loss": 0.0, "epoch": 3.36, "label_loss": 0.2526557445526123, "learning_rate": 1.2006290950503241e-05, "loss": 0.2194, "step": 18618, "teacher_loss": 0.2156938761472702 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.35308438539505005, "learning_rate": 1.2004065429417136e-05, "loss": 0.359, "step": 18619, "teacher_loss": 0.35966986417770386 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 1.0550552606582642, "learning_rate": 1.2001839977018822e-05, "loss": 0.2846, "step": 18620, "teacher_loss": 0.19896674156188965 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.2808707654476166, "learning_rate": 1.1999614593359335e-05, "loss": 0.2119, "step": 18621, "teacher_loss": 0.20427697896957397 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5573078989982605, "learning_rate": 1.199738927848969e-05, "loss": 0.2696, "step": 18622, "teacher_loss": 0.23759154975414276 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.2646842896938324, "learning_rate": 1.1995164032460903e-05, "loss": 0.1714, "step": 18623, "teacher_loss": 0.16099140048027039 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5229165554046631, "learning_rate": 1.1992938855323996e-05, "loss": 0.2148, "step": 18624, "teacher_loss": 0.180563822388649 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.8742836713790894, "learning_rate": 1.1990713747129988e-05, "loss": 0.3068, "step": 18625, "teacher_loss": 0.2437036782503128 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.8426339626312256, "learning_rate": 1.1988488707929887e-05, "loss": 0.4661, "step": 18626, "teacher_loss": 0.4242169260978699 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.20311963558197021, "learning_rate": 1.1986263737774717e-05, "loss": 0.1608, "step": 18627, "teacher_loss": 0.15607714653015137 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.32049351930618286, "learning_rate": 1.1984038836715484e-05, "loss": 0.2359, "step": 18628, "teacher_loss": 0.2264769971370697 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.41010230779647827, "learning_rate": 1.1981814004803191e-05, "loss": 0.2951, "step": 18629, "teacher_loss": 0.28229087591171265 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6663646697998047, "learning_rate": 1.1979589242088862e-05, "loss": 0.313, "step": 18630, "teacher_loss": 0.2736872732639313 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6314862370491028, "learning_rate": 1.1977364548623492e-05, "loss": 0.2767, "step": 18631, "teacher_loss": 0.237329363822937 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.4850807785987854, "learning_rate": 1.197513992445809e-05, "loss": 0.2486, "step": 18632, "teacher_loss": 0.2223183512687683 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5129311084747314, "learning_rate": 1.1972915369643662e-05, "loss": 0.218, "step": 18633, "teacher_loss": 0.1852179914712906 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.4080178439617157, "learning_rate": 1.1970690884231209e-05, "loss": 0.2027, "step": 18634, "teacher_loss": 0.17984004318714142 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6929551959037781, "learning_rate": 1.196846646827173e-05, "loss": 0.2944, "step": 18635, "teacher_loss": 0.25014016032218933 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5995739698410034, "learning_rate": 1.1966242121816223e-05, "loss": 0.3651, "step": 18636, "teacher_loss": 0.3390834331512451 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.787972092628479, "learning_rate": 1.1964017844915694e-05, "loss": 0.2312, "step": 18637, "teacher_loss": 0.16933681070804596 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5746564269065857, "learning_rate": 1.1961793637621136e-05, "loss": 0.2216, "step": 18638, "teacher_loss": 0.18241187930107117 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.4892458915710449, "learning_rate": 1.1959569499983532e-05, "loss": 0.1803, "step": 18639, "teacher_loss": 0.14594025909900665 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.7154369950294495, "learning_rate": 1.1957345432053891e-05, "loss": 0.237, "step": 18640, "teacher_loss": 0.18384280800819397 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.2722594141960144, "learning_rate": 1.1955121433883197e-05, "loss": 0.2094, "step": 18641, "teacher_loss": 0.2023903876543045 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.2032523900270462, "learning_rate": 1.1952897505522431e-05, "loss": 0.1666, "step": 18642, "teacher_loss": 0.16253966093063354 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.40038394927978516, "learning_rate": 1.1950673647022597e-05, "loss": 0.348, "step": 18643, "teacher_loss": 0.34214136004447937 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 1.0477972030639648, "learning_rate": 1.1948449858434673e-05, "loss": 0.2337, "step": 18644, "teacher_loss": 0.143270805478096 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.3286711871623993, "learning_rate": 1.1946226139809643e-05, "loss": 0.2434, "step": 18645, "teacher_loss": 0.23391762375831604 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 1.3002586364746094, "learning_rate": 1.1944002491198492e-05, "loss": 0.4167, "step": 18646, "teacher_loss": 0.3184717297554016 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.35456693172454834, "learning_rate": 1.1941778912652205e-05, "loss": 0.2283, "step": 18647, "teacher_loss": 0.2142752707004547 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6176414489746094, "learning_rate": 1.1939555404221753e-05, "loss": 0.2454, "step": 18648, "teacher_loss": 0.2040928602218628 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.8169912099838257, "learning_rate": 1.1937331965958125e-05, "loss": 0.2681, "step": 18649, "teacher_loss": 0.20716747641563416 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.4531940817832947, "learning_rate": 1.193510859791229e-05, "loss": 0.226, "step": 18650, "teacher_loss": 0.20073404908180237 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.44506001472473145, "learning_rate": 1.1932885300135223e-05, "loss": 0.4049, "step": 18651, "teacher_loss": 0.40047693252563477 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.507017195224762, "learning_rate": 1.1930662072677904e-05, "loss": 0.2208, "step": 18652, "teacher_loss": 0.18898217380046844 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.24870118498802185, "learning_rate": 1.1928438915591304e-05, "loss": 0.2632, "step": 18653, "teacher_loss": 0.26480260491371155 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.24229568243026733, "learning_rate": 1.1926215828926384e-05, "loss": 0.207, "step": 18654, "teacher_loss": 0.20306628942489624 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.582056999206543, "learning_rate": 1.1923992812734126e-05, "loss": 0.2198, "step": 18655, "teacher_loss": 0.17957313358783722 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5569716691970825, "learning_rate": 1.1921769867065487e-05, "loss": 0.2719, "step": 18656, "teacher_loss": 0.2402779459953308 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5424741506576538, "learning_rate": 1.1919546991971437e-05, "loss": 0.3, "step": 18657, "teacher_loss": 0.2730475664138794 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.7920173406600952, "learning_rate": 1.1917324187502936e-05, "loss": 0.3083, "step": 18658, "teacher_loss": 0.25450199842453003 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5328412055969238, "learning_rate": 1.1915101453710953e-05, "loss": 0.2336, "step": 18659, "teacher_loss": 0.20040258765220642 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 1.009603500366211, "learning_rate": 1.1912878790646444e-05, "loss": 0.3899, "step": 18660, "teacher_loss": 0.32107990980148315 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.4386749863624573, "learning_rate": 1.1910656198360363e-05, "loss": 0.2409, "step": 18661, "teacher_loss": 0.21890109777450562 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.21110102534294128, "learning_rate": 1.1908433676903679e-05, "loss": 0.1698, "step": 18662, "teacher_loss": 0.165176659822464 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.508363664150238, "learning_rate": 1.1906211226327346e-05, "loss": 0.2648, "step": 18663, "teacher_loss": 0.23777025938034058 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5425838232040405, "learning_rate": 1.1903988846682301e-05, "loss": 0.2806, "step": 18664, "teacher_loss": 0.25153613090515137 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6043205261230469, "learning_rate": 1.1901766538019521e-05, "loss": 0.5395, "step": 18665, "teacher_loss": 0.532261848449707 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.7202823162078857, "learning_rate": 1.1899544300389942e-05, "loss": 0.2975, "step": 18666, "teacher_loss": 0.2504733204841614 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5415602922439575, "learning_rate": 1.189732213384451e-05, "loss": 0.2531, "step": 18667, "teacher_loss": 0.22105497121810913 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.5099426507949829, "learning_rate": 1.1895100038434188e-05, "loss": 0.2872, "step": 18668, "teacher_loss": 0.26243531703948975 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.3420504033565521, "learning_rate": 1.189287801420991e-05, "loss": 0.2183, "step": 18669, "teacher_loss": 0.20453864336013794 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6645707488059998, "learning_rate": 1.189065606122262e-05, "loss": 0.3154, "step": 18670, "teacher_loss": 0.27655598521232605 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.7597312927246094, "learning_rate": 1.1888434179523269e-05, "loss": 0.25, "step": 18671, "teacher_loss": 0.1933150291442871 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.7268742322921753, "learning_rate": 1.1886212369162794e-05, "loss": 0.2375, "step": 18672, "teacher_loss": 0.183174729347229 }, { "compression_loss": 0.0, "epoch": 3.37, "label_loss": 0.6598705053329468, "learning_rate": 1.1883990630192128e-05, "loss": 0.23, "step": 18673, "teacher_loss": 0.18221454322338104 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.345536470413208, "learning_rate": 1.1881768962662219e-05, "loss": 0.1599, "step": 18674, "teacher_loss": 0.13929212093353271 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.8778254389762878, "learning_rate": 1.1879547366623999e-05, "loss": 0.2356, "step": 18675, "teacher_loss": 0.16423200070858002 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.14495152235031128, "learning_rate": 1.1877325842128398e-05, "loss": 0.1695, "step": 18676, "teacher_loss": 0.17222189903259277 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.517376184463501, "learning_rate": 1.1875104389226357e-05, "loss": 0.3108, "step": 18677, "teacher_loss": 0.28788653016090393 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 1.1780054569244385, "learning_rate": 1.1872883007968808e-05, "loss": 0.2984, "step": 18678, "teacher_loss": 0.20069104433059692 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5623835921287537, "learning_rate": 1.1870661698406671e-05, "loss": 0.2576, "step": 18679, "teacher_loss": 0.22378107905387878 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.1455666869878769, "learning_rate": 1.1868440460590876e-05, "loss": 0.1617, "step": 18680, "teacher_loss": 0.16349145770072937 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.19573749601840973, "learning_rate": 1.1866219294572357e-05, "loss": 0.2109, "step": 18681, "teacher_loss": 0.2126309871673584 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5442487001419067, "learning_rate": 1.1863998200402032e-05, "loss": 0.3475, "step": 18682, "teacher_loss": 0.32569241523742676 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4690822958946228, "learning_rate": 1.1861777178130823e-05, "loss": 0.2952, "step": 18683, "teacher_loss": 0.2759130299091339 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.33882611989974976, "learning_rate": 1.1859556227809659e-05, "loss": 0.1751, "step": 18684, "teacher_loss": 0.15694256126880646 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.8876219987869263, "learning_rate": 1.1857335349489455e-05, "loss": 0.2238, "step": 18685, "teacher_loss": 0.1500946581363678 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4225117266178131, "learning_rate": 1.1855114543221126e-05, "loss": 0.2922, "step": 18686, "teacher_loss": 0.2777276039123535 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.3716357350349426, "learning_rate": 1.1852893809055594e-05, "loss": 0.1932, "step": 18687, "teacher_loss": 0.17339164018630981 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.45414185523986816, "learning_rate": 1.1850673147043776e-05, "loss": 0.194, "step": 18688, "teacher_loss": 0.16507622599601746 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.36255526542663574, "learning_rate": 1.184845255723657e-05, "loss": 0.1637, "step": 18689, "teacher_loss": 0.14162112772464752 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 1.1253622770309448, "learning_rate": 1.1846232039684907e-05, "loss": 0.2705, "step": 18690, "teacher_loss": 0.1755523681640625 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.3908260464668274, "learning_rate": 1.1844011594439685e-05, "loss": 0.2289, "step": 18691, "teacher_loss": 0.21087965369224548 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5511235594749451, "learning_rate": 1.1841791221551809e-05, "loss": 0.2649, "step": 18692, "teacher_loss": 0.23314881324768066 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5060559511184692, "learning_rate": 1.1839570921072201e-05, "loss": 0.2539, "step": 18693, "teacher_loss": 0.2258489429950714 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4552042484283447, "learning_rate": 1.1837350693051754e-05, "loss": 0.2167, "step": 18694, "teacher_loss": 0.1902216225862503 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.28415727615356445, "learning_rate": 1.1835130537541369e-05, "loss": 0.2435, "step": 18695, "teacher_loss": 0.23896238207817078 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6941683888435364, "learning_rate": 1.1832910454591956e-05, "loss": 0.2748, "step": 18696, "teacher_loss": 0.2282366305589676 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 1.0125057697296143, "learning_rate": 1.183069044425441e-05, "loss": 0.4065, "step": 18697, "teacher_loss": 0.3391650319099426 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5119051933288574, "learning_rate": 1.1828470506579631e-05, "loss": 0.2326, "step": 18698, "teacher_loss": 0.20162129402160645 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5098068714141846, "learning_rate": 1.182625064161851e-05, "loss": 0.26, "step": 18699, "teacher_loss": 0.23221921920776367 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.24741458892822266, "learning_rate": 1.182403084942195e-05, "loss": 0.2844, "step": 18700, "teacher_loss": 0.28849589824676514 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.751213550567627, "learning_rate": 1.1821811130040844e-05, "loss": 0.3532, "step": 18701, "teacher_loss": 0.30895668268203735 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.34117844700813293, "learning_rate": 1.1819591483526073e-05, "loss": 0.1769, "step": 18702, "teacher_loss": 0.15861183404922485 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4234802722930908, "learning_rate": 1.181737190992854e-05, "loss": 0.2672, "step": 18703, "teacher_loss": 0.24980738759040833 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.2781968116760254, "learning_rate": 1.1815152409299126e-05, "loss": 0.2325, "step": 18704, "teacher_loss": 0.22739183902740479 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.33883753418922424, "learning_rate": 1.1812932981688715e-05, "loss": 0.2382, "step": 18705, "teacher_loss": 0.22704745829105377 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.3931272029876709, "learning_rate": 1.1810713627148196e-05, "loss": 0.2447, "step": 18706, "teacher_loss": 0.22826120257377625 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6611278057098389, "learning_rate": 1.1808494345728455e-05, "loss": 0.2757, "step": 18707, "teacher_loss": 0.23289810121059418 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.39021313190460205, "learning_rate": 1.1806275137480365e-05, "loss": 0.2206, "step": 18708, "teacher_loss": 0.20180051028728485 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5294166803359985, "learning_rate": 1.1804056002454814e-05, "loss": 0.2582, "step": 18709, "teacher_loss": 0.22805221378803253 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.35141512751579285, "learning_rate": 1.1801836940702678e-05, "loss": 0.2072, "step": 18710, "teacher_loss": 0.19123144447803497 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5447932481765747, "learning_rate": 1.1799617952274829e-05, "loss": 0.264, "step": 18711, "teacher_loss": 0.23275363445281982 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.46187686920166016, "learning_rate": 1.1797399037222148e-05, "loss": 0.2268, "step": 18712, "teacher_loss": 0.20068825781345367 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6060706377029419, "learning_rate": 1.1795180195595509e-05, "loss": 0.2154, "step": 18713, "teacher_loss": 0.17198437452316284 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5784647464752197, "learning_rate": 1.1792961427445768e-05, "loss": 0.2095, "step": 18714, "teacher_loss": 0.16853109002113342 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6584532856941223, "learning_rate": 1.1790742732823818e-05, "loss": 0.3387, "step": 18715, "teacher_loss": 0.30313533544540405 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5176960825920105, "learning_rate": 1.178852411178051e-05, "loss": 0.2262, "step": 18716, "teacher_loss": 0.1938420534133911 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4572313725948334, "learning_rate": 1.178630556436671e-05, "loss": 0.2197, "step": 18717, "teacher_loss": 0.1933315098285675 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6383875608444214, "learning_rate": 1.17840870906333e-05, "loss": 0.2734, "step": 18718, "teacher_loss": 0.2328152358531952 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4089704155921936, "learning_rate": 1.1781868690631125e-05, "loss": 0.4248, "step": 18719, "teacher_loss": 0.42656123638153076 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.44494473934173584, "learning_rate": 1.1779650364411055e-05, "loss": 0.2041, "step": 18720, "teacher_loss": 0.17730557918548584 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6373677253723145, "learning_rate": 1.1777432112023942e-05, "loss": 0.3568, "step": 18721, "teacher_loss": 0.32558226585388184 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4969397783279419, "learning_rate": 1.1775213933520653e-05, "loss": 0.3057, "step": 18722, "teacher_loss": 0.28441283106803894 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5830061435699463, "learning_rate": 1.1772995828952038e-05, "loss": 0.2813, "step": 18723, "teacher_loss": 0.24774686992168427 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.6131986975669861, "learning_rate": 1.1770777798368954e-05, "loss": 0.3266, "step": 18724, "teacher_loss": 0.2947281002998352 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4338865876197815, "learning_rate": 1.1768559841822254e-05, "loss": 0.291, "step": 18725, "teacher_loss": 0.27515262365341187 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.5621605515480042, "learning_rate": 1.1766341959362792e-05, "loss": 0.2911, "step": 18726, "teacher_loss": 0.2609741687774658 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.4560585618019104, "learning_rate": 1.1764124151041406e-05, "loss": 0.2429, "step": 18727, "teacher_loss": 0.21925197541713715 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.18669575452804565, "learning_rate": 1.1761906416908959e-05, "loss": 0.152, "step": 18728, "teacher_loss": 0.14810970425605774 }, { "compression_loss": 0.0, "epoch": 3.38, "label_loss": 0.3419337272644043, "learning_rate": 1.1759688757016288e-05, "loss": 0.2104, "step": 18729, "teacher_loss": 0.19575850665569305 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.4429096579551697, "learning_rate": 1.1757471171414235e-05, "loss": 0.2262, "step": 18730, "teacher_loss": 0.20216958224773407 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.4647054970264435, "learning_rate": 1.175525366015365e-05, "loss": 0.2032, "step": 18731, "teacher_loss": 0.1740989089012146 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2213570773601532, "learning_rate": 1.175303622328537e-05, "loss": 0.2293, "step": 18732, "teacher_loss": 0.23013561964035034 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.34916478395462036, "learning_rate": 1.175081886086023e-05, "loss": 0.1751, "step": 18733, "teacher_loss": 0.15580236911773682 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.30279654264450073, "learning_rate": 1.1748601572929076e-05, "loss": 0.1496, "step": 18734, "teacher_loss": 0.13256222009658813 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.4584543704986572, "learning_rate": 1.1746384359542741e-05, "loss": 0.2553, "step": 18735, "teacher_loss": 0.23273366689682007 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.42151910066604614, "learning_rate": 1.1744167220752053e-05, "loss": 0.2476, "step": 18736, "teacher_loss": 0.22827471792697906 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5823575258255005, "learning_rate": 1.1741950156607852e-05, "loss": 0.2555, "step": 18737, "teacher_loss": 0.2191745489835739 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.18476945161819458, "learning_rate": 1.173973316716097e-05, "loss": 0.1987, "step": 18738, "teacher_loss": 0.20022518932819366 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.7398821115493774, "learning_rate": 1.173751625246223e-05, "loss": 0.7045, "step": 18739, "teacher_loss": 0.7005237936973572 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3821154534816742, "learning_rate": 1.1735299412562455e-05, "loss": 0.2091, "step": 18740, "teacher_loss": 0.18984994292259216 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.30538034439086914, "learning_rate": 1.173308264751248e-05, "loss": 0.2451, "step": 18741, "teacher_loss": 0.2384234070777893 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2657424211502075, "learning_rate": 1.1730865957363128e-05, "loss": 0.2131, "step": 18742, "teacher_loss": 0.20729003846645355 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.24790242314338684, "learning_rate": 1.1728649342165212e-05, "loss": 0.1868, "step": 18743, "teacher_loss": 0.18000148236751556 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5210214853286743, "learning_rate": 1.1726432801969562e-05, "loss": 0.2818, "step": 18744, "teacher_loss": 0.2552551329135895 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5517781376838684, "learning_rate": 1.1724216336826994e-05, "loss": 0.2443, "step": 18745, "teacher_loss": 0.2101515382528305 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.32161837816238403, "learning_rate": 1.172199994678832e-05, "loss": 0.2124, "step": 18746, "teacher_loss": 0.20023053884506226 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.6778246164321899, "learning_rate": 1.1719783631904364e-05, "loss": 0.2216, "step": 18747, "teacher_loss": 0.17085419595241547 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2519404888153076, "learning_rate": 1.1717567392225934e-05, "loss": 0.177, "step": 18748, "teacher_loss": 0.1687122881412506 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.18910522758960724, "learning_rate": 1.1715351227803838e-05, "loss": 0.1924, "step": 18749, "teacher_loss": 0.1927390992641449 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5733964443206787, "learning_rate": 1.1713135138688894e-05, "loss": 0.5271, "step": 18750, "teacher_loss": 0.521993100643158 }, { "epoch": 3.39, "eval_exact_match": 79.99053926206244, "eval_f1": 87.33547061003178, "step": 18750 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2888983488082886, "learning_rate": 1.1710919124931912e-05, "loss": 0.2211, "step": 18751, "teacher_loss": 0.21358919143676758 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.14913861453533173, "learning_rate": 1.1708703186583682e-05, "loss": 0.1729, "step": 18752, "teacher_loss": 0.17550814151763916 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.17416149377822876, "learning_rate": 1.1706487323695032e-05, "loss": 0.2119, "step": 18753, "teacher_loss": 0.21605046093463898 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5031924843788147, "learning_rate": 1.1704271536316747e-05, "loss": 0.2446, "step": 18754, "teacher_loss": 0.21586453914642334 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 1.1033008098602295, "learning_rate": 1.1702055824499634e-05, "loss": 0.393, "step": 18755, "teacher_loss": 0.3140692710876465 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.614800214767456, "learning_rate": 1.1699840188294493e-05, "loss": 0.4199, "step": 18756, "teacher_loss": 0.39829424023628235 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3757333755493164, "learning_rate": 1.1697624627752125e-05, "loss": 0.1729, "step": 18757, "teacher_loss": 0.15040722489356995 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3248414397239685, "learning_rate": 1.169540914292332e-05, "loss": 0.1741, "step": 18758, "teacher_loss": 0.15730169415473938 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.716118335723877, "learning_rate": 1.1693193733858877e-05, "loss": 0.2567, "step": 18759, "teacher_loss": 0.20560383796691895 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.21993760764598846, "learning_rate": 1.169097840060959e-05, "loss": 0.1474, "step": 18760, "teacher_loss": 0.1393035352230072 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.46032804250717163, "learning_rate": 1.1688763143226247e-05, "loss": 0.1968, "step": 18761, "teacher_loss": 0.16754159331321716 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5101932287216187, "learning_rate": 1.1686547961759629e-05, "loss": 0.2438, "step": 18762, "teacher_loss": 0.21421505510807037 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2799411416053772, "learning_rate": 1.1684332856260543e-05, "loss": 0.1226, "step": 18763, "teacher_loss": 0.10516917705535889 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.7636371850967407, "learning_rate": 1.1682117826779759e-05, "loss": 0.5698, "step": 18764, "teacher_loss": 0.5483071208000183 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.32063624262809753, "learning_rate": 1.167990287336806e-05, "loss": 0.2153, "step": 18765, "teacher_loss": 0.2036440670490265 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2634209096431732, "learning_rate": 1.1677687996076244e-05, "loss": 0.1549, "step": 18766, "teacher_loss": 0.14281633496284485 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2867833077907562, "learning_rate": 1.1675473194955078e-05, "loss": 0.1782, "step": 18767, "teacher_loss": 0.16610780358314514 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3051109313964844, "learning_rate": 1.1673258470055341e-05, "loss": 0.213, "step": 18768, "teacher_loss": 0.20281875133514404 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3268401622772217, "learning_rate": 1.1671043821427817e-05, "loss": 0.2527, "step": 18769, "teacher_loss": 0.24445046484470367 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.9010107517242432, "learning_rate": 1.1668829249123276e-05, "loss": 0.673, "step": 18770, "teacher_loss": 0.6476960778236389 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.6698606014251709, "learning_rate": 1.166661475319249e-05, "loss": 0.2396, "step": 18771, "teacher_loss": 0.19180506467819214 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2742948532104492, "learning_rate": 1.1664400333686237e-05, "loss": 0.1782, "step": 18772, "teacher_loss": 0.16748803853988647 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.8050655126571655, "learning_rate": 1.1662185990655285e-05, "loss": 0.2999, "step": 18773, "teacher_loss": 0.2437790334224701 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.4336487650871277, "learning_rate": 1.1659971724150397e-05, "loss": 0.2502, "step": 18774, "teacher_loss": 0.22984032332897186 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2619550824165344, "learning_rate": 1.1657757534222346e-05, "loss": 0.1686, "step": 18775, "teacher_loss": 0.15818355977535248 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.3273872137069702, "learning_rate": 1.1655543420921898e-05, "loss": 0.2071, "step": 18776, "teacher_loss": 0.19372232258319855 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5201416015625, "learning_rate": 1.1653329384299804e-05, "loss": 0.2211, "step": 18777, "teacher_loss": 0.18786412477493286 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.5441088080406189, "learning_rate": 1.165111542440684e-05, "loss": 0.2664, "step": 18778, "teacher_loss": 0.2355455756187439 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.18394622206687927, "learning_rate": 1.1648901541293758e-05, "loss": 0.1833, "step": 18779, "teacher_loss": 0.18328258395195007 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.1553734540939331, "learning_rate": 1.1646687735011311e-05, "loss": 0.1751, "step": 18780, "teacher_loss": 0.17725330591201782 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2886493504047394, "learning_rate": 1.1644474005610266e-05, "loss": 0.2365, "step": 18781, "teacher_loss": 0.2307392954826355 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2710658311843872, "learning_rate": 1.164226035314137e-05, "loss": 0.1925, "step": 18782, "teacher_loss": 0.1837441474199295 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.4072994589805603, "learning_rate": 1.1640046777655378e-05, "loss": 0.2129, "step": 18783, "teacher_loss": 0.19130247831344604 }, { "compression_loss": 0.0, "epoch": 3.39, "label_loss": 0.2973078787326813, "learning_rate": 1.1637833279203036e-05, "loss": 0.2579, "step": 18784, "teacher_loss": 0.2534712255001068 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4799741804599762, "learning_rate": 1.1635619857835102e-05, "loss": 0.3005, "step": 18785, "teacher_loss": 0.28051507472991943 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3238498270511627, "learning_rate": 1.1633406513602318e-05, "loss": 0.2517, "step": 18786, "teacher_loss": 0.24368642270565033 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.5011175870895386, "learning_rate": 1.1631193246555422e-05, "loss": 0.2745, "step": 18787, "teacher_loss": 0.2493211328983307 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3754923641681671, "learning_rate": 1.1628980056745172e-05, "loss": 0.1807, "step": 18788, "teacher_loss": 0.1590133011341095 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.23842690885066986, "learning_rate": 1.16267669442223e-05, "loss": 0.1986, "step": 18789, "teacher_loss": 0.19420309364795685 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.24969080090522766, "learning_rate": 1.1624553909037541e-05, "loss": 0.1678, "step": 18790, "teacher_loss": 0.158721923828125 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6991925835609436, "learning_rate": 1.1622340951241652e-05, "loss": 0.2062, "step": 18791, "teacher_loss": 0.15144211053848267 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.2693486511707306, "learning_rate": 1.1620128070885354e-05, "loss": 0.2945, "step": 18792, "teacher_loss": 0.2973131537437439 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.908923327922821, "learning_rate": 1.1617915268019385e-05, "loss": 0.3342, "step": 18793, "teacher_loss": 0.2703251838684082 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3230920135974884, "learning_rate": 1.161570254269448e-05, "loss": 0.1594, "step": 18794, "teacher_loss": 0.14115728437900543 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.757338285446167, "learning_rate": 1.161348989496137e-05, "loss": 0.2312, "step": 18795, "teacher_loss": 0.17272251844406128 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4868299961090088, "learning_rate": 1.1611277324870778e-05, "loss": 0.3613, "step": 18796, "teacher_loss": 0.3473135828971863 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.32114261388778687, "learning_rate": 1.1609064832473442e-05, "loss": 0.2353, "step": 18797, "teacher_loss": 0.22577497363090515 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.34793853759765625, "learning_rate": 1.1606852417820085e-05, "loss": 0.1965, "step": 18798, "teacher_loss": 0.1796720325946808 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.20068097114562988, "learning_rate": 1.1604640080961422e-05, "loss": 0.2359, "step": 18799, "teacher_loss": 0.2398664951324463 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3314172327518463, "learning_rate": 1.1602427821948187e-05, "loss": 0.1943, "step": 18800, "teacher_loss": 0.17908601462841034 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.48195499181747437, "learning_rate": 1.1600215640831098e-05, "loss": 0.2332, "step": 18801, "teacher_loss": 0.2056141197681427 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.2318498194217682, "learning_rate": 1.159800353766087e-05, "loss": 0.1417, "step": 18802, "teacher_loss": 0.1316973865032196 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4067096710205078, "learning_rate": 1.1595791512488213e-05, "loss": 0.2447, "step": 18803, "teacher_loss": 0.226697638630867 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6279998421669006, "learning_rate": 1.159357956536386e-05, "loss": 0.2522, "step": 18804, "teacher_loss": 0.21049439907073975 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6382004022598267, "learning_rate": 1.1591367696338512e-05, "loss": 0.2299, "step": 18805, "teacher_loss": 0.18451760709285736 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4014244079589844, "learning_rate": 1.1589155905462878e-05, "loss": 0.2257, "step": 18806, "teacher_loss": 0.20615805685520172 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.28749698400497437, "learning_rate": 1.1586944192787678e-05, "loss": 0.271, "step": 18807, "teacher_loss": 0.26920080184936523 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6083661317825317, "learning_rate": 1.1584732558363613e-05, "loss": 0.291, "step": 18808, "teacher_loss": 0.25577524304389954 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.39062899351119995, "learning_rate": 1.158252100224139e-05, "loss": 0.196, "step": 18809, "teacher_loss": 0.17433369159698486 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6228368878364563, "learning_rate": 1.1580309524471718e-05, "loss": 0.2506, "step": 18810, "teacher_loss": 0.20923474431037903 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 1.1299842596054077, "learning_rate": 1.1578098125105297e-05, "loss": 0.3197, "step": 18811, "teacher_loss": 0.2296578735113144 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.41368335485458374, "learning_rate": 1.1575886804192816e-05, "loss": 0.4057, "step": 18812, "teacher_loss": 0.4048406481742859 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.7156052589416504, "learning_rate": 1.1573675561784998e-05, "loss": 0.2779, "step": 18813, "teacher_loss": 0.22924241423606873 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4381047785282135, "learning_rate": 1.157146439793252e-05, "loss": 0.1563, "step": 18814, "teacher_loss": 0.1250041127204895 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4445344805717468, "learning_rate": 1.156925331268608e-05, "loss": 0.2076, "step": 18815, "teacher_loss": 0.18125315010547638 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.272841215133667, "learning_rate": 1.1567042306096383e-05, "loss": 0.1866, "step": 18816, "teacher_loss": 0.1770443618297577 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.5708388090133667, "learning_rate": 1.1564831378214112e-05, "loss": 0.1979, "step": 18817, "teacher_loss": 0.15647542476654053 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.21256472170352936, "learning_rate": 1.1562620529089955e-05, "loss": 0.1946, "step": 18818, "teacher_loss": 0.19255292415618896 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.5720242261886597, "learning_rate": 1.1560409758774606e-05, "loss": 0.3007, "step": 18819, "teacher_loss": 0.2705628275871277 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.37271326780319214, "learning_rate": 1.155819906731875e-05, "loss": 0.2058, "step": 18820, "teacher_loss": 0.18724578619003296 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.5156081914901733, "learning_rate": 1.1555988454773065e-05, "loss": 0.2233, "step": 18821, "teacher_loss": 0.19086718559265137 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.46330782771110535, "learning_rate": 1.1553777921188244e-05, "loss": 0.2426, "step": 18822, "teacher_loss": 0.2180497646331787 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.36449992656707764, "learning_rate": 1.1551567466614961e-05, "loss": 0.2528, "step": 18823, "teacher_loss": 0.24043524265289307 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4515899419784546, "learning_rate": 1.1549357091103902e-05, "loss": 0.2779, "step": 18824, "teacher_loss": 0.2586180865764618 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.38402336835861206, "learning_rate": 1.154714679470573e-05, "loss": 0.2219, "step": 18825, "teacher_loss": 0.20390555262565613 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.17274367809295654, "learning_rate": 1.1544936577471138e-05, "loss": 0.2022, "step": 18826, "teacher_loss": 0.20545849204063416 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.16242387890815735, "learning_rate": 1.1542726439450788e-05, "loss": 0.2131, "step": 18827, "teacher_loss": 0.218703031539917 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.49619704484939575, "learning_rate": 1.1540516380695354e-05, "loss": 0.4296, "step": 18828, "teacher_loss": 0.4221862256526947 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.4356083571910858, "learning_rate": 1.153830640125551e-05, "loss": 0.2038, "step": 18829, "teacher_loss": 0.17801763117313385 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3323622941970825, "learning_rate": 1.1536096501181921e-05, "loss": 0.2902, "step": 18830, "teacher_loss": 0.28548404574394226 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.9321970343589783, "learning_rate": 1.153388668052525e-05, "loss": 0.2885, "step": 18831, "teacher_loss": 0.2169387936592102 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.2834605574607849, "learning_rate": 1.153167693933617e-05, "loss": 0.1637, "step": 18832, "teacher_loss": 0.15038307011127472 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.7191187739372253, "learning_rate": 1.1529467277665339e-05, "loss": 0.2987, "step": 18833, "teacher_loss": 0.25203484296798706 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.47329574823379517, "learning_rate": 1.1527257695563415e-05, "loss": 0.2553, "step": 18834, "teacher_loss": 0.23102495074272156 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.29870399832725525, "learning_rate": 1.1525048193081062e-05, "loss": 0.3432, "step": 18835, "teacher_loss": 0.34815606474876404 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.6253160238265991, "learning_rate": 1.1522838770268941e-05, "loss": 0.226, "step": 18836, "teacher_loss": 0.18161892890930176 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.3708431124687195, "learning_rate": 1.1520629427177691e-05, "loss": 0.2483, "step": 18837, "teacher_loss": 0.23467296361923218 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.42725205421447754, "learning_rate": 1.1518420163857984e-05, "loss": 0.2034, "step": 18838, "teacher_loss": 0.178545743227005 }, { "compression_loss": 0.0, "epoch": 3.4, "label_loss": 0.14623664319515228, "learning_rate": 1.1516210980360465e-05, "loss": 0.1797, "step": 18839, "teacher_loss": 0.1834130734205246 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.22204023599624634, "learning_rate": 1.1514001876735775e-05, "loss": 0.1841, "step": 18840, "teacher_loss": 0.17983055114746094 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.7856538891792297, "learning_rate": 1.151179285303458e-05, "loss": 0.2888, "step": 18841, "teacher_loss": 0.23359917104244232 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.42608118057250977, "learning_rate": 1.1509583909307513e-05, "loss": 0.2135, "step": 18842, "teacher_loss": 0.18987855315208435 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.7350302338600159, "learning_rate": 1.1507375045605224e-05, "loss": 0.2659, "step": 18843, "teacher_loss": 0.2137278914451599 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4052891731262207, "learning_rate": 1.150516626197835e-05, "loss": 0.1503, "step": 18844, "teacher_loss": 0.12199300527572632 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 1.2065277099609375, "learning_rate": 1.1502957558477537e-05, "loss": 0.2977, "step": 18845, "teacher_loss": 0.19666868448257446 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4308619201183319, "learning_rate": 1.1500748935153423e-05, "loss": 0.2302, "step": 18846, "teacher_loss": 0.20790283381938934 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.2771860957145691, "learning_rate": 1.1498540392056645e-05, "loss": 0.1321, "step": 18847, "teacher_loss": 0.1160154789686203 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4189469814300537, "learning_rate": 1.1496331929237837e-05, "loss": 0.2548, "step": 18848, "teacher_loss": 0.23658421635627747 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.12749043107032776, "learning_rate": 1.1494123546747638e-05, "loss": 0.1811, "step": 18849, "teacher_loss": 0.18701088428497314 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.32508015632629395, "learning_rate": 1.1491915244636665e-05, "loss": 0.2438, "step": 18850, "teacher_loss": 0.2347555309534073 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.6691660284996033, "learning_rate": 1.1489707022955566e-05, "loss": 0.2684, "step": 18851, "teacher_loss": 0.2238641381263733 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.21738111972808838, "learning_rate": 1.148749888175496e-05, "loss": 0.1561, "step": 18852, "teacher_loss": 0.14927466213703156 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.6501132249832153, "learning_rate": 1.1485290821085468e-05, "loss": 0.2127, "step": 18853, "teacher_loss": 0.16410604119300842 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.49390333890914917, "learning_rate": 1.1483082840997725e-05, "loss": 0.3164, "step": 18854, "teacher_loss": 0.2966894805431366 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4720699191093445, "learning_rate": 1.1480874941542347e-05, "loss": 0.266, "step": 18855, "teacher_loss": 0.24312396347522736 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.287192702293396, "learning_rate": 1.1478667122769954e-05, "loss": 0.2206, "step": 18856, "teacher_loss": 0.21320399641990662 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.3090425133705139, "learning_rate": 1.1476459384731168e-05, "loss": 0.1725, "step": 18857, "teacher_loss": 0.15732666850090027 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.2239268720149994, "learning_rate": 1.1474251727476604e-05, "loss": 0.133, "step": 18858, "teacher_loss": 0.12293249368667603 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.45105957984924316, "learning_rate": 1.1472044151056873e-05, "loss": 0.2648, "step": 18859, "teacher_loss": 0.24412043392658234 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 1.0731204748153687, "learning_rate": 1.14698366555226e-05, "loss": 0.4085, "step": 18860, "teacher_loss": 0.3346370458602905 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.47917550802230835, "learning_rate": 1.1467629240924388e-05, "loss": 0.247, "step": 18861, "teacher_loss": 0.22118344902992249 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.7350554466247559, "learning_rate": 1.1465421907312836e-05, "loss": 0.2628, "step": 18862, "teacher_loss": 0.21031107008457184 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.5233379602432251, "learning_rate": 1.1463214654738574e-05, "loss": 0.3276, "step": 18863, "teacher_loss": 0.3058304786682129 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.3252178430557251, "learning_rate": 1.1461007483252193e-05, "loss": 0.2134, "step": 18864, "teacher_loss": 0.20097842812538147 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.2454577088356018, "learning_rate": 1.1458800392904301e-05, "loss": 0.2453, "step": 18865, "teacher_loss": 0.24530914425849915 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.46905744075775146, "learning_rate": 1.1456593383745494e-05, "loss": 0.2399, "step": 18866, "teacher_loss": 0.21445605158805847 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.2620818316936493, "learning_rate": 1.1454386455826381e-05, "loss": 0.1598, "step": 18867, "teacher_loss": 0.14843472838401794 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.20799601078033447, "learning_rate": 1.1452179609197555e-05, "loss": 0.2535, "step": 18868, "teacher_loss": 0.25855886936187744 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.3932074010372162, "learning_rate": 1.1449972843909612e-05, "loss": 0.2104, "step": 18869, "teacher_loss": 0.19013071060180664 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.43170738220214844, "learning_rate": 1.1447766160013151e-05, "loss": 0.2089, "step": 18870, "teacher_loss": 0.18413732945919037 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4197104871273041, "learning_rate": 1.1445559557558762e-05, "loss": 0.26, "step": 18871, "teacher_loss": 0.24226155877113342 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4333374500274658, "learning_rate": 1.1443353036597032e-05, "loss": 0.213, "step": 18872, "teacher_loss": 0.18848282098770142 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.8702367544174194, "learning_rate": 1.1441146597178557e-05, "loss": 0.3089, "step": 18873, "teacher_loss": 0.24652233719825745 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4325931966304779, "learning_rate": 1.1438940239353924e-05, "loss": 0.3264, "step": 18874, "teacher_loss": 0.31460779905319214 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.22639372944831848, "learning_rate": 1.1436733963173704e-05, "loss": 0.2599, "step": 18875, "teacher_loss": 0.26357021927833557 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.5459451675415039, "learning_rate": 1.1434527768688503e-05, "loss": 0.3696, "step": 18876, "teacher_loss": 0.3500426411628723 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.9373420476913452, "learning_rate": 1.1432321655948885e-05, "loss": 0.2586, "step": 18877, "teacher_loss": 0.18318231403827667 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.6682285070419312, "learning_rate": 1.143011562500543e-05, "loss": 0.2774, "step": 18878, "teacher_loss": 0.2339474856853485 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.3376731872558594, "learning_rate": 1.1427909675908728e-05, "loss": 0.2713, "step": 18879, "teacher_loss": 0.26393935084342957 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.5625085234642029, "learning_rate": 1.1425703808709344e-05, "loss": 0.2991, "step": 18880, "teacher_loss": 0.2698010206222534 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.27605465054512024, "learning_rate": 1.1423498023457855e-05, "loss": 0.1733, "step": 18881, "teacher_loss": 0.16189351677894592 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.27516597509384155, "learning_rate": 1.1421292320204836e-05, "loss": 0.2262, "step": 18882, "teacher_loss": 0.22075891494750977 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.3369408845901489, "learning_rate": 1.1419086699000854e-05, "loss": 0.2466, "step": 18883, "teacher_loss": 0.23659370839595795 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.48085057735443115, "learning_rate": 1.1416881159896473e-05, "loss": 0.221, "step": 18884, "teacher_loss": 0.19208906590938568 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.19140680134296417, "learning_rate": 1.141467570294227e-05, "loss": 0.2049, "step": 18885, "teacher_loss": 0.20636822283267975 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.5554202198982239, "learning_rate": 1.1412470328188807e-05, "loss": 0.2237, "step": 18886, "teacher_loss": 0.18686646223068237 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4390782117843628, "learning_rate": 1.1410265035686639e-05, "loss": 0.3179, "step": 18887, "teacher_loss": 0.3044508695602417 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.652686357498169, "learning_rate": 1.1408059825486324e-05, "loss": 0.3145, "step": 18888, "teacher_loss": 0.2768961489200592 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.47595202922821045, "learning_rate": 1.1405854697638439e-05, "loss": 0.1954, "step": 18889, "teacher_loss": 0.16426897048950195 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.8647781610488892, "learning_rate": 1.1403649652193526e-05, "loss": 0.6722, "step": 18890, "teacher_loss": 0.6508535146713257 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.46104782819747925, "learning_rate": 1.140144468920214e-05, "loss": 0.3176, "step": 18891, "teacher_loss": 0.30160611867904663 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4557260274887085, "learning_rate": 1.1399239808714843e-05, "loss": 0.2449, "step": 18892, "teacher_loss": 0.22148439288139343 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4610227644443512, "learning_rate": 1.139703501078218e-05, "loss": 0.1951, "step": 18893, "teacher_loss": 0.16556695103645325 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.4284716248512268, "learning_rate": 1.1394830295454701e-05, "loss": 0.228, "step": 18894, "teacher_loss": 0.20577165484428406 }, { "compression_loss": 0.0, "epoch": 3.41, "label_loss": 0.17895889282226562, "learning_rate": 1.1392625662782958e-05, "loss": 0.1523, "step": 18895, "teacher_loss": 0.14929591119289398 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.4584330916404724, "learning_rate": 1.139042111281749e-05, "loss": 0.2571, "step": 18896, "teacher_loss": 0.2346741408109665 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5824363827705383, "learning_rate": 1.1388216645608842e-05, "loss": 0.2321, "step": 18897, "teacher_loss": 0.19319449365139008 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.35860949754714966, "learning_rate": 1.1386012261207561e-05, "loss": 0.1794, "step": 18898, "teacher_loss": 0.1595376580953598 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.29824715852737427, "learning_rate": 1.1383807959664189e-05, "loss": 0.2217, "step": 18899, "teacher_loss": 0.2132459282875061 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.807745099067688, "learning_rate": 1.1381603741029247e-05, "loss": 0.2959, "step": 18900, "teacher_loss": 0.23897285759449005 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.20956961810588837, "learning_rate": 1.137939960535329e-05, "loss": 0.2259, "step": 18901, "teacher_loss": 0.22776854038238525 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.6507896184921265, "learning_rate": 1.1377195552686845e-05, "loss": 0.3497, "step": 18902, "teacher_loss": 0.316256046295166 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.38403183221817017, "learning_rate": 1.1374991583080441e-05, "loss": 0.1905, "step": 18903, "teacher_loss": 0.1689785271883011 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.3045749366283417, "learning_rate": 1.1372787696584612e-05, "loss": 0.2466, "step": 18904, "teacher_loss": 0.24017807841300964 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.40457355976104736, "learning_rate": 1.1370583893249888e-05, "loss": 0.1895, "step": 18905, "teacher_loss": 0.16555386781692505 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.3533029556274414, "learning_rate": 1.1368380173126793e-05, "loss": 0.1632, "step": 18906, "teacher_loss": 0.14205724000930786 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.49052295088768005, "learning_rate": 1.1366176536265852e-05, "loss": 0.2564, "step": 18907, "teacher_loss": 0.23042628169059753 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.4184781610965729, "learning_rate": 1.1363972982717588e-05, "loss": 0.211, "step": 18908, "teacher_loss": 0.18789513409137726 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.37580859661102295, "learning_rate": 1.1361769512532529e-05, "loss": 0.2091, "step": 18909, "teacher_loss": 0.19052281975746155 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.6779211163520813, "learning_rate": 1.1359566125761173e-05, "loss": 0.2784, "step": 18910, "teacher_loss": 0.23397746682167053 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.36147624254226685, "learning_rate": 1.1357362822454062e-05, "loss": 0.2438, "step": 18911, "teacher_loss": 0.23074783384799957 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.23682788014411926, "learning_rate": 1.13551596026617e-05, "loss": 0.19, "step": 18912, "teacher_loss": 0.18476463854312897 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.35913145542144775, "learning_rate": 1.1352956466434592e-05, "loss": 0.188, "step": 18913, "teacher_loss": 0.16894227266311646 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5400506854057312, "learning_rate": 1.1350753413823269e-05, "loss": 0.295, "step": 18914, "teacher_loss": 0.26782697439193726 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.1977507621049881, "learning_rate": 1.1348550444878224e-05, "loss": 0.1735, "step": 18915, "teacher_loss": 0.1708141267299652 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.2568507194519043, "learning_rate": 1.1346347559649966e-05, "loss": 0.2768, "step": 18916, "teacher_loss": 0.2790156602859497 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.6909016370773315, "learning_rate": 1.1344144758189009e-05, "loss": 0.2409, "step": 18917, "teacher_loss": 0.19092166423797607 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7165605425834656, "learning_rate": 1.134194204054585e-05, "loss": 0.3382, "step": 18918, "teacher_loss": 0.2961709499359131 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.577447235584259, "learning_rate": 1.133973940677099e-05, "loss": 0.2308, "step": 18919, "teacher_loss": 0.192308709025383 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.2538950443267822, "learning_rate": 1.1337536856914938e-05, "loss": 0.196, "step": 18920, "teacher_loss": 0.18960469961166382 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7067152261734009, "learning_rate": 1.1335334391028182e-05, "loss": 0.25, "step": 18921, "teacher_loss": 0.19928410649299622 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7966921329498291, "learning_rate": 1.1333132009161218e-05, "loss": 0.3498, "step": 18922, "teacher_loss": 0.3001983165740967 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7410800457000732, "learning_rate": 1.1330929711364547e-05, "loss": 0.2545, "step": 18923, "teacher_loss": 0.2004774808883667 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.6805134415626526, "learning_rate": 1.1328727497688663e-05, "loss": 0.2516, "step": 18924, "teacher_loss": 0.20390653610229492 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7523657083511353, "learning_rate": 1.1326525368184037e-05, "loss": 0.31, "step": 18925, "teacher_loss": 0.260883092880249 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.23981522023677826, "learning_rate": 1.1324323322901181e-05, "loss": 0.1549, "step": 18926, "teacher_loss": 0.14540995657444 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.506084144115448, "learning_rate": 1.132212136189057e-05, "loss": 0.2565, "step": 18927, "teacher_loss": 0.228807270526886 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 1.0514582395553589, "learning_rate": 1.1319919485202687e-05, "loss": 0.3752, "step": 18928, "teacher_loss": 0.3000204861164093 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.39114874601364136, "learning_rate": 1.1317717692888014e-05, "loss": 0.2489, "step": 18929, "teacher_loss": 0.23314209282398224 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.46142902970314026, "learning_rate": 1.1315515984997038e-05, "loss": 0.3135, "step": 18930, "teacher_loss": 0.29708659648895264 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5988812446594238, "learning_rate": 1.1313314361580234e-05, "loss": 0.2301, "step": 18931, "teacher_loss": 0.1890873908996582 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.4074750542640686, "learning_rate": 1.1311112822688074e-05, "loss": 0.1883, "step": 18932, "teacher_loss": 0.16400037705898285 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.41760775446891785, "learning_rate": 1.130891136837104e-05, "loss": 0.2281, "step": 18933, "teacher_loss": 0.2070143222808838 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.6495864391326904, "learning_rate": 1.1306709998679606e-05, "loss": 0.263, "step": 18934, "teacher_loss": 0.22009284794330597 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.3988271653652191, "learning_rate": 1.130450871366423e-05, "loss": 0.2261, "step": 18935, "teacher_loss": 0.20685747265815735 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.4406070113182068, "learning_rate": 1.1302307513375398e-05, "loss": 0.189, "step": 18936, "teacher_loss": 0.16103070974349976 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.34024372696876526, "learning_rate": 1.1300106397863566e-05, "loss": 0.1437, "step": 18937, "teacher_loss": 0.12190777063369751 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.501099705696106, "learning_rate": 1.1297905367179194e-05, "loss": 0.2107, "step": 18938, "teacher_loss": 0.17847856879234314 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.4490947723388672, "learning_rate": 1.1295704421372762e-05, "loss": 0.2429, "step": 18939, "teacher_loss": 0.21994373202323914 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.2901543378829956, "learning_rate": 1.129350356049472e-05, "loss": 0.1946, "step": 18940, "teacher_loss": 0.18396006524562836 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.36750486493110657, "learning_rate": 1.1291302784595525e-05, "loss": 0.2405, "step": 18941, "teacher_loss": 0.22644253075122833 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.7167357206344604, "learning_rate": 1.128910209372564e-05, "loss": 0.2832, "step": 18942, "teacher_loss": 0.23507775366306305 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5238017439842224, "learning_rate": 1.128690148793552e-05, "loss": 0.1688, "step": 18943, "teacher_loss": 0.1293947696685791 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5010131597518921, "learning_rate": 1.128470096727561e-05, "loss": 0.2643, "step": 18944, "teacher_loss": 0.23796433210372925 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.43500784039497375, "learning_rate": 1.1282500531796374e-05, "loss": 0.188, "step": 18945, "teacher_loss": 0.16054050624370575 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.312985360622406, "learning_rate": 1.1280300181548254e-05, "loss": 0.205, "step": 18946, "teacher_loss": 0.19297286868095398 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.2814686894416809, "learning_rate": 1.1278099916581696e-05, "loss": 0.1815, "step": 18947, "teacher_loss": 0.17043611407279968 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.457425981760025, "learning_rate": 1.127589973694715e-05, "loss": 0.2391, "step": 18948, "teacher_loss": 0.21484504640102386 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.27101561427116394, "learning_rate": 1.1273699642695062e-05, "loss": 0.1778, "step": 18949, "teacher_loss": 0.16740021109580994 }, { "compression_loss": 0.0, "epoch": 3.42, "label_loss": 0.5963525772094727, "learning_rate": 1.1271499633875865e-05, "loss": 0.316, "step": 18950, "teacher_loss": 0.28483080863952637 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.29575690627098083, "learning_rate": 1.1269299710540001e-05, "loss": 0.2006, "step": 18951, "teacher_loss": 0.1899731457233429 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.48723265528678894, "learning_rate": 1.1267099872737912e-05, "loss": 0.2149, "step": 18952, "teacher_loss": 0.18462856113910675 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.39204859733581543, "learning_rate": 1.126490012052003e-05, "loss": 0.3879, "step": 18953, "teacher_loss": 0.3874187767505646 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.36898645758628845, "learning_rate": 1.1262700453936788e-05, "loss": 0.2318, "step": 18954, "teacher_loss": 0.21651917695999146 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5090503096580505, "learning_rate": 1.1260500873038622e-05, "loss": 0.2543, "step": 18955, "teacher_loss": 0.22600474953651428 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.8451501131057739, "learning_rate": 1.125830137787596e-05, "loss": 0.3483, "step": 18956, "teacher_loss": 0.2931361198425293 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.3353084921836853, "learning_rate": 1.1256101968499225e-05, "loss": 0.2914, "step": 18957, "teacher_loss": 0.28654128313064575 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.46174758672714233, "learning_rate": 1.1253902644958851e-05, "loss": 0.2772, "step": 18958, "teacher_loss": 0.2567267119884491 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.7253978252410889, "learning_rate": 1.1251703407305262e-05, "loss": 0.2254, "step": 18959, "teacher_loss": 0.16980654001235962 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.7977887392044067, "learning_rate": 1.1249504255588865e-05, "loss": 0.2797, "step": 18960, "teacher_loss": 0.22212207317352295 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5071800947189331, "learning_rate": 1.1247305189860101e-05, "loss": 0.2219, "step": 18961, "teacher_loss": 0.19021233916282654 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.061401158571243286, "learning_rate": 1.1245106210169374e-05, "loss": 0.0904, "step": 18962, "teacher_loss": 0.09357604384422302 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.29076728224754333, "learning_rate": 1.12429073165671e-05, "loss": 0.1559, "step": 18963, "teacher_loss": 0.14086532592773438 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.172760009765625, "learning_rate": 1.1240708509103702e-05, "loss": 0.1478, "step": 18964, "teacher_loss": 0.14499524235725403 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.1276783049106598, "learning_rate": 1.1238509787829587e-05, "loss": 0.1806, "step": 18965, "teacher_loss": 0.18645638227462769 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5798298120498657, "learning_rate": 1.1236311152795162e-05, "loss": 0.2034, "step": 18966, "teacher_loss": 0.16161195933818817 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.2813561260700226, "learning_rate": 1.1234112604050839e-05, "loss": 0.2153, "step": 18967, "teacher_loss": 0.20792612433433533 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.35899436473846436, "learning_rate": 1.1231914141647024e-05, "loss": 0.2199, "step": 18968, "teacher_loss": 0.20440340042114258 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.6920740604400635, "learning_rate": 1.1229715765634122e-05, "loss": 0.2215, "step": 18969, "teacher_loss": 0.16922463476657867 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.9821692109107971, "learning_rate": 1.122751747606253e-05, "loss": 0.388, "step": 18970, "teacher_loss": 0.3219813406467438 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.3911381959915161, "learning_rate": 1.1225319272982655e-05, "loss": 0.2089, "step": 18971, "teacher_loss": 0.1886652559041977 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.35358232259750366, "learning_rate": 1.1223121156444893e-05, "loss": 0.1922, "step": 18972, "teacher_loss": 0.17421412467956543 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.3133712410926819, "learning_rate": 1.1220923126499632e-05, "loss": 0.1976, "step": 18973, "teacher_loss": 0.184731125831604 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.1710585355758667, "learning_rate": 1.1218725183197282e-05, "loss": 0.1833, "step": 18974, "teacher_loss": 0.18468737602233887 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.28254732489585876, "learning_rate": 1.1216527326588222e-05, "loss": 0.2421, "step": 18975, "teacher_loss": 0.23765155673027039 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.24278013408184052, "learning_rate": 1.1214329556722846e-05, "loss": 0.1977, "step": 18976, "teacher_loss": 0.19265007972717285 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.26353296637535095, "learning_rate": 1.1212131873651546e-05, "loss": 0.1768, "step": 18977, "teacher_loss": 0.16713783144950867 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.34973233938217163, "learning_rate": 1.1209934277424705e-05, "loss": 0.2086, "step": 18978, "teacher_loss": 0.19293570518493652 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.6932762861251831, "learning_rate": 1.1207736768092707e-05, "loss": 0.3375, "step": 18979, "teacher_loss": 0.2980068624019623 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.4084545075893402, "learning_rate": 1.1205539345705935e-05, "loss": 0.2911, "step": 18980, "teacher_loss": 0.27811479568481445 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.45971405506134033, "learning_rate": 1.1203342010314772e-05, "loss": 0.2185, "step": 18981, "teacher_loss": 0.19168393313884735 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.7347719669342041, "learning_rate": 1.1201144761969591e-05, "loss": 0.2961, "step": 18982, "teacher_loss": 0.24735824763774872 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.4427201449871063, "learning_rate": 1.119894760072077e-05, "loss": 0.2618, "step": 18983, "teacher_loss": 0.24167536199092865 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.27861031889915466, "learning_rate": 1.1196750526618692e-05, "loss": 0.2102, "step": 18984, "teacher_loss": 0.20259132981300354 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.2403203397989273, "learning_rate": 1.119455353971371e-05, "loss": 0.3072, "step": 18985, "teacher_loss": 0.31458765268325806 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.29633820056915283, "learning_rate": 1.1192356640056216e-05, "loss": 0.1981, "step": 18986, "teacher_loss": 0.18721434473991394 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.36018747091293335, "learning_rate": 1.1190159827696565e-05, "loss": 0.1918, "step": 18987, "teacher_loss": 0.173102468252182 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.6048860549926758, "learning_rate": 1.118796310268512e-05, "loss": 0.3652, "step": 18988, "teacher_loss": 0.3385535478591919 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.3029094338417053, "learning_rate": 1.1185766465072262e-05, "loss": 0.2571, "step": 18989, "teacher_loss": 0.2520105838775635 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.26197317242622375, "learning_rate": 1.1183569914908341e-05, "loss": 0.2758, "step": 18990, "teacher_loss": 0.27731871604919434 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.24724102020263672, "learning_rate": 1.1181373452243717e-05, "loss": 0.1365, "step": 18991, "teacher_loss": 0.12416236847639084 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.34574952721595764, "learning_rate": 1.117917707712875e-05, "loss": 0.2275, "step": 18992, "teacher_loss": 0.21431893110275269 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5497463941574097, "learning_rate": 1.11769807896138e-05, "loss": 0.2628, "step": 18993, "teacher_loss": 0.23092001676559448 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5370451211929321, "learning_rate": 1.1174784589749218e-05, "loss": 0.2619, "step": 18994, "teacher_loss": 0.23129001259803772 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.46238672733306885, "learning_rate": 1.1172588477585354e-05, "loss": 0.2769, "step": 18995, "teacher_loss": 0.25628677010536194 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.1385607123374939, "learning_rate": 1.1170392453172563e-05, "loss": 0.1382, "step": 18996, "teacher_loss": 0.13811525702476501 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.2120944857597351, "learning_rate": 1.1168196516561196e-05, "loss": 0.2165, "step": 18997, "teacher_loss": 0.2170124500989914 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.4757639467716217, "learning_rate": 1.1166000667801583e-05, "loss": 0.211, "step": 18998, "teacher_loss": 0.18161174654960632 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.4381820261478424, "learning_rate": 1.1163804906944093e-05, "loss": 0.2057, "step": 18999, "teacher_loss": 0.17984804511070251 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.41334158182144165, "learning_rate": 1.1161609234039047e-05, "loss": 0.2713, "step": 19000, "teacher_loss": 0.2555544972419739 }, { "epoch": 3.43, "eval_exact_match": 79.82024597918638, "eval_f1": 87.46981365235351, "step": 19000 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.8471626043319702, "learning_rate": 1.1159413649136793e-05, "loss": 0.4394, "step": 19001, "teacher_loss": 0.39409661293029785 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.38093143701553345, "learning_rate": 1.1157218152287674e-05, "loss": 0.1904, "step": 19002, "teacher_loss": 0.16921091079711914 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.5689165592193604, "learning_rate": 1.1155022743542019e-05, "loss": 0.2834, "step": 19003, "teacher_loss": 0.25163358449935913 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.38006454706192017, "learning_rate": 1.115282742295016e-05, "loss": 0.3021, "step": 19004, "teacher_loss": 0.29344165325164795 }, { "compression_loss": 0.0, "epoch": 3.43, "label_loss": 0.532762885093689, "learning_rate": 1.115063219056244e-05, "loss": 0.3338, "step": 19005, "teacher_loss": 0.31166601181030273 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.49969178438186646, "learning_rate": 1.1148437046429182e-05, "loss": 0.2912, "step": 19006, "teacher_loss": 0.2680322527885437 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.2557436227798462, "learning_rate": 1.1146241990600713e-05, "loss": 0.1811, "step": 19007, "teacher_loss": 0.1728229820728302 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.6715871095657349, "learning_rate": 1.1144047023127361e-05, "loss": 0.3755, "step": 19008, "teacher_loss": 0.3425886631011963 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.8368261456489563, "learning_rate": 1.1141852144059455e-05, "loss": 0.4687, "step": 19009, "teacher_loss": 0.42783254384994507 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.5769778490066528, "learning_rate": 1.113965735344731e-05, "loss": 0.2561, "step": 19010, "teacher_loss": 0.22041678428649902 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.25773853063583374, "learning_rate": 1.113746265134124e-05, "loss": 0.2289, "step": 19011, "teacher_loss": 0.22568972408771515 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.7073142528533936, "learning_rate": 1.1135268037791582e-05, "loss": 0.2422, "step": 19012, "teacher_loss": 0.19047322869300842 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.23363101482391357, "learning_rate": 1.1133073512848635e-05, "loss": 0.2383, "step": 19013, "teacher_loss": 0.2387879192829132 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.18842419981956482, "learning_rate": 1.1130879076562717e-05, "loss": 0.1998, "step": 19014, "teacher_loss": 0.2010757476091385 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.1398010551929474, "learning_rate": 1.1128684728984144e-05, "loss": 0.1573, "step": 19015, "teacher_loss": 0.15925747156143188 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.5451032519340515, "learning_rate": 1.1126490470163224e-05, "loss": 0.3273, "step": 19016, "teacher_loss": 0.3031374216079712 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.16702450811862946, "learning_rate": 1.1124296300150264e-05, "loss": 0.1769, "step": 19017, "teacher_loss": 0.1780245304107666 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.37351179122924805, "learning_rate": 1.112210221899557e-05, "loss": 0.1987, "step": 19018, "teacher_loss": 0.17925170063972473 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.7907315492630005, "learning_rate": 1.1119908226749445e-05, "loss": 0.2356, "step": 19019, "teacher_loss": 0.1739162802696228 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.44762319326400757, "learning_rate": 1.1117714323462188e-05, "loss": 0.2861, "step": 19020, "teacher_loss": 0.2681659460067749 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.234996035695076, "learning_rate": 1.1115520509184105e-05, "loss": 0.2535, "step": 19021, "teacher_loss": 0.25557753443717957 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.17143544554710388, "learning_rate": 1.1113326783965497e-05, "loss": 0.1636, "step": 19022, "teacher_loss": 0.16274486482143402 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.4388921856880188, "learning_rate": 1.111113314785664e-05, "loss": 0.2156, "step": 19023, "teacher_loss": 0.19080013036727905 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.28796571493148804, "learning_rate": 1.110893960090785e-05, "loss": 0.1631, "step": 19024, "teacher_loss": 0.1491818130016327 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.41557204723358154, "learning_rate": 1.1106746143169406e-05, "loss": 0.2497, "step": 19025, "teacher_loss": 0.2313135266304016 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.42646127939224243, "learning_rate": 1.1104552774691598e-05, "loss": 0.2244, "step": 19026, "teacher_loss": 0.2019994854927063 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.22570274770259857, "learning_rate": 1.1102359495524718e-05, "loss": 0.1462, "step": 19027, "teacher_loss": 0.13731706142425537 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.5449619293212891, "learning_rate": 1.110016630571905e-05, "loss": 0.2715, "step": 19028, "teacher_loss": 0.24111050367355347 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.676845908164978, "learning_rate": 1.1097973205324875e-05, "loss": 0.3617, "step": 19029, "teacher_loss": 0.3266303539276123 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.5150049924850464, "learning_rate": 1.1095780194392476e-05, "loss": 0.2362, "step": 19030, "teacher_loss": 0.20525969564914703 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.32100632786750793, "learning_rate": 1.1093587272972132e-05, "loss": 0.171, "step": 19031, "teacher_loss": 0.15431350469589233 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.14585193991661072, "learning_rate": 1.1091394441114122e-05, "loss": 0.1191, "step": 19032, "teacher_loss": 0.11615870893001556 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.4267537593841553, "learning_rate": 1.1089201698868712e-05, "loss": 0.2493, "step": 19033, "teacher_loss": 0.22954407334327698 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.6894004344940186, "learning_rate": 1.108700904628619e-05, "loss": 0.2909, "step": 19034, "teacher_loss": 0.2466016709804535 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.3753123879432678, "learning_rate": 1.1084816483416816e-05, "loss": 0.185, "step": 19035, "teacher_loss": 0.1638604998588562 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.3636763393878937, "learning_rate": 1.1082624010310856e-05, "loss": 0.2218, "step": 19036, "teacher_loss": 0.20608311891555786 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.14077259600162506, "learning_rate": 1.1080431627018588e-05, "loss": 0.2015, "step": 19037, "teacher_loss": 0.20823225378990173 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.15060411393642426, "learning_rate": 1.1078239333590272e-05, "loss": 0.1453, "step": 19038, "teacher_loss": 0.14470574259757996 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.29928404092788696, "learning_rate": 1.1076047130076169e-05, "loss": 0.1936, "step": 19039, "teacher_loss": 0.18181224167346954 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.6164253950119019, "learning_rate": 1.1073855016526539e-05, "loss": 0.2601, "step": 19040, "teacher_loss": 0.22046592831611633 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.477509081363678, "learning_rate": 1.1071662992991644e-05, "loss": 0.2402, "step": 19041, "teacher_loss": 0.21382513642311096 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.6430342793464661, "learning_rate": 1.1069471059521736e-05, "loss": 0.2444, "step": 19042, "teacher_loss": 0.2001233696937561 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.397210955619812, "learning_rate": 1.1067279216167075e-05, "loss": 0.2302, "step": 19043, "teacher_loss": 0.21159601211547852 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.973497748374939, "learning_rate": 1.106508746297791e-05, "loss": 0.3607, "step": 19044, "teacher_loss": 0.29262202978134155 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.4283132553100586, "learning_rate": 1.106289580000449e-05, "loss": 0.3044, "step": 19045, "teacher_loss": 0.29058772325515747 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.2698388695716858, "learning_rate": 1.1060704227297068e-05, "loss": 0.1711, "step": 19046, "teacher_loss": 0.160079687833786 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.3356991112232208, "learning_rate": 1.1058512744905892e-05, "loss": 0.3469, "step": 19047, "teacher_loss": 0.3481284976005554 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.16967880725860596, "learning_rate": 1.105632135288119e-05, "loss": 0.1508, "step": 19048, "teacher_loss": 0.1486629843711853 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.38393399119377136, "learning_rate": 1.1054130051273225e-05, "loss": 0.1983, "step": 19049, "teacher_loss": 0.177656888961792 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.2889882028102875, "learning_rate": 1.1051938840132226e-05, "loss": 0.1523, "step": 19050, "teacher_loss": 0.13713288307189941 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.9815045595169067, "learning_rate": 1.1049747719508431e-05, "loss": 0.9973, "step": 19051, "teacher_loss": 0.9990469217300415 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.3875212073326111, "learning_rate": 1.1047556689452077e-05, "loss": 0.191, "step": 19052, "teacher_loss": 0.1692069172859192 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.3760157525539398, "learning_rate": 1.1045365750013399e-05, "loss": 0.2184, "step": 19053, "teacher_loss": 0.20087511837482452 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.714532732963562, "learning_rate": 1.1043174901242629e-05, "loss": 0.2904, "step": 19054, "teacher_loss": 0.2433064877986908 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.4075005054473877, "learning_rate": 1.104098414318999e-05, "loss": 0.2382, "step": 19055, "teacher_loss": 0.2193678617477417 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.26949581503868103, "learning_rate": 1.1038793475905722e-05, "loss": 0.2626, "step": 19056, "teacher_loss": 0.2618448734283447 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.7603981494903564, "learning_rate": 1.1036602899440042e-05, "loss": 0.2307, "step": 19057, "teacher_loss": 0.17186513543128967 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.22194135189056396, "learning_rate": 1.1034412413843166e-05, "loss": 0.2133, "step": 19058, "teacher_loss": 0.21234898269176483 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.49292635917663574, "learning_rate": 1.1032222019165334e-05, "loss": 0.2762, "step": 19059, "teacher_loss": 0.25212591886520386 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.22617828845977783, "learning_rate": 1.1030031715456753e-05, "loss": 0.2467, "step": 19060, "teacher_loss": 0.24898689985275269 }, { "compression_loss": 0.0, "epoch": 3.44, "label_loss": 0.498049795627594, "learning_rate": 1.1027841502767636e-05, "loss": 0.1907, "step": 19061, "teacher_loss": 0.1565786749124527 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3925136923789978, "learning_rate": 1.1025651381148212e-05, "loss": 0.1655, "step": 19062, "teacher_loss": 0.14022096991539001 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.4340514540672302, "learning_rate": 1.1023461350648683e-05, "loss": 0.3079, "step": 19063, "teacher_loss": 0.2939187288284302 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.32648319005966187, "learning_rate": 1.1021271411319262e-05, "loss": 0.1963, "step": 19064, "teacher_loss": 0.18180987238883972 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.28082332015037537, "learning_rate": 1.1019081563210158e-05, "loss": 0.2673, "step": 19065, "teacher_loss": 0.26580411195755005 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5896283984184265, "learning_rate": 1.1016891806371581e-05, "loss": 0.2738, "step": 19066, "teacher_loss": 0.23867081105709076 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.448432594537735, "learning_rate": 1.101470214085373e-05, "loss": 0.2158, "step": 19067, "teacher_loss": 0.18999098241329193 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.48557835817337036, "learning_rate": 1.1012512566706812e-05, "loss": 0.5893, "step": 19068, "teacher_loss": 0.6008109450340271 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.2265733778476715, "learning_rate": 1.1010323083981028e-05, "loss": 0.1714, "step": 19069, "teacher_loss": 0.16522319614887238 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.4067123830318451, "learning_rate": 1.1008133692726569e-05, "loss": 0.252, "step": 19070, "teacher_loss": 0.2347760796546936 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5183794498443604, "learning_rate": 1.100594439299364e-05, "loss": 0.3212, "step": 19071, "teacher_loss": 0.2993224263191223 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6018751859664917, "learning_rate": 1.1003755184832435e-05, "loss": 0.2658, "step": 19072, "teacher_loss": 0.22841951251029968 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3192692697048187, "learning_rate": 1.1001566068293138e-05, "loss": 0.2198, "step": 19073, "teacher_loss": 0.20875096321105957 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5363529920578003, "learning_rate": 1.0999377043425938e-05, "loss": 0.337, "step": 19074, "teacher_loss": 0.3147992491722107 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3295190632343292, "learning_rate": 1.0997188110281034e-05, "loss": 0.166, "step": 19075, "teacher_loss": 0.14782433211803436 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5258363485336304, "learning_rate": 1.0994999268908606e-05, "loss": 0.5233, "step": 19076, "teacher_loss": 0.5230728983879089 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.2074512541294098, "learning_rate": 1.0992810519358833e-05, "loss": 0.1599, "step": 19077, "teacher_loss": 0.15463721752166748 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.7323053479194641, "learning_rate": 1.0990621861681905e-05, "loss": 0.2794, "step": 19078, "teacher_loss": 0.22904568910598755 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6553351879119873, "learning_rate": 1.0988433295927996e-05, "loss": 0.2806, "step": 19079, "teacher_loss": 0.23897188901901245 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5344595909118652, "learning_rate": 1.0986244822147279e-05, "loss": 0.1869, "step": 19080, "teacher_loss": 0.1483137607574463 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6060537695884705, "learning_rate": 1.098405644038994e-05, "loss": 0.2669, "step": 19081, "teacher_loss": 0.22919301688671112 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.20828035473823547, "learning_rate": 1.0981868150706148e-05, "loss": 0.1764, "step": 19082, "teacher_loss": 0.17282292246818542 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.4218991994857788, "learning_rate": 1.0979679953146064e-05, "loss": 0.1965, "step": 19083, "teacher_loss": 0.17145104706287384 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5270686149597168, "learning_rate": 1.0977491847759874e-05, "loss": 0.2291, "step": 19084, "teacher_loss": 0.19599170982837677 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.45287320017814636, "learning_rate": 1.0975303834597734e-05, "loss": 0.1917, "step": 19085, "teacher_loss": 0.16270305216312408 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6269698739051819, "learning_rate": 1.0973115913709801e-05, "loss": 0.2743, "step": 19086, "teacher_loss": 0.23513120412826538 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6099036335945129, "learning_rate": 1.097092808514626e-05, "loss": 0.2866, "step": 19087, "teacher_loss": 0.25071215629577637 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.48839032649993896, "learning_rate": 1.0968740348957252e-05, "loss": 0.338, "step": 19088, "teacher_loss": 0.32131969928741455 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.8912617564201355, "learning_rate": 1.0966552705192941e-05, "loss": 0.2501, "step": 19089, "teacher_loss": 0.17885440587997437 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.6618897318840027, "learning_rate": 1.0964365153903487e-05, "loss": 0.233, "step": 19090, "teacher_loss": 0.18530690670013428 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.20693433284759521, "learning_rate": 1.0962177695139039e-05, "loss": 0.1647, "step": 19091, "teacher_loss": 0.160008043050766 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5440467000007629, "learning_rate": 1.0959990328949746e-05, "loss": 0.2016, "step": 19092, "teacher_loss": 0.16355106234550476 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.580567479133606, "learning_rate": 1.0957803055385765e-05, "loss": 0.3179, "step": 19093, "teacher_loss": 0.2887495458126068 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.21344566345214844, "learning_rate": 1.0955615874497243e-05, "loss": 0.1541, "step": 19094, "teacher_loss": 0.14745765924453735 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.2570154666900635, "learning_rate": 1.0953428786334326e-05, "loss": 0.2103, "step": 19095, "teacher_loss": 0.20507436990737915 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5489175915718079, "learning_rate": 1.0951241790947145e-05, "loss": 0.2582, "step": 19096, "teacher_loss": 0.22592632472515106 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.8758103847503662, "learning_rate": 1.0949054888385862e-05, "loss": 0.367, "step": 19097, "teacher_loss": 0.31043192744255066 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5319894552230835, "learning_rate": 1.0946868078700599e-05, "loss": 0.2324, "step": 19098, "teacher_loss": 0.19914206862449646 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 1.2440495491027832, "learning_rate": 1.0944681361941498e-05, "loss": 0.3193, "step": 19099, "teacher_loss": 0.21660488843917847 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.787350058555603, "learning_rate": 1.0942494738158698e-05, "loss": 0.3602, "step": 19100, "teacher_loss": 0.3127365708351135 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.14992976188659668, "learning_rate": 1.0940308207402327e-05, "loss": 0.2129, "step": 19101, "teacher_loss": 0.21990343928337097 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.48523831367492676, "learning_rate": 1.0938121769722517e-05, "loss": 0.2114, "step": 19102, "teacher_loss": 0.1809995174407959 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.48777472972869873, "learning_rate": 1.0935935425169396e-05, "loss": 0.2081, "step": 19103, "teacher_loss": 0.17698656022548676 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.8260766863822937, "learning_rate": 1.0933749173793094e-05, "loss": 0.267, "step": 19104, "teacher_loss": 0.20483244955539703 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.427012175321579, "learning_rate": 1.0931563015643727e-05, "loss": 0.211, "step": 19105, "teacher_loss": 0.1870269775390625 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.45242053270339966, "learning_rate": 1.0929376950771425e-05, "loss": 0.296, "step": 19106, "teacher_loss": 0.2785981297492981 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3336101174354553, "learning_rate": 1.092719097922631e-05, "loss": 0.1833, "step": 19107, "teacher_loss": 0.16663047671318054 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3879373073577881, "learning_rate": 1.0925005101058484e-05, "loss": 0.3601, "step": 19108, "teacher_loss": 0.3569643497467041 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.3112136125564575, "learning_rate": 1.0922819316318086e-05, "loss": 0.1345, "step": 19109, "teacher_loss": 0.11481940746307373 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.367595911026001, "learning_rate": 1.0920633625055213e-05, "loss": 0.2732, "step": 19110, "teacher_loss": 0.2627606987953186 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.24126440286636353, "learning_rate": 1.0918448027319972e-05, "loss": 0.1679, "step": 19111, "teacher_loss": 0.15977492928504944 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.5351781845092773, "learning_rate": 1.0916262523162492e-05, "loss": 0.2342, "step": 19112, "teacher_loss": 0.20072713494300842 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.22612321376800537, "learning_rate": 1.0914077112632864e-05, "loss": 0.2108, "step": 19113, "teacher_loss": 0.2091098427772522 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.27265116572380066, "learning_rate": 1.09118917957812e-05, "loss": 0.1813, "step": 19114, "teacher_loss": 0.1711946427822113 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.507906436920166, "learning_rate": 1.0909706572657594e-05, "loss": 0.2572, "step": 19115, "teacher_loss": 0.22938573360443115 }, { "compression_loss": 0.0, "epoch": 3.45, "label_loss": 0.7333556413650513, "learning_rate": 1.0907521443312158e-05, "loss": 0.2457, "step": 19116, "teacher_loss": 0.19156251847743988 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.6207568049430847, "learning_rate": 1.0905336407794985e-05, "loss": 0.406, "step": 19117, "teacher_loss": 0.3821568489074707 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3077746331691742, "learning_rate": 1.090315146615617e-05, "loss": 0.2525, "step": 19118, "teacher_loss": 0.2464039921760559 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.26714107394218445, "learning_rate": 1.0900966618445807e-05, "loss": 0.1751, "step": 19119, "teacher_loss": 0.1648259311914444 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4723213016986847, "learning_rate": 1.0898781864713999e-05, "loss": 0.4401, "step": 19120, "teacher_loss": 0.43646758794784546 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.8825986385345459, "learning_rate": 1.0896597205010815e-05, "loss": 0.5143, "step": 19121, "teacher_loss": 0.4734327793121338 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4030412435531616, "learning_rate": 1.0894412639386362e-05, "loss": 0.2868, "step": 19122, "teacher_loss": 0.27388477325439453 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.17659726738929749, "learning_rate": 1.0892228167890718e-05, "loss": 0.1779, "step": 19123, "teacher_loss": 0.1780160367488861 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.19377273321151733, "learning_rate": 1.0890043790573961e-05, "loss": 0.2127, "step": 19124, "teacher_loss": 0.21482019126415253 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.45831024646759033, "learning_rate": 1.0887859507486183e-05, "loss": 0.2027, "step": 19125, "teacher_loss": 0.1743345558643341 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4269407391548157, "learning_rate": 1.0885675318677456e-05, "loss": 0.2013, "step": 19126, "teacher_loss": 0.17623060941696167 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.45461559295654297, "learning_rate": 1.0883491224197856e-05, "loss": 0.245, "step": 19127, "teacher_loss": 0.22169965505599976 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4056253433227539, "learning_rate": 1.0881307224097463e-05, "loss": 0.2319, "step": 19128, "teacher_loss": 0.21257199347019196 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.2653721570968628, "learning_rate": 1.0879123318426346e-05, "loss": 0.243, "step": 19129, "teacher_loss": 0.24049286544322968 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4330694079399109, "learning_rate": 1.0876939507234575e-05, "loss": 0.2731, "step": 19130, "teacher_loss": 0.2552933692932129 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5745980143547058, "learning_rate": 1.0874755790572221e-05, "loss": 0.2519, "step": 19131, "teacher_loss": 0.21599452197551727 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5256795883178711, "learning_rate": 1.0872572168489353e-05, "loss": 0.1883, "step": 19132, "teacher_loss": 0.15077659487724304 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4138350784778595, "learning_rate": 1.0870388641036023e-05, "loss": 0.2443, "step": 19133, "teacher_loss": 0.22547096014022827 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.13589030504226685, "learning_rate": 1.0868205208262302e-05, "loss": 0.1576, "step": 19134, "teacher_loss": 0.16004526615142822 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5820252895355225, "learning_rate": 1.0866021870218253e-05, "loss": 0.2479, "step": 19135, "teacher_loss": 0.21080715954303741 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.9297947883605957, "learning_rate": 1.0863838626953924e-05, "loss": 0.5392, "step": 19136, "teacher_loss": 0.4958457946777344 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3560451567173004, "learning_rate": 1.0861655478519375e-05, "loss": 0.2492, "step": 19137, "teacher_loss": 0.2373504936695099 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3579670488834381, "learning_rate": 1.0859472424964658e-05, "loss": 0.2291, "step": 19138, "teacher_loss": 0.21473124623298645 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.2981533408164978, "learning_rate": 1.0857289466339825e-05, "loss": 0.1637, "step": 19139, "teacher_loss": 0.14874543249607086 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3355322480201721, "learning_rate": 1.0855106602694922e-05, "loss": 0.1803, "step": 19140, "teacher_loss": 0.16306746006011963 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5337064266204834, "learning_rate": 1.085292383408e-05, "loss": 0.2835, "step": 19141, "teacher_loss": 0.2556573152542114 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3092346787452698, "learning_rate": 1.0850741160545102e-05, "loss": 0.1575, "step": 19142, "teacher_loss": 0.1406756341457367 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.8014358878135681, "learning_rate": 1.0848558582140264e-05, "loss": 0.2952, "step": 19143, "teacher_loss": 0.23898519575595856 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.29980674386024475, "learning_rate": 1.0846376098915536e-05, "loss": 0.2679, "step": 19144, "teacher_loss": 0.26433664560317993 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3592222034931183, "learning_rate": 1.0844193710920952e-05, "loss": 0.2944, "step": 19145, "teacher_loss": 0.28720492124557495 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.6348129510879517, "learning_rate": 1.0842011418206538e-05, "loss": 0.4556, "step": 19146, "teacher_loss": 0.43573644757270813 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.598501443862915, "learning_rate": 1.0839829220822347e-05, "loss": 0.2475, "step": 19147, "teacher_loss": 0.2084900289773941 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.546212911605835, "learning_rate": 1.0837647118818393e-05, "loss": 0.3756, "step": 19148, "teacher_loss": 0.3565971851348877 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.478501558303833, "learning_rate": 1.0835465112244709e-05, "loss": 0.3856, "step": 19149, "teacher_loss": 0.37531256675720215 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.45155566930770874, "learning_rate": 1.0833283201151328e-05, "loss": 0.3665, "step": 19150, "teacher_loss": 0.3570408225059509 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.163311168551445, "learning_rate": 1.0831101385588271e-05, "loss": 0.1857, "step": 19151, "teacher_loss": 0.18814617395401 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3137606382369995, "learning_rate": 1.0828919665605557e-05, "loss": 0.1885, "step": 19152, "teacher_loss": 0.17458641529083252 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.37643373012542725, "learning_rate": 1.0826738041253211e-05, "loss": 0.2209, "step": 19153, "teacher_loss": 0.20356819033622742 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.8409779071807861, "learning_rate": 1.0824556512581252e-05, "loss": 0.3443, "step": 19154, "teacher_loss": 0.28911924362182617 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.8939721584320068, "learning_rate": 1.0822375079639688e-05, "loss": 0.2987, "step": 19155, "teacher_loss": 0.23253269493579865 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.2531440854072571, "learning_rate": 1.0820193742478542e-05, "loss": 0.1753, "step": 19156, "teacher_loss": 0.1666060984134674 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5972980856895447, "learning_rate": 1.0818012501147824e-05, "loss": 0.2334, "step": 19157, "teacher_loss": 0.19298431277275085 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.3534064292907715, "learning_rate": 1.0815831355697541e-05, "loss": 0.2343, "step": 19158, "teacher_loss": 0.2210364192724228 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.34636884927749634, "learning_rate": 1.0813650306177693e-05, "loss": 0.2036, "step": 19159, "teacher_loss": 0.18772061169147491 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.32011911273002625, "learning_rate": 1.08114693526383e-05, "loss": 0.1816, "step": 19160, "teacher_loss": 0.16624769568443298 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.260425329208374, "learning_rate": 1.0809288495129354e-05, "loss": 0.2165, "step": 19161, "teacher_loss": 0.21158911287784576 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5655578374862671, "learning_rate": 1.0807107733700856e-05, "loss": 0.2962, "step": 19162, "teacher_loss": 0.2663109004497528 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.18860939145088196, "learning_rate": 1.080492706840281e-05, "loss": 0.161, "step": 19163, "teacher_loss": 0.15794195234775543 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.12444545328617096, "learning_rate": 1.080274649928521e-05, "loss": 0.1328, "step": 19164, "teacher_loss": 0.13371118903160095 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.4685020446777344, "learning_rate": 1.0800566026398044e-05, "loss": 0.2251, "step": 19165, "teacher_loss": 0.1981002688407898 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.371686190366745, "learning_rate": 1.079838564979131e-05, "loss": 0.228, "step": 19166, "teacher_loss": 0.21199540793895721 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.17105284333229065, "learning_rate": 1.0796205369515e-05, "loss": 0.1771, "step": 19167, "teacher_loss": 0.17781619727611542 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.2694578766822815, "learning_rate": 1.079402518561909e-05, "loss": 0.2008, "step": 19168, "teacher_loss": 0.19312043488025665 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.42173853516578674, "learning_rate": 1.0791845098153577e-05, "loss": 0.2454, "step": 19169, "teacher_loss": 0.2257966250181198 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.32141679525375366, "learning_rate": 1.0789665107168445e-05, "loss": 0.2061, "step": 19170, "teacher_loss": 0.19332163035869598 }, { "compression_loss": 0.0, "epoch": 3.46, "label_loss": 0.5475047826766968, "learning_rate": 1.0787485212713656e-05, "loss": 0.2396, "step": 19171, "teacher_loss": 0.20534925162792206 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3117268681526184, "learning_rate": 1.0785305414839213e-05, "loss": 0.1858, "step": 19172, "teacher_loss": 0.17180940508842468 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.14585213363170624, "learning_rate": 1.0783125713595075e-05, "loss": 0.2007, "step": 19173, "teacher_loss": 0.20678508281707764 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5162680149078369, "learning_rate": 1.078094610903122e-05, "loss": 0.3348, "step": 19174, "teacher_loss": 0.31466004252433777 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.37309640645980835, "learning_rate": 1.0778766601197624e-05, "loss": 0.2163, "step": 19175, "teacher_loss": 0.1989280879497528 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.440687894821167, "learning_rate": 1.0776587190144254e-05, "loss": 0.2074, "step": 19176, "teacher_loss": 0.18144112825393677 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.24271106719970703, "learning_rate": 1.0774407875921078e-05, "loss": 0.1836, "step": 19177, "teacher_loss": 0.17701321840286255 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.27369099855422974, "learning_rate": 1.0772228658578057e-05, "loss": 0.2277, "step": 19178, "teacher_loss": 0.22257909178733826 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.2757619023323059, "learning_rate": 1.077004953816516e-05, "loss": 0.1951, "step": 19179, "teacher_loss": 0.18611925840377808 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.6268560886383057, "learning_rate": 1.076787051473235e-05, "loss": 0.3945, "step": 19180, "teacher_loss": 0.3687174916267395 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.7088404893875122, "learning_rate": 1.0765691588329567e-05, "loss": 0.3506, "step": 19181, "teacher_loss": 0.3107469081878662 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.1919446736574173, "learning_rate": 1.0763512759006792e-05, "loss": 0.1938, "step": 19182, "teacher_loss": 0.19402126967906952 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5411832332611084, "learning_rate": 1.0761334026813966e-05, "loss": 0.2122, "step": 19183, "teacher_loss": 0.17563802003860474 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.36685672402381897, "learning_rate": 1.0759155391801035e-05, "loss": 0.2956, "step": 19184, "teacher_loss": 0.28765660524368286 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3054330348968506, "learning_rate": 1.0756976854017967e-05, "loss": 0.1888, "step": 19185, "teacher_loss": 0.17584654688835144 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.33647847175598145, "learning_rate": 1.0754798413514694e-05, "loss": 0.1706, "step": 19186, "teacher_loss": 0.15215066075325012 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5734248757362366, "learning_rate": 1.0752620070341162e-05, "loss": 0.2574, "step": 19187, "teacher_loss": 0.22229892015457153 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.2776779234409332, "learning_rate": 1.0750441824547323e-05, "loss": 0.2341, "step": 19188, "teacher_loss": 0.2292025089263916 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.7565782070159912, "learning_rate": 1.0748263676183109e-05, "loss": 0.2633, "step": 19189, "teacher_loss": 0.2085166573524475 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.21718642115592957, "learning_rate": 1.074608562529846e-05, "loss": 0.221, "step": 19190, "teacher_loss": 0.22139029204845428 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.14208725094795227, "learning_rate": 1.0743907671943317e-05, "loss": 0.119, "step": 19191, "teacher_loss": 0.1164025217294693 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.2378544807434082, "learning_rate": 1.0741729816167612e-05, "loss": 0.169, "step": 19192, "teacher_loss": 0.1613958775997162 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.7307143807411194, "learning_rate": 1.073955205802127e-05, "loss": 0.4817, "step": 19193, "teacher_loss": 0.4540741443634033 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.8807600736618042, "learning_rate": 1.073737439755423e-05, "loss": 0.2908, "step": 19194, "teacher_loss": 0.2252986878156662 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3066364526748657, "learning_rate": 1.0735196834816419e-05, "loss": 0.2916, "step": 19195, "teacher_loss": 0.2899734675884247 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.37363147735595703, "learning_rate": 1.0733019369857747e-05, "loss": 0.2394, "step": 19196, "teacher_loss": 0.2245309203863144 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5435505509376526, "learning_rate": 1.0730842002728158e-05, "loss": 0.2446, "step": 19197, "teacher_loss": 0.21135690808296204 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5123671293258667, "learning_rate": 1.0728664733477558e-05, "loss": 0.291, "step": 19198, "teacher_loss": 0.266435444355011 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.9729100465774536, "learning_rate": 1.072648756215587e-05, "loss": 0.3255, "step": 19199, "teacher_loss": 0.2535231411457062 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3581695854663849, "learning_rate": 1.0724310488813008e-05, "loss": 0.2028, "step": 19200, "teacher_loss": 0.18550726771354675 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5466798543930054, "learning_rate": 1.0722133513498888e-05, "loss": 0.3936, "step": 19201, "teacher_loss": 0.3765909671783447 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5606138706207275, "learning_rate": 1.0719956636263424e-05, "loss": 0.3024, "step": 19202, "teacher_loss": 0.27369555830955505 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3890919089317322, "learning_rate": 1.0717779857156516e-05, "loss": 0.2201, "step": 19203, "teacher_loss": 0.20130646228790283 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.86761873960495, "learning_rate": 1.071560317622808e-05, "loss": 0.5903, "step": 19204, "teacher_loss": 0.5594816207885742 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.4426562786102295, "learning_rate": 1.0713426593528023e-05, "loss": 0.2194, "step": 19205, "teacher_loss": 0.19463102519512177 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.21859760582447052, "learning_rate": 1.0711250109106233e-05, "loss": 0.2037, "step": 19206, "teacher_loss": 0.2020566463470459 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5120850205421448, "learning_rate": 1.0709073723012628e-05, "loss": 0.2194, "step": 19207, "teacher_loss": 0.18693125247955322 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.39459556341171265, "learning_rate": 1.0706897435297097e-05, "loss": 0.3121, "step": 19208, "teacher_loss": 0.30298829078674316 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.708952009677887, "learning_rate": 1.0704721246009527e-05, "loss": 0.2542, "step": 19209, "teacher_loss": 0.20363706350326538 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.13194632530212402, "learning_rate": 1.070254515519983e-05, "loss": 0.1621, "step": 19210, "teacher_loss": 0.16545280814170837 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.22494731843471527, "learning_rate": 1.0700369162917885e-05, "loss": 0.1636, "step": 19211, "teacher_loss": 0.1567291021347046 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.4305468797683716, "learning_rate": 1.0698193269213584e-05, "loss": 0.2041, "step": 19212, "teacher_loss": 0.17893174290657043 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.6655075550079346, "learning_rate": 1.0696017474136815e-05, "loss": 0.2289, "step": 19213, "teacher_loss": 0.18036577105522156 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.48842379450798035, "learning_rate": 1.0693841777737463e-05, "loss": 0.2826, "step": 19214, "teacher_loss": 0.259742796421051 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5211811065673828, "learning_rate": 1.0691666180065403e-05, "loss": 0.2538, "step": 19215, "teacher_loss": 0.22405563294887543 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.5416889190673828, "learning_rate": 1.0689490681170524e-05, "loss": 0.1972, "step": 19216, "teacher_loss": 0.15895453095436096 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.4713010787963867, "learning_rate": 1.0687315281102701e-05, "loss": 0.278, "step": 19217, "teacher_loss": 0.25648993253707886 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.433030366897583, "learning_rate": 1.068513997991181e-05, "loss": 0.3604, "step": 19218, "teacher_loss": 0.35235482454299927 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.33772972226142883, "learning_rate": 1.0682964777647716e-05, "loss": 0.2082, "step": 19219, "teacher_loss": 0.19380691647529602 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.29328209161758423, "learning_rate": 1.0680789674360305e-05, "loss": 0.1914, "step": 19220, "teacher_loss": 0.18006017804145813 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.2782125174999237, "learning_rate": 1.0678614670099433e-05, "loss": 0.2892, "step": 19221, "teacher_loss": 0.29039037227630615 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.8001796007156372, "learning_rate": 1.0676439764914968e-05, "loss": 0.2811, "step": 19222, "teacher_loss": 0.22346360981464386 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.1304096281528473, "learning_rate": 1.0674264958856779e-05, "loss": 0.1769, "step": 19223, "teacher_loss": 0.1820920705795288 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.48276597261428833, "learning_rate": 1.0672090251974728e-05, "loss": 0.2425, "step": 19224, "teacher_loss": 0.2157881259918213 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.4614584445953369, "learning_rate": 1.0669915644318664e-05, "loss": 0.2024, "step": 19225, "teacher_loss": 0.17364266514778137 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.3686111569404602, "learning_rate": 1.0667741135938458e-05, "loss": 0.2103, "step": 19226, "teacher_loss": 0.192699134349823 }, { "compression_loss": 0.0, "epoch": 3.47, "label_loss": 0.4633331298828125, "learning_rate": 1.066556672688396e-05, "loss": 0.2645, "step": 19227, "teacher_loss": 0.2424132525920868 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3806978464126587, "learning_rate": 1.0663392417205016e-05, "loss": 0.3477, "step": 19228, "teacher_loss": 0.3440232276916504 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.24845215678215027, "learning_rate": 1.0661218206951486e-05, "loss": 0.1902, "step": 19229, "teacher_loss": 0.1837640255689621 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3281462788581848, "learning_rate": 1.0659044096173218e-05, "loss": 0.1715, "step": 19230, "teacher_loss": 0.15404045581817627 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5688304901123047, "learning_rate": 1.0656870084920043e-05, "loss": 0.2116, "step": 19231, "teacher_loss": 0.17193885147571564 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.44448840618133545, "learning_rate": 1.0654696173241826e-05, "loss": 0.2892, "step": 19232, "teacher_loss": 0.2719712257385254 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5308183431625366, "learning_rate": 1.0652522361188395e-05, "loss": 0.2493, "step": 19233, "teacher_loss": 0.21802425384521484 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.587203323841095, "learning_rate": 1.0650348648809585e-05, "loss": 0.2264, "step": 19234, "teacher_loss": 0.18635180592536926 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.2512023448944092, "learning_rate": 1.064817503615525e-05, "loss": 0.1918, "step": 19235, "teacher_loss": 0.1852271556854248 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.7842098474502563, "learning_rate": 1.064600152327521e-05, "loss": 0.2647, "step": 19236, "teacher_loss": 0.20699161291122437 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5155953168869019, "learning_rate": 1.0643828110219298e-05, "loss": 0.2568, "step": 19237, "teacher_loss": 0.22805780172348022 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.7367535829544067, "learning_rate": 1.0641654797037349e-05, "loss": 0.2614, "step": 19238, "teacher_loss": 0.20855720341205597 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3689441382884979, "learning_rate": 1.0639481583779192e-05, "loss": 0.2306, "step": 19239, "teacher_loss": 0.21524935960769653 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3272366523742676, "learning_rate": 1.0637308470494646e-05, "loss": 0.1857, "step": 19240, "teacher_loss": 0.16994619369506836 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.9207844734191895, "learning_rate": 1.0635135457233533e-05, "loss": 0.257, "step": 19241, "teacher_loss": 0.1832602620124817 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3810318112373352, "learning_rate": 1.0632962544045682e-05, "loss": 0.1797, "step": 19242, "teacher_loss": 0.15732485055923462 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.2114868313074112, "learning_rate": 1.0630789730980909e-05, "loss": 0.177, "step": 19243, "teacher_loss": 0.17320480942726135 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.264034241437912, "learning_rate": 1.0628617018089019e-05, "loss": 0.2025, "step": 19244, "teacher_loss": 0.19565868377685547 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.633360743522644, "learning_rate": 1.0626444405419844e-05, "loss": 0.2222, "step": 19245, "teacher_loss": 0.1764664500951767 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.2613506317138672, "learning_rate": 1.0624271893023184e-05, "loss": 0.1897, "step": 19246, "teacher_loss": 0.18168683350086212 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.8338223695755005, "learning_rate": 1.0622099480948845e-05, "loss": 0.3309, "step": 19247, "teacher_loss": 0.2750103175640106 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.4363275468349457, "learning_rate": 1.0619927169246644e-05, "loss": 0.1909, "step": 19248, "teacher_loss": 0.16368280351161957 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5929557681083679, "learning_rate": 1.0617754957966382e-05, "loss": 0.2242, "step": 19249, "teacher_loss": 0.18324080109596252 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.29465240240097046, "learning_rate": 1.0615582847157854e-05, "loss": 0.2511, "step": 19250, "teacher_loss": 0.24624481797218323 }, { "epoch": 3.48, "eval_exact_match": 79.98107852412488, "eval_f1": 87.28192007756861, "step": 19250 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.46034079790115356, "learning_rate": 1.0613410836870873e-05, "loss": 0.3851, "step": 19251, "teacher_loss": 0.37673109769821167 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.8422898054122925, "learning_rate": 1.0611238927155229e-05, "loss": 0.4861, "step": 19252, "teacher_loss": 0.44649890065193176 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.40830233693122864, "learning_rate": 1.0609067118060712e-05, "loss": 0.2393, "step": 19253, "teacher_loss": 0.22055616974830627 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.40980762243270874, "learning_rate": 1.0606895409637127e-05, "loss": 0.1835, "step": 19254, "teacher_loss": 0.15840867161750793 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.21113336086273193, "learning_rate": 1.0604723801934262e-05, "loss": 0.1898, "step": 19255, "teacher_loss": 0.18739478290081024 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 1.049504041671753, "learning_rate": 1.060255229500189e-05, "loss": 0.4396, "step": 19256, "teacher_loss": 0.3717900514602661 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5374554991722107, "learning_rate": 1.060038088888982e-05, "loss": 0.2123, "step": 19257, "teacher_loss": 0.17618578672409058 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.23970146477222443, "learning_rate": 1.0598209583647828e-05, "loss": 0.2371, "step": 19258, "teacher_loss": 0.23686037957668304 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5487461090087891, "learning_rate": 1.0596038379325683e-05, "loss": 0.2111, "step": 19259, "teacher_loss": 0.1736009120941162 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3135746717453003, "learning_rate": 1.0593867275973184e-05, "loss": 0.2044, "step": 19260, "teacher_loss": 0.19228631258010864 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.1259211152791977, "learning_rate": 1.0591696273640094e-05, "loss": 0.1535, "step": 19261, "teacher_loss": 0.15651443600654602 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 1.0688755512237549, "learning_rate": 1.058952537237619e-05, "loss": 0.3404, "step": 19262, "teacher_loss": 0.2594517469406128 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.05794157832860947, "learning_rate": 1.0587354572231246e-05, "loss": 0.1509, "step": 19263, "teacher_loss": 0.1612202525138855 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 1.153679370880127, "learning_rate": 1.0585183873255032e-05, "loss": 0.2392, "step": 19264, "teacher_loss": 0.13755811750888824 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3633022904396057, "learning_rate": 1.0583013275497318e-05, "loss": 0.1657, "step": 19265, "teacher_loss": 0.14370760321617126 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.38161903619766235, "learning_rate": 1.0580842779007862e-05, "loss": 0.2243, "step": 19266, "teacher_loss": 0.2068396657705307 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.5384604930877686, "learning_rate": 1.0578672383836437e-05, "loss": 0.1892, "step": 19267, "teacher_loss": 0.15040919184684753 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.43093258142471313, "learning_rate": 1.05765020900328e-05, "loss": 0.1647, "step": 19268, "teacher_loss": 0.13507144153118134 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.38681650161743164, "learning_rate": 1.05743318976467e-05, "loss": 0.2567, "step": 19269, "teacher_loss": 0.24221184849739075 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.27728545665740967, "learning_rate": 1.0572161806727908e-05, "loss": 0.1872, "step": 19270, "teacher_loss": 0.1772192269563675 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.7905272841453552, "learning_rate": 1.0569991817326166e-05, "loss": 0.8354, "step": 19271, "teacher_loss": 0.8404158353805542 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.4105336666107178, "learning_rate": 1.056782192949123e-05, "loss": 0.2087, "step": 19272, "teacher_loss": 0.18627750873565674 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.1907138228416443, "learning_rate": 1.0565652143272851e-05, "loss": 0.2057, "step": 19273, "teacher_loss": 0.20741309225559235 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3579472303390503, "learning_rate": 1.0563482458720773e-05, "loss": 0.2178, "step": 19274, "teacher_loss": 0.20220181345939636 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.42955437302589417, "learning_rate": 1.0561312875884739e-05, "loss": 0.3227, "step": 19275, "teacher_loss": 0.3108643591403961 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.7240515947341919, "learning_rate": 1.0559143394814494e-05, "loss": 0.2373, "step": 19276, "teacher_loss": 0.1831989735364914 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.451634019613266, "learning_rate": 1.0556974015559776e-05, "loss": 0.3309, "step": 19277, "teacher_loss": 0.31752827763557434 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.44515055418014526, "learning_rate": 1.0554804738170322e-05, "loss": 0.2004, "step": 19278, "teacher_loss": 0.17319729924201965 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.26225972175598145, "learning_rate": 1.0552635562695871e-05, "loss": 0.1444, "step": 19279, "teacher_loss": 0.13127672672271729 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3869149386882782, "learning_rate": 1.0550466489186156e-05, "loss": 0.2881, "step": 19280, "teacher_loss": 0.27712059020996094 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.3492221236228943, "learning_rate": 1.05482975176909e-05, "loss": 0.2403, "step": 19281, "teacher_loss": 0.2282455414533615 }, { "compression_loss": 0.0, "epoch": 3.48, "label_loss": 0.4371429681777954, "learning_rate": 1.0546128648259828e-05, "loss": 0.2738, "step": 19282, "teacher_loss": 0.25567546486854553 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.48671260476112366, "learning_rate": 1.0543959880942684e-05, "loss": 0.2447, "step": 19283, "teacher_loss": 0.21780069172382355 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.22923651337623596, "learning_rate": 1.0541791215789175e-05, "loss": 0.1583, "step": 19284, "teacher_loss": 0.15043804049491882 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5727065801620483, "learning_rate": 1.0539622652849026e-05, "loss": 0.4167, "step": 19285, "teacher_loss": 0.3993412256240845 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.8295423984527588, "learning_rate": 1.0537454192171958e-05, "loss": 0.2688, "step": 19286, "teacher_loss": 0.20652857422828674 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.2851209044456482, "learning_rate": 1.0535285833807684e-05, "loss": 0.1911, "step": 19287, "teacher_loss": 0.1806366890668869 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.2591969966888428, "learning_rate": 1.0533117577805921e-05, "loss": 0.21, "step": 19288, "teacher_loss": 0.20455129444599152 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.20610500872135162, "learning_rate": 1.0530949424216382e-05, "loss": 0.1519, "step": 19289, "teacher_loss": 0.145859032869339 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5352863073348999, "learning_rate": 1.0528781373088772e-05, "loss": 0.2666, "step": 19290, "teacher_loss": 0.23679813742637634 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.39734768867492676, "learning_rate": 1.0526613424472797e-05, "loss": 0.1678, "step": 19291, "teacher_loss": 0.14229996502399445 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.31339430809020996, "learning_rate": 1.0524445578418168e-05, "loss": 0.3013, "step": 19292, "teacher_loss": 0.2999735176563263 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.2556731402873993, "learning_rate": 1.0522277834974586e-05, "loss": 0.2163, "step": 19293, "teacher_loss": 0.2119034081697464 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.4354918599128723, "learning_rate": 1.052011019419174e-05, "loss": 0.2511, "step": 19294, "teacher_loss": 0.23063281178474426 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.22145043313503265, "learning_rate": 1.0517942656119344e-05, "loss": 0.2039, "step": 19295, "teacher_loss": 0.20190554857254028 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5245985984802246, "learning_rate": 1.0515775220807083e-05, "loss": 0.2546, "step": 19296, "teacher_loss": 0.22458285093307495 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.29969215393066406, "learning_rate": 1.0513607888304646e-05, "loss": 0.2778, "step": 19297, "teacher_loss": 0.2753612995147705 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.13588854670524597, "learning_rate": 1.0511440658661736e-05, "loss": 0.1707, "step": 19298, "teacher_loss": 0.1745852828025818 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.17867204546928406, "learning_rate": 1.0509273531928031e-05, "loss": 0.1957, "step": 19299, "teacher_loss": 0.1975673884153366 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5238090753555298, "learning_rate": 1.050710650815322e-05, "loss": 0.2905, "step": 19300, "teacher_loss": 0.26462483406066895 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.520237922668457, "learning_rate": 1.0504939587386986e-05, "loss": 0.2013, "step": 19301, "teacher_loss": 0.16584312915802002 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5799924731254578, "learning_rate": 1.0502772769679014e-05, "loss": 0.2181, "step": 19302, "teacher_loss": 0.17792558670043945 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5792575478553772, "learning_rate": 1.050060605507898e-05, "loss": 0.3452, "step": 19303, "teacher_loss": 0.3191969394683838 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.1431734263896942, "learning_rate": 1.049843944363655e-05, "loss": 0.1469, "step": 19304, "teacher_loss": 0.1472712606191635 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.40714800357818604, "learning_rate": 1.0496272935401416e-05, "loss": 0.2605, "step": 19305, "teacher_loss": 0.24423368275165558 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3165675401687622, "learning_rate": 1.049410653042324e-05, "loss": 0.2296, "step": 19306, "teacher_loss": 0.2199425846338272 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.8372650146484375, "learning_rate": 1.0491940228751683e-05, "loss": 0.2786, "step": 19307, "teacher_loss": 0.21653325855731964 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.20374202728271484, "learning_rate": 1.0489774030436433e-05, "loss": 0.2019, "step": 19308, "teacher_loss": 0.20172226428985596 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3260893225669861, "learning_rate": 1.0487607935527136e-05, "loss": 0.2325, "step": 19309, "teacher_loss": 0.2220931053161621 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3871397376060486, "learning_rate": 1.0485441944073458e-05, "loss": 0.2053, "step": 19310, "teacher_loss": 0.18509185314178467 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.21755719184875488, "learning_rate": 1.0483276056125063e-05, "loss": 0.1567, "step": 19311, "teacher_loss": 0.14996322989463806 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.25184404850006104, "learning_rate": 1.048111027173161e-05, "loss": 0.2082, "step": 19312, "teacher_loss": 0.2033727467060089 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.36648911237716675, "learning_rate": 1.0478944590942745e-05, "loss": 0.2385, "step": 19313, "teacher_loss": 0.22424963116645813 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.6335453987121582, "learning_rate": 1.0476779013808126e-05, "loss": 0.2557, "step": 19314, "teacher_loss": 0.21370047330856323 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3886480927467346, "learning_rate": 1.0474613540377405e-05, "loss": 0.1956, "step": 19315, "teacher_loss": 0.17412322759628296 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3643404245376587, "learning_rate": 1.0472448170700227e-05, "loss": 0.1942, "step": 19316, "teacher_loss": 0.1753401756286621 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.2642322778701782, "learning_rate": 1.0470282904826238e-05, "loss": 0.1774, "step": 19317, "teacher_loss": 0.16774627566337585 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3977299630641937, "learning_rate": 1.0468117742805086e-05, "loss": 0.2492, "step": 19318, "teacher_loss": 0.23269028961658478 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.45396262407302856, "learning_rate": 1.0465952684686396e-05, "loss": 0.3028, "step": 19319, "teacher_loss": 0.2860533893108368 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.21616527438163757, "learning_rate": 1.0463787730519829e-05, "loss": 0.1515, "step": 19320, "teacher_loss": 0.14435552060604095 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.49721047282218933, "learning_rate": 1.0461622880355003e-05, "loss": 0.2598, "step": 19321, "teacher_loss": 0.23338137567043304 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.4627741575241089, "learning_rate": 1.0459458134241558e-05, "loss": 0.2447, "step": 19322, "teacher_loss": 0.2204170823097229 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5647050738334656, "learning_rate": 1.0457293492229124e-05, "loss": 0.4037, "step": 19323, "teacher_loss": 0.38577407598495483 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5595830678939819, "learning_rate": 1.0455128954367332e-05, "loss": 0.1945, "step": 19324, "teacher_loss": 0.15398472547531128 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5528039932250977, "learning_rate": 1.0452964520705808e-05, "loss": 0.2231, "step": 19325, "teacher_loss": 0.1864643096923828 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.6555163860321045, "learning_rate": 1.0450800191294171e-05, "loss": 0.2811, "step": 19326, "teacher_loss": 0.23948630690574646 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.2831399440765381, "learning_rate": 1.0448635966182049e-05, "loss": 0.2835, "step": 19327, "teacher_loss": 0.283489853143692 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.7469034194946289, "learning_rate": 1.0446471845419063e-05, "loss": 0.2278, "step": 19328, "teacher_loss": 0.17009249329566956 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3104090690612793, "learning_rate": 1.0444307829054814e-05, "loss": 0.1969, "step": 19329, "teacher_loss": 0.18427559733390808 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.6383261680603027, "learning_rate": 1.0442143917138939e-05, "loss": 0.3688, "step": 19330, "teacher_loss": 0.33880770206451416 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.24821141362190247, "learning_rate": 1.0439980109721035e-05, "loss": 0.1548, "step": 19331, "teacher_loss": 0.1444035768508911 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.34593719244003296, "learning_rate": 1.0437816406850706e-05, "loss": 0.1571, "step": 19332, "teacher_loss": 0.13606838881969452 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.4385605454444885, "learning_rate": 1.0435652808577583e-05, "loss": 0.2252, "step": 19333, "teacher_loss": 0.2015310376882553 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.3425580859184265, "learning_rate": 1.043348931495125e-05, "loss": 0.1793, "step": 19334, "teacher_loss": 0.16111284494400024 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.33186042308807373, "learning_rate": 1.0431325926021315e-05, "loss": 0.2403, "step": 19335, "teacher_loss": 0.23012122511863708 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.6210047006607056, "learning_rate": 1.0429162641837382e-05, "loss": 0.7844, "step": 19336, "teacher_loss": 0.8025637865066528 }, { "compression_loss": 0.0, "epoch": 3.49, "label_loss": 0.5344265103340149, "learning_rate": 1.0426999462449045e-05, "loss": 0.218, "step": 19337, "teacher_loss": 0.18279507756233215 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3936270773410797, "learning_rate": 1.0424836387905895e-05, "loss": 0.1728, "step": 19338, "teacher_loss": 0.14822138845920563 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2597653865814209, "learning_rate": 1.0422673418257536e-05, "loss": 0.19, "step": 19339, "teacher_loss": 0.18227365612983704 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3911869525909424, "learning_rate": 1.042051055355355e-05, "loss": 0.1489, "step": 19340, "teacher_loss": 0.12196915596723557 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.4995865225791931, "learning_rate": 1.0418347793843524e-05, "loss": 0.2675, "step": 19341, "teacher_loss": 0.24170859158039093 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.9081000089645386, "learning_rate": 1.0416185139177048e-05, "loss": 0.501, "step": 19342, "teacher_loss": 0.45581477880477905 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2463749349117279, "learning_rate": 1.041402258960371e-05, "loss": 0.1725, "step": 19343, "teacher_loss": 0.16426461935043335 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.4831945300102234, "learning_rate": 1.041186014517308e-05, "loss": 0.2348, "step": 19344, "teacher_loss": 0.20718804001808167 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.23190772533416748, "learning_rate": 1.0409697805934737e-05, "loss": 0.2275, "step": 19345, "teacher_loss": 0.22698181867599487 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.34947869181632996, "learning_rate": 1.0407535571938265e-05, "loss": 0.2124, "step": 19346, "teacher_loss": 0.197207510471344 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5225273370742798, "learning_rate": 1.0405373443233234e-05, "loss": 0.2165, "step": 19347, "teacher_loss": 0.18245337903499603 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2556726932525635, "learning_rate": 1.040321141986921e-05, "loss": 0.1364, "step": 19348, "teacher_loss": 0.12319750338792801 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6717689037322998, "learning_rate": 1.040104950189577e-05, "loss": 0.3268, "step": 19349, "teacher_loss": 0.28844213485717773 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6527443528175354, "learning_rate": 1.0398887689362478e-05, "loss": 0.3123, "step": 19350, "teacher_loss": 0.2744565010070801 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.809870183467865, "learning_rate": 1.039672598231889e-05, "loss": 0.2892, "step": 19351, "teacher_loss": 0.23138704895973206 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5554569959640503, "learning_rate": 1.0394564380814578e-05, "loss": 0.2531, "step": 19352, "teacher_loss": 0.21948108077049255 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.11442992091178894, "learning_rate": 1.0392402884899102e-05, "loss": 0.14, "step": 19353, "teacher_loss": 0.1428043395280838 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2875497341156006, "learning_rate": 1.0390241494622003e-05, "loss": 0.2001, "step": 19354, "teacher_loss": 0.19038638472557068 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.717458963394165, "learning_rate": 1.0388080210032856e-05, "loss": 0.2672, "step": 19355, "teacher_loss": 0.2171338051557541 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2764398455619812, "learning_rate": 1.0385919031181199e-05, "loss": 0.1975, "step": 19356, "teacher_loss": 0.1886882185935974 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5169720649719238, "learning_rate": 1.0383757958116576e-05, "loss": 0.189, "step": 19357, "teacher_loss": 0.15260049700737 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.37131014466285706, "learning_rate": 1.0381596990888557e-05, "loss": 0.1948, "step": 19358, "teacher_loss": 0.17515847086906433 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.33470162749290466, "learning_rate": 1.0379436129546667e-05, "loss": 0.2349, "step": 19359, "teacher_loss": 0.22376102209091187 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.4096849262714386, "learning_rate": 1.0377275374140448e-05, "loss": 0.1906, "step": 19360, "teacher_loss": 0.16623128950595856 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.4260618984699249, "learning_rate": 1.0375114724719452e-05, "loss": 0.2383, "step": 19361, "teacher_loss": 0.21742624044418335 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5361185073852539, "learning_rate": 1.0372954181333206e-05, "loss": 0.2323, "step": 19362, "teacher_loss": 0.19852572679519653 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5701732635498047, "learning_rate": 1.0370793744031245e-05, "loss": 0.2734, "step": 19363, "teacher_loss": 0.2403847724199295 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3092060685157776, "learning_rate": 1.0368633412863111e-05, "loss": 0.2307, "step": 19364, "teacher_loss": 0.2219313532114029 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6654055118560791, "learning_rate": 1.0366473187878324e-05, "loss": 0.5202, "step": 19365, "teacher_loss": 0.5040406584739685 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3943619728088379, "learning_rate": 1.0364313069126419e-05, "loss": 0.2207, "step": 19366, "teacher_loss": 0.2014119178056717 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.7261480689048767, "learning_rate": 1.0362153056656908e-05, "loss": 0.2408, "step": 19367, "teacher_loss": 0.18682153522968292 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3227559030056, "learning_rate": 1.035999315051933e-05, "loss": 0.2871, "step": 19368, "teacher_loss": 0.28309759497642517 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.24620960652828217, "learning_rate": 1.0357833350763196e-05, "loss": 0.215, "step": 19369, "teacher_loss": 0.2114812731742859 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.759568452835083, "learning_rate": 1.0355673657438021e-05, "loss": 0.2296, "step": 19370, "teacher_loss": 0.1707087755203247 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6023832559585571, "learning_rate": 1.035351407059333e-05, "loss": 0.2357, "step": 19371, "teacher_loss": 0.19490975141525269 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5730940699577332, "learning_rate": 1.035135459027863e-05, "loss": 0.2893, "step": 19372, "teacher_loss": 0.2577604651451111 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.32742127776145935, "learning_rate": 1.0349195216543426e-05, "loss": 0.3884, "step": 19373, "teacher_loss": 0.3951761722564697 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.1408384144306183, "learning_rate": 1.0347035949437237e-05, "loss": 0.1826, "step": 19374, "teacher_loss": 0.18722745776176453 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.35834234952926636, "learning_rate": 1.0344876789009563e-05, "loss": 0.2778, "step": 19375, "teacher_loss": 0.2688608169555664 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.475138396024704, "learning_rate": 1.0342717735309905e-05, "loss": 0.234, "step": 19376, "teacher_loss": 0.2072281837463379 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.3047199845314026, "learning_rate": 1.0340558788387768e-05, "loss": 0.2227, "step": 19377, "teacher_loss": 0.21363796293735504 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.36014455556869507, "learning_rate": 1.0338399948292651e-05, "loss": 0.1893, "step": 19378, "teacher_loss": 0.170371413230896 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.26450949907302856, "learning_rate": 1.0336241215074039e-05, "loss": 0.1898, "step": 19379, "teacher_loss": 0.18149280548095703 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.22230613231658936, "learning_rate": 1.033408258878144e-05, "loss": 0.2809, "step": 19380, "teacher_loss": 0.28739115595817566 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5090571045875549, "learning_rate": 1.033192406946434e-05, "loss": 0.2245, "step": 19381, "teacher_loss": 0.1929173469543457 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.323691189289093, "learning_rate": 1.0329765657172216e-05, "loss": 0.2136, "step": 19382, "teacher_loss": 0.20132896304130554 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.9403353929519653, "learning_rate": 1.0327607351954575e-05, "loss": 0.2712, "step": 19383, "teacher_loss": 0.1967974305152893 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5049049854278564, "learning_rate": 1.0325449153860884e-05, "loss": 0.4794, "step": 19384, "teacher_loss": 0.47658807039260864 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6250417232513428, "learning_rate": 1.0323291062940628e-05, "loss": 0.2304, "step": 19385, "teacher_loss": 0.18660229444503784 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5964815616607666, "learning_rate": 1.0321133079243285e-05, "loss": 0.2614, "step": 19386, "teacher_loss": 0.2242087721824646 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.2767605483531952, "learning_rate": 1.0318975202818333e-05, "loss": 0.149, "step": 19387, "teacher_loss": 0.134830042719841 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.6674554347991943, "learning_rate": 1.0316817433715246e-05, "loss": 0.2743, "step": 19388, "teacher_loss": 0.23057660460472107 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.1375901699066162, "learning_rate": 1.0314659771983493e-05, "loss": 0.2116, "step": 19389, "teacher_loss": 0.21980033814907074 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5514234900474548, "learning_rate": 1.0312502217672547e-05, "loss": 0.2367, "step": 19390, "teacher_loss": 0.2017441689968109 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5218294262886047, "learning_rate": 1.0310344770831875e-05, "loss": 0.2714, "step": 19391, "teacher_loss": 0.24358510971069336 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.4563637375831604, "learning_rate": 1.0308187431510927e-05, "loss": 0.3057, "step": 19392, "teacher_loss": 0.28897643089294434 }, { "compression_loss": 0.0, "epoch": 3.5, "label_loss": 0.5893850922584534, "learning_rate": 1.0306030199759181e-05, "loss": 0.2697, "step": 19393, "teacher_loss": 0.23421460390090942 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.22820015251636505, "learning_rate": 1.0303873075626089e-05, "loss": 0.2006, "step": 19394, "teacher_loss": 0.19748564064502716 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5321241617202759, "learning_rate": 1.0301716059161103e-05, "loss": 0.2146, "step": 19395, "teacher_loss": 0.17927706241607666 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.370492160320282, "learning_rate": 1.0299559150413685e-05, "loss": 0.285, "step": 19396, "teacher_loss": 0.2755116820335388 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.511408269405365, "learning_rate": 1.0297402349433286e-05, "loss": 0.2137, "step": 19397, "teacher_loss": 0.18058043718338013 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3674183189868927, "learning_rate": 1.0295245656269346e-05, "loss": 0.2294, "step": 19398, "teacher_loss": 0.21405398845672607 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.32813528180122375, "learning_rate": 1.0293089070971322e-05, "loss": 0.2423, "step": 19399, "teacher_loss": 0.2327880561351776 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3610363006591797, "learning_rate": 1.029093259358865e-05, "loss": 0.2426, "step": 19400, "teacher_loss": 0.22943758964538574 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.37287163734436035, "learning_rate": 1.0288776224170776e-05, "loss": 0.2092, "step": 19401, "teacher_loss": 0.19098162651062012 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.27948683500289917, "learning_rate": 1.028661996276714e-05, "loss": 0.2072, "step": 19402, "teacher_loss": 0.19917762279510498 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.7710708379745483, "learning_rate": 1.028446380942718e-05, "loss": 0.2988, "step": 19403, "teacher_loss": 0.24627827107906342 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.4671623706817627, "learning_rate": 1.0282307764200319e-05, "loss": 0.1893, "step": 19404, "teacher_loss": 0.15840734541416168 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5016294121742249, "learning_rate": 1.0280151827136e-05, "loss": 0.2472, "step": 19405, "teacher_loss": 0.21895377337932587 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5507128834724426, "learning_rate": 1.0277995998283652e-05, "loss": 0.266, "step": 19406, "teacher_loss": 0.2343181073665619 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.8995944857597351, "learning_rate": 1.0275840277692698e-05, "loss": 0.3289, "step": 19407, "teacher_loss": 0.26543593406677246 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3049280047416687, "learning_rate": 1.0273684665412557e-05, "loss": 0.2274, "step": 19408, "teacher_loss": 0.21874544024467468 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.4382936358451843, "learning_rate": 1.0271529161492662e-05, "loss": 0.2408, "step": 19409, "teacher_loss": 0.21882781386375427 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.6913855671882629, "learning_rate": 1.0269373765982426e-05, "loss": 0.2669, "step": 19410, "teacher_loss": 0.21969617903232574 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3898928165435791, "learning_rate": 1.0267218478931261e-05, "loss": 0.2026, "step": 19411, "teacher_loss": 0.18173912167549133 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.66672682762146, "learning_rate": 1.0265063300388591e-05, "loss": 0.2145, "step": 19412, "teacher_loss": 0.16424354910850525 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 1.2872543334960938, "learning_rate": 1.0262908230403824e-05, "loss": 0.6048, "step": 19413, "teacher_loss": 0.5289934277534485 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.19633536040782928, "learning_rate": 1.0260753269026366e-05, "loss": 0.17, "step": 19414, "teacher_loss": 0.1671261191368103 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3945610523223877, "learning_rate": 1.0258598416305628e-05, "loss": 0.2328, "step": 19415, "teacher_loss": 0.21487906575202942 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.7016512155532837, "learning_rate": 1.0256443672291019e-05, "loss": 0.2497, "step": 19416, "teacher_loss": 0.19947555661201477 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.2866821587085724, "learning_rate": 1.0254289037031922e-05, "loss": 0.2178, "step": 19417, "teacher_loss": 0.21019750833511353 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3654671907424927, "learning_rate": 1.025213451057776e-05, "loss": 0.1846, "step": 19418, "teacher_loss": 0.16452905535697937 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.4609503149986267, "learning_rate": 1.0249980092977916e-05, "loss": 0.2438, "step": 19419, "teacher_loss": 0.21971558034420013 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.2799150347709656, "learning_rate": 1.0247825784281782e-05, "loss": 0.2355, "step": 19420, "teacher_loss": 0.23053419589996338 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3768486976623535, "learning_rate": 1.0245671584538762e-05, "loss": 0.2151, "step": 19421, "teacher_loss": 0.19709941744804382 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 1.0270946025848389, "learning_rate": 1.0243517493798234e-05, "loss": 0.2689, "step": 19422, "teacher_loss": 0.18467873334884644 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.16900645196437836, "learning_rate": 1.0241363512109588e-05, "loss": 0.2111, "step": 19423, "teacher_loss": 0.2158191204071045 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.6266013383865356, "learning_rate": 1.0239209639522213e-05, "loss": 0.3801, "step": 19424, "teacher_loss": 0.35273754596710205 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5590620636940002, "learning_rate": 1.0237055876085487e-05, "loss": 0.258, "step": 19425, "teacher_loss": 0.22455421090126038 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.6067731976509094, "learning_rate": 1.0234902221848786e-05, "loss": 0.357, "step": 19426, "teacher_loss": 0.32928046584129333 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.48606717586517334, "learning_rate": 1.0232748676861495e-05, "loss": 0.2588, "step": 19427, "teacher_loss": 0.23349490761756897 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.9790364503860474, "learning_rate": 1.0230595241172987e-05, "loss": 0.2545, "step": 19428, "teacher_loss": 0.17399568855762482 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.12621092796325684, "learning_rate": 1.0228441914832626e-05, "loss": 0.2106, "step": 19429, "teacher_loss": 0.21996408700942993 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5764368772506714, "learning_rate": 1.022628869788978e-05, "loss": 0.2159, "step": 19430, "teacher_loss": 0.17582494020462036 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5570341348648071, "learning_rate": 1.0224135590393833e-05, "loss": 0.2366, "step": 19431, "teacher_loss": 0.20099316537380219 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.421671599149704, "learning_rate": 1.0221982592394134e-05, "loss": 0.2063, "step": 19432, "teacher_loss": 0.18232038617134094 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3714757263660431, "learning_rate": 1.0219829703940047e-05, "loss": 0.2044, "step": 19433, "teacher_loss": 0.18580356240272522 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.32862144708633423, "learning_rate": 1.0217676925080933e-05, "loss": 0.308, "step": 19434, "teacher_loss": 0.3056778311729431 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.22198928892612457, "learning_rate": 1.021552425586615e-05, "loss": 0.1868, "step": 19435, "teacher_loss": 0.1829097419977188 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5531752109527588, "learning_rate": 1.0213371696345051e-05, "loss": 0.3392, "step": 19436, "teacher_loss": 0.31539642810821533 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.670462965965271, "learning_rate": 1.021121924656699e-05, "loss": 0.3587, "step": 19437, "teacher_loss": 0.3240899443626404 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.311935693025589, "learning_rate": 1.0209066906581314e-05, "loss": 0.1779, "step": 19438, "teacher_loss": 0.16302621364593506 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.2325492799282074, "learning_rate": 1.0206914676437363e-05, "loss": 0.2081, "step": 19439, "teacher_loss": 0.20534178614616394 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5304273366928101, "learning_rate": 1.0204762556184497e-05, "loss": 0.2397, "step": 19440, "teacher_loss": 0.20741626620292664 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3660224676132202, "learning_rate": 1.0202610545872048e-05, "loss": 0.1891, "step": 19441, "teacher_loss": 0.1694117933511734 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.7343100309371948, "learning_rate": 1.0200458645549347e-05, "loss": 0.3189, "step": 19442, "teacher_loss": 0.2727666199207306 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.39585065841674805, "learning_rate": 1.019830685526575e-05, "loss": 0.3638, "step": 19443, "teacher_loss": 0.36021149158477783 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5108075737953186, "learning_rate": 1.0196155175070576e-05, "loss": 0.3285, "step": 19444, "teacher_loss": 0.3082049489021301 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.30642494559288025, "learning_rate": 1.019400360501316e-05, "loss": 0.1703, "step": 19445, "teacher_loss": 0.15512561798095703 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.5225207805633545, "learning_rate": 1.0191852145142833e-05, "loss": 0.3237, "step": 19446, "teacher_loss": 0.3016514182090759 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 0.3370410203933716, "learning_rate": 1.0189700795508924e-05, "loss": 0.2077, "step": 19447, "teacher_loss": 0.19327399134635925 }, { "compression_loss": 0.0, "epoch": 3.51, "label_loss": 1.027052402496338, "learning_rate": 1.018754955616075e-05, "loss": 0.2772, "step": 19448, "teacher_loss": 0.1938624382019043 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.36173707246780396, "learning_rate": 1.0185398427147635e-05, "loss": 0.196, "step": 19449, "teacher_loss": 0.17761212587356567 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.28245627880096436, "learning_rate": 1.0183247408518903e-05, "loss": 0.1624, "step": 19450, "teacher_loss": 0.14907124638557434 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.1912381947040558, "learning_rate": 1.0181096500323868e-05, "loss": 0.2131, "step": 19451, "teacher_loss": 0.21557781100273132 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.38765954971313477, "learning_rate": 1.0178945702611832e-05, "loss": 0.2891, "step": 19452, "teacher_loss": 0.27810221910476685 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.3280712366104126, "learning_rate": 1.0176795015432129e-05, "loss": 0.2165, "step": 19453, "teacher_loss": 0.20407480001449585 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.31434187293052673, "learning_rate": 1.0174644438834053e-05, "loss": 0.2596, "step": 19454, "teacher_loss": 0.2535129487514496 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.531960666179657, "learning_rate": 1.0172493972866904e-05, "loss": 0.2585, "step": 19455, "teacher_loss": 0.22806000709533691 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.2145223617553711, "learning_rate": 1.0170343617580005e-05, "loss": 0.2598, "step": 19456, "teacher_loss": 0.2648307681083679 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5329656600952148, "learning_rate": 1.0168193373022644e-05, "loss": 0.2438, "step": 19457, "teacher_loss": 0.2117207795381546 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6372746825218201, "learning_rate": 1.0166043239244119e-05, "loss": 0.2692, "step": 19458, "teacher_loss": 0.22828815877437592 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.44345855712890625, "learning_rate": 1.0163893216293733e-05, "loss": 0.207, "step": 19459, "teacher_loss": 0.1807098388671875 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5052556395530701, "learning_rate": 1.0161743304220774e-05, "loss": 0.3101, "step": 19460, "teacher_loss": 0.2884673774242401 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.25631698966026306, "learning_rate": 1.0159593503074535e-05, "loss": 0.1972, "step": 19461, "teacher_loss": 0.19068075716495514 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.3342117369174957, "learning_rate": 1.0157443812904306e-05, "loss": 0.1802, "step": 19462, "teacher_loss": 0.1631053239107132 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5531876683235168, "learning_rate": 1.0155294233759373e-05, "loss": 0.291, "step": 19463, "teacher_loss": 0.26189544796943665 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.24584102630615234, "learning_rate": 1.0153144765689014e-05, "loss": 0.193, "step": 19464, "teacher_loss": 0.18716412782669067 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.32678771018981934, "learning_rate": 1.0150995408742518e-05, "loss": 0.2532, "step": 19465, "teacher_loss": 0.24505354464054108 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.45356231927871704, "learning_rate": 1.014884616296916e-05, "loss": 0.2194, "step": 19466, "teacher_loss": 0.19334356486797333 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.8779394626617432, "learning_rate": 1.0146697028418207e-05, "loss": 0.3283, "step": 19467, "teacher_loss": 0.26720917224884033 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6529161334037781, "learning_rate": 1.014454800513895e-05, "loss": 0.2171, "step": 19468, "teacher_loss": 0.16865789890289307 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5911378860473633, "learning_rate": 1.0142399093180646e-05, "loss": 0.2277, "step": 19469, "teacher_loss": 0.1873677670955658 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.261684387922287, "learning_rate": 1.0140250292592568e-05, "loss": 0.1864, "step": 19470, "teacher_loss": 0.17801953852176666 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.3821369409561157, "learning_rate": 1.0138101603423978e-05, "loss": 0.3003, "step": 19471, "teacher_loss": 0.2911791205406189 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.4049680233001709, "learning_rate": 1.0135953025724142e-05, "loss": 0.2797, "step": 19472, "teacher_loss": 0.26583564281463623 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.3070334792137146, "learning_rate": 1.0133804559542322e-05, "loss": 0.1655, "step": 19473, "teacher_loss": 0.14982284605503082 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.18226468563079834, "learning_rate": 1.013165620492777e-05, "loss": 0.1957, "step": 19474, "teacher_loss": 0.19716814160346985 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.19842079281806946, "learning_rate": 1.0129507961929749e-05, "loss": 0.2234, "step": 19475, "teacher_loss": 0.22617238759994507 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5729424953460693, "learning_rate": 1.012735983059751e-05, "loss": 0.2573, "step": 19476, "teacher_loss": 0.22227749228477478 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.30991387367248535, "learning_rate": 1.0125211810980292e-05, "loss": 0.1476, "step": 19477, "teacher_loss": 0.12961483001708984 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6316435933113098, "learning_rate": 1.012306390312736e-05, "loss": 0.3661, "step": 19478, "teacher_loss": 0.3365713953971863 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6574933528900146, "learning_rate": 1.012091610708795e-05, "loss": 0.2795, "step": 19479, "teacher_loss": 0.23745989799499512 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5261728167533875, "learning_rate": 1.01187684229113e-05, "loss": 0.3401, "step": 19480, "teacher_loss": 0.3194471299648285 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.20688362419605255, "learning_rate": 1.0116620850646661e-05, "loss": 0.1552, "step": 19481, "teacher_loss": 0.14949235320091248 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6179096698760986, "learning_rate": 1.0114473390343264e-05, "loss": 0.2737, "step": 19482, "teacher_loss": 0.23549196124076843 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.9272891283035278, "learning_rate": 1.011232604205034e-05, "loss": 0.386, "step": 19483, "teacher_loss": 0.3259025812149048 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.2913195788860321, "learning_rate": 1.0110178805817132e-05, "loss": 0.1867, "step": 19484, "teacher_loss": 0.17502276599407196 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.3823346495628357, "learning_rate": 1.0108031681692862e-05, "loss": 0.2281, "step": 19485, "teacher_loss": 0.21092775464057922 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.6832438707351685, "learning_rate": 1.0105884669726755e-05, "loss": 0.3161, "step": 19486, "teacher_loss": 0.2752572298049927 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.33236807584762573, "learning_rate": 1.010373776996804e-05, "loss": 0.2819, "step": 19487, "teacher_loss": 0.2762737274169922 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.44646745920181274, "learning_rate": 1.0101590982465942e-05, "loss": 0.2059, "step": 19488, "teacher_loss": 0.17920741438865662 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.38612663745880127, "learning_rate": 1.009944430726968e-05, "loss": 0.2807, "step": 19489, "teacher_loss": 0.26895081996917725 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.25808462500572205, "learning_rate": 1.0097297744428456e-05, "loss": 0.211, "step": 19490, "teacher_loss": 0.2057158350944519 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.21082568168640137, "learning_rate": 1.0095151293991506e-05, "loss": 0.1702, "step": 19491, "teacher_loss": 0.16574159264564514 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5113463401794434, "learning_rate": 1.0093004956008026e-05, "loss": 0.2925, "step": 19492, "teacher_loss": 0.26819223165512085 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.4059942364692688, "learning_rate": 1.009085873052723e-05, "loss": 0.189, "step": 19493, "teacher_loss": 0.1648443192243576 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.38825899362564087, "learning_rate": 1.0088712617598325e-05, "loss": 0.2052, "step": 19494, "teacher_loss": 0.1848527491092682 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5189722180366516, "learning_rate": 1.0086566617270518e-05, "loss": 0.4149, "step": 19495, "teacher_loss": 0.40337663888931274 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.20231488347053528, "learning_rate": 1.0084420729593004e-05, "loss": 0.1949, "step": 19496, "teacher_loss": 0.19408470392227173 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.23983778059482574, "learning_rate": 1.0082274954614987e-05, "loss": 0.1574, "step": 19497, "teacher_loss": 0.14820244908332825 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.32351869344711304, "learning_rate": 1.0080129292385661e-05, "loss": 0.203, "step": 19498, "teacher_loss": 0.18957358598709106 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.34295618534088135, "learning_rate": 1.0077983742954217e-05, "loss": 0.2042, "step": 19499, "teacher_loss": 0.18879255652427673 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.13781006634235382, "learning_rate": 1.0075838306369852e-05, "loss": 0.1721, "step": 19500, "teacher_loss": 0.1759313941001892 }, { "epoch": 3.52, "eval_exact_match": 79.78240302743615, "eval_f1": 87.32966072047768, "step": 19500 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.5822824239730835, "learning_rate": 1.0073692982681755e-05, "loss": 0.3034, "step": 19501, "teacher_loss": 0.27242371439933777 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.18304210901260376, "learning_rate": 1.0071547771939098e-05, "loss": 0.1976, "step": 19502, "teacher_loss": 0.19925439357757568 }, { "compression_loss": 0.0, "epoch": 3.52, "label_loss": 0.24840891361236572, "learning_rate": 1.0069402674191083e-05, "loss": 0.2145, "step": 19503, "teacher_loss": 0.21070721745491028 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.16617180407047272, "learning_rate": 1.0067257689486879e-05, "loss": 0.1785, "step": 19504, "teacher_loss": 0.17992311716079712 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.2808770537376404, "learning_rate": 1.0065112817875662e-05, "loss": 0.1952, "step": 19505, "teacher_loss": 0.18571394681930542 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.49665361642837524, "learning_rate": 1.0062968059406621e-05, "loss": 0.2884, "step": 19506, "teacher_loss": 0.2652703523635864 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.4683324992656708, "learning_rate": 1.0060823414128917e-05, "loss": 0.2236, "step": 19507, "teacher_loss": 0.19642239809036255 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6576347351074219, "learning_rate": 1.0058678882091721e-05, "loss": 0.2549, "step": 19508, "teacher_loss": 0.21014100313186646 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.8172670006752014, "learning_rate": 1.0056534463344207e-05, "loss": 0.3044, "step": 19509, "teacher_loss": 0.24744324386119843 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6028466820716858, "learning_rate": 1.0054390157935536e-05, "loss": 0.3047, "step": 19510, "teacher_loss": 0.27160191535949707 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.36131414771080017, "learning_rate": 1.0052245965914872e-05, "loss": 0.2061, "step": 19511, "teacher_loss": 0.18881818652153015 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.5852622985839844, "learning_rate": 1.005010188733137e-05, "loss": 0.2484, "step": 19512, "teacher_loss": 0.2109193354845047 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.41420358419418335, "learning_rate": 1.0047957922234194e-05, "loss": 0.1796, "step": 19513, "teacher_loss": 0.15348698198795319 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.21973851323127747, "learning_rate": 1.0045814070672498e-05, "loss": 0.1682, "step": 19514, "teacher_loss": 0.16244924068450928 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.8999911546707153, "learning_rate": 1.0043670332695426e-05, "loss": 0.3054, "step": 19515, "teacher_loss": 0.23933374881744385 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.5979779362678528, "learning_rate": 1.0041526708352139e-05, "loss": 0.2097, "step": 19516, "teacher_loss": 0.16652944684028625 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3461306393146515, "learning_rate": 1.0039383197691775e-05, "loss": 0.2179, "step": 19517, "teacher_loss": 0.2036415934562683 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.8049126863479614, "learning_rate": 1.003723980076348e-05, "loss": 0.3317, "step": 19518, "teacher_loss": 0.2790743112564087 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.4850609302520752, "learning_rate": 1.00350965176164e-05, "loss": 0.2241, "step": 19519, "teacher_loss": 0.1950729787349701 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3148100972175598, "learning_rate": 1.0032953348299674e-05, "loss": 0.1669, "step": 19520, "teacher_loss": 0.15047243237495422 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6103925108909607, "learning_rate": 1.0030810292862429e-05, "loss": 0.2993, "step": 19521, "teacher_loss": 0.26469236612319946 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6049237251281738, "learning_rate": 1.0028667351353809e-05, "loss": 0.2884, "step": 19522, "teacher_loss": 0.2531885802745819 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.4821338653564453, "learning_rate": 1.0026524523822944e-05, "loss": 0.2713, "step": 19523, "teacher_loss": 0.24789506196975708 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.7074856758117676, "learning_rate": 1.0024381810318956e-05, "loss": 0.3152, "step": 19524, "teacher_loss": 0.2716485857963562 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.45420753955841064, "learning_rate": 1.0022239210890977e-05, "loss": 0.2018, "step": 19525, "teacher_loss": 0.17370617389678955 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.42822980880737305, "learning_rate": 1.0020096725588135e-05, "loss": 0.258, "step": 19526, "teacher_loss": 0.23907725512981415 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.707569420337677, "learning_rate": 1.0017954354459535e-05, "loss": 0.3548, "step": 19527, "teacher_loss": 0.3156408667564392 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.5429368019104004, "learning_rate": 1.001581209755431e-05, "loss": 0.2333, "step": 19528, "teacher_loss": 0.19890055060386658 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6895972490310669, "learning_rate": 1.0013669954921572e-05, "loss": 0.3556, "step": 19529, "teacher_loss": 0.3184818625450134 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3465479612350464, "learning_rate": 1.0011527926610425e-05, "loss": 0.2006, "step": 19530, "teacher_loss": 0.18438659608364105 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3681927025318146, "learning_rate": 1.0009386012669995e-05, "loss": 0.2389, "step": 19531, "teacher_loss": 0.22450140118598938 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.24070215225219727, "learning_rate": 1.0007244213149377e-05, "loss": 0.1996, "step": 19532, "teacher_loss": 0.1950719654560089 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.9446883201599121, "learning_rate": 1.0005102528097679e-05, "loss": 0.3247, "step": 19533, "teacher_loss": 0.2557964324951172 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.2997077405452728, "learning_rate": 1.0002960957564004e-05, "loss": 0.2306, "step": 19534, "teacher_loss": 0.2228684425354004 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3986530601978302, "learning_rate": 1.0000819501597454e-05, "loss": 0.1827, "step": 19535, "teacher_loss": 0.15870392322540283 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.5486023426055908, "learning_rate": 9.998678160247127e-06, "loss": 0.2135, "step": 19536, "teacher_loss": 0.17630544304847717 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.445093035697937, "learning_rate": 9.996536933562108e-06, "loss": 0.2582, "step": 19537, "teacher_loss": 0.23742592334747314 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.11764411628246307, "learning_rate": 9.994395821591501e-06, "loss": 0.1817, "step": 19538, "teacher_loss": 0.18886131048202515 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6120966672897339, "learning_rate": 9.992254824384396e-06, "loss": 0.5214, "step": 19539, "teacher_loss": 0.5113601088523865 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.1887083798646927, "learning_rate": 9.99011394198986e-06, "loss": 0.1641, "step": 19540, "teacher_loss": 0.16137921810150146 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.8668692708015442, "learning_rate": 9.987973174457002e-06, "loss": 0.2877, "step": 19541, "teacher_loss": 0.2233489602804184 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.3059224784374237, "learning_rate": 9.98583252183489e-06, "loss": 0.2073, "step": 19542, "teacher_loss": 0.19631874561309814 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.31433677673339844, "learning_rate": 9.983691984172602e-06, "loss": 0.2466, "step": 19543, "teacher_loss": 0.2390194684267044 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.23233336210250854, "learning_rate": 9.981551561519219e-06, "loss": 0.2949, "step": 19544, "teacher_loss": 0.3018597364425659 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.23078562319278717, "learning_rate": 9.979411253923813e-06, "loss": 0.3537, "step": 19545, "teacher_loss": 0.36735644936561584 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.49961578845977783, "learning_rate": 9.977271061435451e-06, "loss": 0.2149, "step": 19546, "teacher_loss": 0.18331004679203033 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.5616172552108765, "learning_rate": 9.97513098410321e-06, "loss": 0.2807, "step": 19547, "teacher_loss": 0.2495173066854477 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.48349398374557495, "learning_rate": 9.972991021976147e-06, "loss": 0.1909, "step": 19548, "teacher_loss": 0.15840110182762146 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.18089839816093445, "learning_rate": 9.970851175103329e-06, "loss": 0.1626, "step": 19549, "teacher_loss": 0.16053500771522522 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.270516037940979, "learning_rate": 9.968711443533814e-06, "loss": 0.1732, "step": 19550, "teacher_loss": 0.16240260004997253 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.4094671607017517, "learning_rate": 9.966571827316666e-06, "loss": 0.2032, "step": 19551, "teacher_loss": 0.18033233284950256 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.2510198950767517, "learning_rate": 9.964432326500933e-06, "loss": 0.1724, "step": 19552, "teacher_loss": 0.1636662483215332 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.9874939918518066, "learning_rate": 9.96229294113566e-06, "loss": 0.4215, "step": 19553, "teacher_loss": 0.3585950434207916 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6145588159561157, "learning_rate": 9.960153671269917e-06, "loss": 0.2127, "step": 19554, "teacher_loss": 0.1680116355419159 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.49198538064956665, "learning_rate": 9.95801451695274e-06, "loss": 0.2809, "step": 19555, "teacher_loss": 0.2574813961982727 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6815842986106873, "learning_rate": 9.955875478233166e-06, "loss": 0.3187, "step": 19556, "teacher_loss": 0.27836674451828003 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.6621488332748413, "learning_rate": 9.95373655516025e-06, "loss": 0.2417, "step": 19557, "teacher_loss": 0.1949477195739746 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.4806235432624817, "learning_rate": 9.951597747783024e-06, "loss": 0.4308, "step": 19558, "teacher_loss": 0.42531657218933105 }, { "compression_loss": 0.0, "epoch": 3.53, "label_loss": 0.09620915353298187, "learning_rate": 9.949459056150524e-06, "loss": 0.1217, "step": 19559, "teacher_loss": 0.12455607950687408 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4135897159576416, "learning_rate": 9.947320480311788e-06, "loss": 0.3702, "step": 19560, "teacher_loss": 0.36541837453842163 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.44637465476989746, "learning_rate": 9.945182020315845e-06, "loss": 0.2654, "step": 19561, "teacher_loss": 0.24532447755336761 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.23862197995185852, "learning_rate": 9.943043676211718e-06, "loss": 0.1755, "step": 19562, "teacher_loss": 0.1684625893831253 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.27720773220062256, "learning_rate": 9.940905448048444e-06, "loss": 0.1631, "step": 19563, "teacher_loss": 0.15041495859622955 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.886763334274292, "learning_rate": 9.93876733587504e-06, "loss": 0.5617, "step": 19564, "teacher_loss": 0.5256365537643433 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.49012044072151184, "learning_rate": 9.936629339740519e-06, "loss": 0.4488, "step": 19565, "teacher_loss": 0.44417595863342285 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.18380270898342133, "learning_rate": 9.934491459693916e-06, "loss": 0.1244, "step": 19566, "teacher_loss": 0.11774909496307373 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.5364276766777039, "learning_rate": 9.932353695784233e-06, "loss": 0.2726, "step": 19567, "teacher_loss": 0.24334004521369934 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.2974734306335449, "learning_rate": 9.930216048060484e-06, "loss": 0.1837, "step": 19568, "teacher_loss": 0.17110282182693481 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.29208338260650635, "learning_rate": 9.928078516571683e-06, "loss": 0.2176, "step": 19569, "teacher_loss": 0.20934754610061646 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.2601548433303833, "learning_rate": 9.925941101366835e-06, "loss": 0.1785, "step": 19570, "teacher_loss": 0.1694105863571167 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.37648963928222656, "learning_rate": 9.923803802494945e-06, "loss": 0.2679, "step": 19571, "teacher_loss": 0.25581812858581543 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.24660523235797882, "learning_rate": 9.921666620005013e-06, "loss": 0.2102, "step": 19572, "teacher_loss": 0.20613731443881989 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.24940378963947296, "learning_rate": 9.91952955394604e-06, "loss": 0.1771, "step": 19573, "teacher_loss": 0.16907119750976562 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 1.0295100212097168, "learning_rate": 9.91739260436703e-06, "loss": 0.4258, "step": 19574, "teacher_loss": 0.3586958646774292 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.6346731185913086, "learning_rate": 9.915255771316957e-06, "loss": 0.3011, "step": 19575, "teacher_loss": 0.26407235860824585 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.47836095094680786, "learning_rate": 9.913119054844833e-06, "loss": 0.2731, "step": 19576, "teacher_loss": 0.2502423822879791 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.3255135416984558, "learning_rate": 9.910982454999636e-06, "loss": 0.2904, "step": 19577, "teacher_loss": 0.2864677309989929 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.3129858076572418, "learning_rate": 9.908845971830345e-06, "loss": 0.3032, "step": 19578, "teacher_loss": 0.3021564483642578 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4534338116645813, "learning_rate": 9.906709605385963e-06, "loss": 0.2795, "step": 19579, "teacher_loss": 0.26018524169921875 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.38047921657562256, "learning_rate": 9.904573355715457e-06, "loss": 0.2543, "step": 19580, "teacher_loss": 0.24026885628700256 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.22300752997398376, "learning_rate": 9.902437222867802e-06, "loss": 0.2371, "step": 19581, "teacher_loss": 0.23871418833732605 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.5068744421005249, "learning_rate": 9.900301206891984e-06, "loss": 0.1771, "step": 19582, "teacher_loss": 0.14046257734298706 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 1.2976257801055908, "learning_rate": 9.898165307836966e-06, "loss": 0.367, "step": 19583, "teacher_loss": 0.2636162340641022 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.37200456857681274, "learning_rate": 9.896029525751721e-06, "loss": 0.1799, "step": 19584, "teacher_loss": 0.15856097638607025 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.25944653153419495, "learning_rate": 9.893893860685219e-06, "loss": 0.2153, "step": 19585, "teacher_loss": 0.21040308475494385 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.7618865370750427, "learning_rate": 9.891758312686421e-06, "loss": 0.2938, "step": 19586, "teacher_loss": 0.24180731177330017 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.3785143494606018, "learning_rate": 9.88962288180429e-06, "loss": 0.2325, "step": 19587, "teacher_loss": 0.21629473567008972 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.2821401059627533, "learning_rate": 9.887487568087782e-06, "loss": 0.2851, "step": 19588, "teacher_loss": 0.28543734550476074 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.17639687657356262, "learning_rate": 9.885352371585862e-06, "loss": 0.1488, "step": 19589, "teacher_loss": 0.1457221806049347 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.35193362832069397, "learning_rate": 9.883217292347469e-06, "loss": 0.2283, "step": 19590, "teacher_loss": 0.2145354449748993 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.39796435832977295, "learning_rate": 9.881082330421571e-06, "loss": 0.2486, "step": 19591, "teacher_loss": 0.2319633960723877 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.7542493343353271, "learning_rate": 9.878947485857104e-06, "loss": 0.2582, "step": 19592, "teacher_loss": 0.20306503772735596 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.284929484128952, "learning_rate": 9.87681275870302e-06, "loss": 0.162, "step": 19593, "teacher_loss": 0.14835819602012634 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4068587124347687, "learning_rate": 9.874678149008254e-06, "loss": 0.2256, "step": 19594, "teacher_loss": 0.20545810461044312 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.6160931587219238, "learning_rate": 9.872543656821755e-06, "loss": 0.3716, "step": 19595, "teacher_loss": 0.34442809224128723 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.8495842218399048, "learning_rate": 9.870409282192456e-06, "loss": 0.3121, "step": 19596, "teacher_loss": 0.2524319887161255 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4769754409790039, "learning_rate": 9.868275025169291e-06, "loss": 0.3982, "step": 19597, "teacher_loss": 0.3894667625427246 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.37383565306663513, "learning_rate": 9.866140885801198e-06, "loss": 0.1873, "step": 19598, "teacher_loss": 0.16660335659980774 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.35413438081741333, "learning_rate": 9.864006864137104e-06, "loss": 0.2652, "step": 19599, "teacher_loss": 0.2553294003009796 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.6458280086517334, "learning_rate": 9.861872960225925e-06, "loss": 0.43, "step": 19600, "teacher_loss": 0.4060248136520386 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.22548463940620422, "learning_rate": 9.859739174116606e-06, "loss": 0.176, "step": 19601, "teacher_loss": 0.17048616707324982 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.5701205730438232, "learning_rate": 9.85760550585805e-06, "loss": 0.3207, "step": 19602, "teacher_loss": 0.2929496467113495 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4363097846508026, "learning_rate": 9.855471955499175e-06, "loss": 0.2301, "step": 19603, "teacher_loss": 0.20724061131477356 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.17100924253463745, "learning_rate": 9.853338523088917e-06, "loss": 0.1994, "step": 19604, "teacher_loss": 0.20257365703582764 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.24860334396362305, "learning_rate": 9.85120520867617e-06, "loss": 0.2299, "step": 19605, "teacher_loss": 0.22783200442790985 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.6802994012832642, "learning_rate": 9.84907201230985e-06, "loss": 0.2452, "step": 19606, "teacher_loss": 0.1968328058719635 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.21299207210540771, "learning_rate": 9.846938934038867e-06, "loss": 0.3307, "step": 19607, "teacher_loss": 0.3437294363975525 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.32776594161987305, "learning_rate": 9.844805973912126e-06, "loss": 0.1946, "step": 19608, "teacher_loss": 0.1798492819070816 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.09490704536437988, "learning_rate": 9.842673131978522e-06, "loss": 0.1889, "step": 19609, "teacher_loss": 0.19938504695892334 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.3669765293598175, "learning_rate": 9.840540408286966e-06, "loss": 0.1617, "step": 19610, "teacher_loss": 0.13884076476097107 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.27929139137268066, "learning_rate": 9.838407802886349e-06, "loss": 0.1934, "step": 19611, "teacher_loss": 0.18383339047431946 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.33364367485046387, "learning_rate": 9.836275315825563e-06, "loss": 0.2464, "step": 19612, "teacher_loss": 0.23672693967819214 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 1.0206588506698608, "learning_rate": 9.834142947153507e-06, "loss": 0.5595, "step": 19613, "teacher_loss": 0.5082470178604126 }, { "compression_loss": 0.0, "epoch": 3.54, "label_loss": 0.4007933735847473, "learning_rate": 9.832010696919065e-06, "loss": 0.2217, "step": 19614, "teacher_loss": 0.20182600617408752 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.3271321654319763, "learning_rate": 9.829878565171123e-06, "loss": 0.3442, "step": 19615, "teacher_loss": 0.34608978033065796 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.7647700309753418, "learning_rate": 9.82774655195856e-06, "loss": 0.285, "step": 19616, "teacher_loss": 0.2316952496767044 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.49964356422424316, "learning_rate": 9.825614657330264e-06, "loss": 0.2612, "step": 19617, "teacher_loss": 0.23469607532024384 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.6729662418365479, "learning_rate": 9.823482881335113e-06, "loss": 0.2283, "step": 19618, "teacher_loss": 0.17889279127120972 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.14052435755729675, "learning_rate": 9.821351224021974e-06, "loss": 0.1614, "step": 19619, "teacher_loss": 0.16371260583400726 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.3000051975250244, "learning_rate": 9.819219685439728e-06, "loss": 0.2022, "step": 19620, "teacher_loss": 0.19131392240524292 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.2325604259967804, "learning_rate": 9.817088265637242e-06, "loss": 0.1537, "step": 19621, "teacher_loss": 0.14499130845069885 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.17120827734470367, "learning_rate": 9.81495696466338e-06, "loss": 0.1663, "step": 19622, "teacher_loss": 0.165754497051239 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.6758211255073547, "learning_rate": 9.812825782567011e-06, "loss": 0.222, "step": 19623, "teacher_loss": 0.1715477555990219 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.33912909030914307, "learning_rate": 9.810694719397003e-06, "loss": 0.2778, "step": 19624, "teacher_loss": 0.2710181474685669 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.7419806718826294, "learning_rate": 9.808563775202192e-06, "loss": 0.3864, "step": 19625, "teacher_loss": 0.34690678119659424 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.43733108043670654, "learning_rate": 9.806432950031461e-06, "loss": 0.2267, "step": 19626, "teacher_loss": 0.20334112644195557 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.6312950849533081, "learning_rate": 9.804302243933646e-06, "loss": 0.283, "step": 19627, "teacher_loss": 0.24428969621658325 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.29214754700660706, "learning_rate": 9.8021716569576e-06, "loss": 0.2382, "step": 19628, "teacher_loss": 0.23222273588180542 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.516738772392273, "learning_rate": 9.800041189152183e-06, "loss": 0.2018, "step": 19629, "teacher_loss": 0.16680949926376343 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5969705581665039, "learning_rate": 9.797910840566224e-06, "loss": 0.2601, "step": 19630, "teacher_loss": 0.22270682454109192 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.28059905767440796, "learning_rate": 9.795780611248572e-06, "loss": 0.2574, "step": 19631, "teacher_loss": 0.2547770142555237 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.8923881649971008, "learning_rate": 9.793650501248071e-06, "loss": 0.2567, "step": 19632, "teacher_loss": 0.18606463074684143 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.31804007291793823, "learning_rate": 9.791520510613555e-06, "loss": 0.2269, "step": 19633, "teacher_loss": 0.21677199006080627 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.38350895047187805, "learning_rate": 9.789390639393856e-06, "loss": 0.2146, "step": 19634, "teacher_loss": 0.1958124041557312 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.42553991079330444, "learning_rate": 9.787260887637809e-06, "loss": 0.1813, "step": 19635, "teacher_loss": 0.15418201684951782 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.40932339429855347, "learning_rate": 9.78513125539424e-06, "loss": 0.2379, "step": 19636, "teacher_loss": 0.21889640390872955 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.27775052189826965, "learning_rate": 9.78300174271198e-06, "loss": 0.37, "step": 19637, "teacher_loss": 0.3802076578140259 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.3273114264011383, "learning_rate": 9.78087234963984e-06, "loss": 0.1676, "step": 19638, "teacher_loss": 0.14987322688102722 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.2997969686985016, "learning_rate": 9.77874307622666e-06, "loss": 0.1805, "step": 19639, "teacher_loss": 0.16725599765777588 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5749364495277405, "learning_rate": 9.776613922521243e-06, "loss": 0.2255, "step": 19640, "teacher_loss": 0.186646968126297 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.29374784231185913, "learning_rate": 9.774484888572404e-06, "loss": 0.2075, "step": 19641, "teacher_loss": 0.19790875911712646 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 1.1676479578018188, "learning_rate": 9.772355974428962e-06, "loss": 0.2977, "step": 19642, "teacher_loss": 0.20104123651981354 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.716153085231781, "learning_rate": 9.770227180139727e-06, "loss": 0.2459, "step": 19643, "teacher_loss": 0.19365613162517548 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.8064671754837036, "learning_rate": 9.768098505753498e-06, "loss": 0.2993, "step": 19644, "teacher_loss": 0.24296057224273682 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5965031385421753, "learning_rate": 9.765969951319088e-06, "loss": 0.2498, "step": 19645, "teacher_loss": 0.21131715178489685 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.4783465266227722, "learning_rate": 9.763841516885293e-06, "loss": 0.2606, "step": 19646, "teacher_loss": 0.23639436066150665 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.4817565381526947, "learning_rate": 9.761713202500911e-06, "loss": 0.2585, "step": 19647, "teacher_loss": 0.2337372750043869 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.29709237813949585, "learning_rate": 9.759585008214745e-06, "loss": 0.1979, "step": 19648, "teacher_loss": 0.18688073754310608 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.4689497947692871, "learning_rate": 9.757456934075585e-06, "loss": 0.182, "step": 19649, "teacher_loss": 0.1501690149307251 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.32565292716026306, "learning_rate": 9.755328980132211e-06, "loss": 0.2267, "step": 19650, "teacher_loss": 0.2156551331281662 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.49341973662376404, "learning_rate": 9.753201146433422e-06, "loss": 0.2323, "step": 19651, "teacher_loss": 0.2032516598701477 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5770326852798462, "learning_rate": 9.751073433028006e-06, "loss": 0.3138, "step": 19652, "teacher_loss": 0.28451165556907654 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.14524126052856445, "learning_rate": 9.74894583996473e-06, "loss": 0.2479, "step": 19653, "teacher_loss": 0.2593310475349426 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.49560484290122986, "learning_rate": 9.746818367292392e-06, "loss": 0.2796, "step": 19654, "teacher_loss": 0.25560450553894043 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.7056359052658081, "learning_rate": 9.744691015059755e-06, "loss": 0.2445, "step": 19655, "teacher_loss": 0.19325533509254456 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.6361950635910034, "learning_rate": 9.742563783315596e-06, "loss": 0.3001, "step": 19656, "teacher_loss": 0.26275214552879333 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.2903074026107788, "learning_rate": 9.740436672108686e-06, "loss": 0.172, "step": 19657, "teacher_loss": 0.15886715054512024 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.687466561794281, "learning_rate": 9.738309681487794e-06, "loss": 0.2652, "step": 19658, "teacher_loss": 0.21822988986968994 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.3813740015029907, "learning_rate": 9.736182811501688e-06, "loss": 0.2624, "step": 19659, "teacher_loss": 0.2491583675146103 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5326455235481262, "learning_rate": 9.734056062199124e-06, "loss": 0.2689, "step": 19660, "teacher_loss": 0.23958337306976318 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.20033520460128784, "learning_rate": 9.731929433628871e-06, "loss": 0.2578, "step": 19661, "teacher_loss": 0.2641976773738861 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.4973335266113281, "learning_rate": 9.729802925839683e-06, "loss": 0.2825, "step": 19662, "teacher_loss": 0.2585833668708801 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.7444344162940979, "learning_rate": 9.727676538880306e-06, "loss": 0.298, "step": 19663, "teacher_loss": 0.24838249385356903 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.26262909173965454, "learning_rate": 9.725550272799506e-06, "loss": 0.1571, "step": 19664, "teacher_loss": 0.14540642499923706 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5884849429130554, "learning_rate": 9.723424127646022e-06, "loss": 0.1952, "step": 19665, "teacher_loss": 0.15154540538787842 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.5201140642166138, "learning_rate": 9.721298103468599e-06, "loss": 0.2697, "step": 19666, "teacher_loss": 0.2418575882911682 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.23165124654769897, "learning_rate": 9.719172200315989e-06, "loss": 0.162, "step": 19667, "teacher_loss": 0.15423768758773804 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.659850001335144, "learning_rate": 9.717046418236927e-06, "loss": 0.3562, "step": 19668, "teacher_loss": 0.3224598467350006 }, { "compression_loss": 0.0, "epoch": 3.55, "label_loss": 0.4666239023208618, "learning_rate": 9.714920757280147e-06, "loss": 0.3202, "step": 19669, "teacher_loss": 0.3039490878582001 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.28869518637657166, "learning_rate": 9.712795217494394e-06, "loss": 0.1846, "step": 19670, "teacher_loss": 0.1730433702468872 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.34344086050987244, "learning_rate": 9.710669798928395e-06, "loss": 0.2003, "step": 19671, "teacher_loss": 0.18440741300582886 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.8189817070960999, "learning_rate": 9.708544501630874e-06, "loss": 0.371, "step": 19672, "teacher_loss": 0.3212584853172302 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.8164449334144592, "learning_rate": 9.706419325650566e-06, "loss": 0.2908, "step": 19673, "teacher_loss": 0.23241671919822693 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.347476601600647, "learning_rate": 9.704294271036196e-06, "loss": 0.2873, "step": 19674, "teacher_loss": 0.28063392639160156 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.4515160322189331, "learning_rate": 9.70216933783647e-06, "loss": 0.2043, "step": 19675, "teacher_loss": 0.1768435835838318 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.16849662363529205, "learning_rate": 9.700044526100126e-06, "loss": 0.1706, "step": 19676, "teacher_loss": 0.1708613932132721 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.542802095413208, "learning_rate": 9.697919835875873e-06, "loss": 0.2467, "step": 19677, "teacher_loss": 0.2138374298810959 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.6454569101333618, "learning_rate": 9.69579526721242e-06, "loss": 0.3479, "step": 19678, "teacher_loss": 0.31488853693008423 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.2728041112422943, "learning_rate": 9.693670820158474e-06, "loss": 0.1694, "step": 19679, "teacher_loss": 0.15791793167591095 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3199935853481293, "learning_rate": 9.691546494762749e-06, "loss": 0.1908, "step": 19680, "teacher_loss": 0.17641231417655945 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3441593050956726, "learning_rate": 9.689422291073949e-06, "loss": 0.3505, "step": 19681, "teacher_loss": 0.351242333650589 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.505041241645813, "learning_rate": 9.68729820914077e-06, "loss": 0.2512, "step": 19682, "teacher_loss": 0.22300291061401367 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.22614985704421997, "learning_rate": 9.685174249011919e-06, "loss": 0.173, "step": 19683, "teacher_loss": 0.16712301969528198 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5780359506607056, "learning_rate": 9.683050410736087e-06, "loss": 0.289, "step": 19684, "teacher_loss": 0.25691157579421997 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3680039942264557, "learning_rate": 9.680926694361966e-06, "loss": 0.2006, "step": 19685, "teacher_loss": 0.18197846412658691 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.30155012011528015, "learning_rate": 9.67880309993825e-06, "loss": 0.1846, "step": 19686, "teacher_loss": 0.17155423760414124 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.282605916261673, "learning_rate": 9.676679627513628e-06, "loss": 0.234, "step": 19687, "teacher_loss": 0.22864636778831482 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.4235907196998596, "learning_rate": 9.674556277136775e-06, "loss": 0.24, "step": 19688, "teacher_loss": 0.21956247091293335 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5603359341621399, "learning_rate": 9.672433048856387e-06, "loss": 0.1933, "step": 19689, "teacher_loss": 0.15255481004714966 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.41177940368652344, "learning_rate": 9.670309942721134e-06, "loss": 0.2663, "step": 19690, "teacher_loss": 0.25018393993377686 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.2997133433818817, "learning_rate": 9.668186958779692e-06, "loss": 0.2282, "step": 19691, "teacher_loss": 0.22028802335262299 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.25963905453681946, "learning_rate": 9.666064097080742e-06, "loss": 0.2026, "step": 19692, "teacher_loss": 0.1962304413318634 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.369697630405426, "learning_rate": 9.663941357672948e-06, "loss": 0.2502, "step": 19693, "teacher_loss": 0.23697349429130554 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.45318636298179626, "learning_rate": 9.661818740604977e-06, "loss": 0.2049, "step": 19694, "teacher_loss": 0.17726637423038483 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5361760854721069, "learning_rate": 9.659696245925502e-06, "loss": 0.2746, "step": 19695, "teacher_loss": 0.24550312757492065 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.32205134630203247, "learning_rate": 9.65757387368318e-06, "loss": 0.2276, "step": 19696, "teacher_loss": 0.21709540486335754 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.32642316818237305, "learning_rate": 9.655451623926668e-06, "loss": 0.2563, "step": 19697, "teacher_loss": 0.2485392987728119 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.23751585185527802, "learning_rate": 9.65332949670463e-06, "loss": 0.1956, "step": 19698, "teacher_loss": 0.19098608195781708 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.1672850251197815, "learning_rate": 9.651207492065723e-06, "loss": 0.1519, "step": 19699, "teacher_loss": 0.15018382668495178 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.19994695484638214, "learning_rate": 9.649085610058585e-06, "loss": 0.1768, "step": 19700, "teacher_loss": 0.1741989105939865 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.37423282861709595, "learning_rate": 9.646963850731865e-06, "loss": 0.2486, "step": 19701, "teacher_loss": 0.23466435074806213 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.33560335636138916, "learning_rate": 9.644842214134225e-06, "loss": 0.3181, "step": 19702, "teacher_loss": 0.31615251302719116 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.17105823755264282, "learning_rate": 9.642720700314294e-06, "loss": 0.238, "step": 19703, "teacher_loss": 0.2453921139240265 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.24075853824615479, "learning_rate": 9.640599309320713e-06, "loss": 0.181, "step": 19704, "teacher_loss": 0.17439356446266174 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5519812107086182, "learning_rate": 9.638478041202124e-06, "loss": 0.3002, "step": 19705, "teacher_loss": 0.27227842807769775 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5329298973083496, "learning_rate": 9.636356896007158e-06, "loss": 0.4112, "step": 19706, "teacher_loss": 0.3976333737373352 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.7675859928131104, "learning_rate": 9.634235873784446e-06, "loss": 0.3045, "step": 19707, "teacher_loss": 0.25303101539611816 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.4073547124862671, "learning_rate": 9.63211497458262e-06, "loss": 0.2117, "step": 19708, "teacher_loss": 0.18997898697853088 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.6389116048812866, "learning_rate": 9.629994198450305e-06, "loss": 0.2748, "step": 19709, "teacher_loss": 0.23433153331279755 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3859766721725464, "learning_rate": 9.627873545436117e-06, "loss": 0.2466, "step": 19710, "teacher_loss": 0.23110729455947876 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.26664724946022034, "learning_rate": 9.625753015588688e-06, "loss": 0.1996, "step": 19711, "teacher_loss": 0.19218634068965912 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3008532226085663, "learning_rate": 9.623632608956632e-06, "loss": 0.298, "step": 19712, "teacher_loss": 0.29765942692756653 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.63437819480896, "learning_rate": 9.62151232558855e-06, "loss": 0.2343, "step": 19713, "teacher_loss": 0.18979284167289734 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.1864594668149948, "learning_rate": 9.619392165533077e-06, "loss": 0.2025, "step": 19714, "teacher_loss": 0.20422762632369995 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.35578083992004395, "learning_rate": 9.617272128838806e-06, "loss": 0.2375, "step": 19715, "teacher_loss": 0.22437231242656708 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.21124783158302307, "learning_rate": 9.615152215554341e-06, "loss": 0.1833, "step": 19716, "teacher_loss": 0.18023629486560822 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3528038263320923, "learning_rate": 9.613032425728297e-06, "loss": 0.1581, "step": 19717, "teacher_loss": 0.13650211691856384 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3676678538322449, "learning_rate": 9.610912759409269e-06, "loss": 0.2048, "step": 19718, "teacher_loss": 0.18675173819065094 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5473875999450684, "learning_rate": 9.608793216645852e-06, "loss": 0.3036, "step": 19719, "teacher_loss": 0.2765269875526428 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.4059361219406128, "learning_rate": 9.60667379748664e-06, "loss": 0.2038, "step": 19720, "teacher_loss": 0.18134932219982147 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.22240974009037018, "learning_rate": 9.604554501980231e-06, "loss": 0.1592, "step": 19721, "teacher_loss": 0.15213757753372192 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.5658891797065735, "learning_rate": 9.602435330175215e-06, "loss": 0.2799, "step": 19722, "teacher_loss": 0.24810951948165894 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.3812227249145508, "learning_rate": 9.600316282120165e-06, "loss": 0.1969, "step": 19723, "teacher_loss": 0.1763743758201599 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.4620589017868042, "learning_rate": 9.598197357863681e-06, "loss": 0.1919, "step": 19724, "teacher_loss": 0.161843404173851 }, { "compression_loss": 0.0, "epoch": 3.56, "label_loss": 0.16642612218856812, "learning_rate": 9.596078557454334e-06, "loss": 0.1813, "step": 19725, "teacher_loss": 0.18297290802001953 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.5155349969863892, "learning_rate": 9.593959880940699e-06, "loss": 0.2287, "step": 19726, "teacher_loss": 0.19686922430992126 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.8422822952270508, "learning_rate": 9.591841328371364e-06, "loss": 0.2554, "step": 19727, "teacher_loss": 0.1902158558368683 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.42126667499542236, "learning_rate": 9.589722899794888e-06, "loss": 0.1773, "step": 19728, "teacher_loss": 0.1502058207988739 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.31620368361473083, "learning_rate": 9.587604595259844e-06, "loss": 0.2388, "step": 19729, "teacher_loss": 0.23018312454223633 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.31127387285232544, "learning_rate": 9.585486414814804e-06, "loss": 0.2395, "step": 19730, "teacher_loss": 0.23148967325687408 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.15470194816589355, "learning_rate": 9.583368358508322e-06, "loss": 0.1475, "step": 19731, "teacher_loss": 0.14666257798671722 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.9790284633636475, "learning_rate": 9.581250426388966e-06, "loss": 0.2884, "step": 19732, "teacher_loss": 0.21169723570346832 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.7181142568588257, "learning_rate": 9.579132618505291e-06, "loss": 0.3231, "step": 19733, "teacher_loss": 0.27922362089157104 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.3325485587120056, "learning_rate": 9.577014934905854e-06, "loss": 0.2193, "step": 19734, "teacher_loss": 0.20668581128120422 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.5428762435913086, "learning_rate": 9.574897375639202e-06, "loss": 0.2741, "step": 19735, "teacher_loss": 0.2442161738872528 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.31958791613578796, "learning_rate": 9.572779940753894e-06, "loss": 0.2593, "step": 19736, "teacher_loss": 0.2525697946548462 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.9320069551467896, "learning_rate": 9.57066263029847e-06, "loss": 0.2562, "step": 19737, "teacher_loss": 0.18114247918128967 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.33093491196632385, "learning_rate": 9.568545444321464e-06, "loss": 0.1872, "step": 19738, "teacher_loss": 0.17118746042251587 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.34154704213142395, "learning_rate": 9.566428382871439e-06, "loss": 0.1772, "step": 19739, "teacher_loss": 0.1589348316192627 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.57374107837677, "learning_rate": 9.564311445996914e-06, "loss": 0.207, "step": 19740, "teacher_loss": 0.16623975336551666 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.2143765538930893, "learning_rate": 9.562194633746433e-06, "loss": 0.2155, "step": 19741, "teacher_loss": 0.215635746717453 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.642687201499939, "learning_rate": 9.56007794616852e-06, "loss": 0.2673, "step": 19742, "teacher_loss": 0.2256081998348236 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.4992945194244385, "learning_rate": 9.557961383311715e-06, "loss": 0.2359, "step": 19743, "teacher_loss": 0.20667928457260132 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.3154093623161316, "learning_rate": 9.555844945224538e-06, "loss": 0.2369, "step": 19744, "teacher_loss": 0.22817052900791168 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.7092342376708984, "learning_rate": 9.553728631955511e-06, "loss": 0.3649, "step": 19745, "teacher_loss": 0.3266732096672058 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.22606298327445984, "learning_rate": 9.551612443553163e-06, "loss": 0.1477, "step": 19746, "teacher_loss": 0.13901641964912415 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.29060009121894836, "learning_rate": 9.549496380066009e-06, "loss": 0.2204, "step": 19747, "teacher_loss": 0.2125692069530487 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.470231294631958, "learning_rate": 9.54738044154255e-06, "loss": 0.2281, "step": 19748, "teacher_loss": 0.2012127786874771 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.7771170139312744, "learning_rate": 9.545264628031323e-06, "loss": 0.4027, "step": 19749, "teacher_loss": 0.36106836795806885 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.24146516621112823, "learning_rate": 9.543148939580817e-06, "loss": 0.1963, "step": 19750, "teacher_loss": 0.1912476122379303 }, { "epoch": 3.57, "eval_exact_match": 79.80132450331126, "eval_f1": 87.25628193867762, "step": 19750 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.22695963084697723, "learning_rate": 9.54103337623954e-06, "loss": 0.1522, "step": 19751, "teacher_loss": 0.1438606083393097 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.41737300157546997, "learning_rate": 9.538917938056012e-06, "loss": 0.2016, "step": 19752, "teacher_loss": 0.17765183746814728 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.3666348159313202, "learning_rate": 9.536802625078718e-06, "loss": 0.222, "step": 19753, "teacher_loss": 0.2059706300497055 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.41892117261886597, "learning_rate": 9.534687437356157e-06, "loss": 0.1816, "step": 19754, "teacher_loss": 0.15522420406341553 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.7090497016906738, "learning_rate": 9.53257237493683e-06, "loss": 0.3244, "step": 19755, "teacher_loss": 0.2816123068332672 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.7846797704696655, "learning_rate": 9.530457437869227e-06, "loss": 0.2401, "step": 19756, "teacher_loss": 0.17958106100559235 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.14173713326454163, "learning_rate": 9.528342626201834e-06, "loss": 0.1678, "step": 19757, "teacher_loss": 0.1707158386707306 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.270608127117157, "learning_rate": 9.526227939983142e-06, "loss": 0.1714, "step": 19758, "teacher_loss": 0.1603391468524933 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.36473992466926575, "learning_rate": 9.524113379261631e-06, "loss": 0.1827, "step": 19759, "teacher_loss": 0.16248352825641632 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.2568415403366089, "learning_rate": 9.521998944085788e-06, "loss": 0.2071, "step": 19760, "teacher_loss": 0.2015584409236908 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.245318204164505, "learning_rate": 9.519884634504074e-06, "loss": 0.1875, "step": 19761, "teacher_loss": 0.1810951828956604 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.4073965549468994, "learning_rate": 9.517770450564984e-06, "loss": 0.2526, "step": 19762, "teacher_loss": 0.2353518307209015 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.8636690378189087, "learning_rate": 9.515656392316981e-06, "loss": 0.3351, "step": 19763, "teacher_loss": 0.27636533975601196 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.583938717842102, "learning_rate": 9.513542459808528e-06, "loss": 0.2299, "step": 19764, "teacher_loss": 0.19061490893363953 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.6297131776809692, "learning_rate": 9.511428653088101e-06, "loss": 0.1918, "step": 19765, "teacher_loss": 0.14317730069160461 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.13741648197174072, "learning_rate": 9.50931497220416e-06, "loss": 0.1451, "step": 19766, "teacher_loss": 0.1459297090768814 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.3024732768535614, "learning_rate": 9.507201417205162e-06, "loss": 0.1995, "step": 19767, "teacher_loss": 0.18808643519878387 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.3508206605911255, "learning_rate": 9.50508798813957e-06, "loss": 0.221, "step": 19768, "teacher_loss": 0.20655420422554016 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.4434640407562256, "learning_rate": 9.502974685055835e-06, "loss": 0.2214, "step": 19769, "teacher_loss": 0.19676610827445984 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.21700438857078552, "learning_rate": 9.500861508002407e-06, "loss": 0.2402, "step": 19770, "teacher_loss": 0.2427355945110321 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.28291788697242737, "learning_rate": 9.49874845702774e-06, "loss": 0.2072, "step": 19771, "teacher_loss": 0.19881662726402283 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.34595683217048645, "learning_rate": 9.496635532180281e-06, "loss": 0.1931, "step": 19772, "teacher_loss": 0.1761069893836975 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.45131373405456543, "learning_rate": 9.494522733508459e-06, "loss": 0.2085, "step": 19773, "teacher_loss": 0.1815440058708191 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.4023161828517914, "learning_rate": 9.492410061060731e-06, "loss": 0.2319, "step": 19774, "teacher_loss": 0.21292363107204437 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.2056112289428711, "learning_rate": 9.490297514885533e-06, "loss": 0.1888, "step": 19775, "teacher_loss": 0.1868990957736969 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.5489335656166077, "learning_rate": 9.488185095031283e-06, "loss": 0.2291, "step": 19776, "teacher_loss": 0.19359859824180603 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.432267427444458, "learning_rate": 9.486072801546433e-06, "loss": 0.353, "step": 19777, "teacher_loss": 0.3442351818084717 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.6630464196205139, "learning_rate": 9.483960634479399e-06, "loss": 0.2625, "step": 19778, "teacher_loss": 0.2180473506450653 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.36091816425323486, "learning_rate": 9.481848593878605e-06, "loss": 0.2313, "step": 19779, "teacher_loss": 0.21692229807376862 }, { "compression_loss": 0.0, "epoch": 3.57, "label_loss": 0.16834931075572968, "learning_rate": 9.479736679792484e-06, "loss": 0.1688, "step": 19780, "teacher_loss": 0.1688673347234726 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.32525840401649475, "learning_rate": 9.47762489226945e-06, "loss": 0.3969, "step": 19781, "teacher_loss": 0.40490737557411194 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.35260459780693054, "learning_rate": 9.475513231357917e-06, "loss": 0.236, "step": 19782, "teacher_loss": 0.22307217121124268 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4594229459762573, "learning_rate": 9.4734016971063e-06, "loss": 0.2273, "step": 19783, "teacher_loss": 0.20146408677101135 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2580691874027252, "learning_rate": 9.471290289563019e-06, "loss": 0.2617, "step": 19784, "teacher_loss": 0.26212555170059204 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.14252987504005432, "learning_rate": 9.469179008776478e-06, "loss": 0.1776, "step": 19785, "teacher_loss": 0.18147867918014526 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.13024646043777466, "learning_rate": 9.46706785479507e-06, "loss": 0.1852, "step": 19786, "teacher_loss": 0.1913457214832306 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5175319314002991, "learning_rate": 9.464956827667216e-06, "loss": 0.2593, "step": 19787, "teacher_loss": 0.23060579597949982 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4064214825630188, "learning_rate": 9.462845927441304e-06, "loss": 0.1978, "step": 19788, "teacher_loss": 0.17465084791183472 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.38115522265434265, "learning_rate": 9.46073515416573e-06, "loss": 0.2661, "step": 19789, "teacher_loss": 0.2533676326274872 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.3713221848011017, "learning_rate": 9.458624507888897e-06, "loss": 0.2685, "step": 19790, "teacher_loss": 0.2570367455482483 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4722519814968109, "learning_rate": 9.45651398865919e-06, "loss": 0.2489, "step": 19791, "teacher_loss": 0.2241356074810028 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.7023818492889404, "learning_rate": 9.454403596524991e-06, "loss": 0.4036, "step": 19792, "teacher_loss": 0.3704553246498108 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.7064534425735474, "learning_rate": 9.452293331534696e-06, "loss": 0.2968, "step": 19793, "teacher_loss": 0.2512907385826111 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5741615891456604, "learning_rate": 9.450183193736682e-06, "loss": 0.2205, "step": 19794, "teacher_loss": 0.18122228980064392 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.6621631383895874, "learning_rate": 9.448073183179326e-06, "loss": 0.213, "step": 19795, "teacher_loss": 0.1630450189113617 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2787085771560669, "learning_rate": 9.445963299911007e-06, "loss": 0.1785, "step": 19796, "teacher_loss": 0.16735535860061646 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4371373951435089, "learning_rate": 9.443853543980101e-06, "loss": 0.2717, "step": 19797, "teacher_loss": 0.2533169984817505 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.1518123596906662, "learning_rate": 9.441743915434967e-06, "loss": 0.1826, "step": 19798, "teacher_loss": 0.1860014796257019 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.7233725786209106, "learning_rate": 9.439634414323987e-06, "loss": 0.2851, "step": 19799, "teacher_loss": 0.23637276887893677 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4535370469093323, "learning_rate": 9.43752504069552e-06, "loss": 0.2725, "step": 19800, "teacher_loss": 0.25242918729782104 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.24376991391181946, "learning_rate": 9.435415794597919e-06, "loss": 0.2056, "step": 19801, "teacher_loss": 0.20139597356319427 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.8627008199691772, "learning_rate": 9.433306676079562e-06, "loss": 0.372, "step": 19802, "teacher_loss": 0.31751590967178345 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.3033873736858368, "learning_rate": 9.431197685188785e-06, "loss": 0.2309, "step": 19803, "teacher_loss": 0.22280624508857727 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.36360687017440796, "learning_rate": 9.429088821973953e-06, "loss": 0.2498, "step": 19804, "teacher_loss": 0.2371712028980255 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5851638317108154, "learning_rate": 9.426980086483407e-06, "loss": 0.2976, "step": 19805, "teacher_loss": 0.2656567096710205 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.25133275985717773, "learning_rate": 9.424871478765503e-06, "loss": 0.1911, "step": 19806, "teacher_loss": 0.18446116149425507 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2726135849952698, "learning_rate": 9.422762998868579e-06, "loss": 0.2081, "step": 19807, "teacher_loss": 0.2009601891040802 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5450992584228516, "learning_rate": 9.420654646840974e-06, "loss": 0.3117, "step": 19808, "teacher_loss": 0.2857212424278259 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.42858901619911194, "learning_rate": 9.418546422731037e-06, "loss": 0.2175, "step": 19809, "teacher_loss": 0.19407512247562408 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.542395830154419, "learning_rate": 9.4164383265871e-06, "loss": 0.2459, "step": 19810, "teacher_loss": 0.2129519134759903 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.487173855304718, "learning_rate": 9.41433035845748e-06, "loss": 0.2589, "step": 19811, "teacher_loss": 0.23353135585784912 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.27700042724609375, "learning_rate": 9.412222518390526e-06, "loss": 0.2371, "step": 19812, "teacher_loss": 0.23265469074249268 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4004698097705841, "learning_rate": 9.410114806434555e-06, "loss": 0.2487, "step": 19813, "teacher_loss": 0.23188698291778564 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.6729552149772644, "learning_rate": 9.408007222637892e-06, "loss": 0.2735, "step": 19814, "teacher_loss": 0.22916674613952637 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.3781355619430542, "learning_rate": 9.405899767048856e-06, "loss": 0.2254, "step": 19815, "teacher_loss": 0.20846641063690186 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.17719748616218567, "learning_rate": 9.403792439715768e-06, "loss": 0.1645, "step": 19816, "teacher_loss": 0.16308313608169556 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.517128050327301, "learning_rate": 9.40168524068694e-06, "loss": 0.2487, "step": 19817, "teacher_loss": 0.21883654594421387 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4026499390602112, "learning_rate": 9.399578170010685e-06, "loss": 0.2319, "step": 19818, "teacher_loss": 0.21290743350982666 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5714460015296936, "learning_rate": 9.397471227735313e-06, "loss": 0.2312, "step": 19819, "teacher_loss": 0.1934167593717575 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.8639388084411621, "learning_rate": 9.395364413909126e-06, "loss": 0.2548, "step": 19820, "teacher_loss": 0.1871698796749115 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.16402670741081238, "learning_rate": 9.393257728580432e-06, "loss": 0.2029, "step": 19821, "teacher_loss": 0.20721864700317383 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4520057141780853, "learning_rate": 9.391151171797531e-06, "loss": 0.232, "step": 19822, "teacher_loss": 0.2075609564781189 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.3670817017555237, "learning_rate": 9.389044743608716e-06, "loss": 0.1519, "step": 19823, "teacher_loss": 0.12797099351882935 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.950454831123352, "learning_rate": 9.386938444062275e-06, "loss": 0.3227, "step": 19824, "teacher_loss": 0.25294923782348633 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2584499418735504, "learning_rate": 9.384832273206514e-06, "loss": 0.2518, "step": 19825, "teacher_loss": 0.2510247230529785 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5860365629196167, "learning_rate": 9.382726231089714e-06, "loss": 0.4688, "step": 19826, "teacher_loss": 0.45581158995628357 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5522368550300598, "learning_rate": 9.380620317760156e-06, "loss": 0.2177, "step": 19827, "teacher_loss": 0.1804906129837036 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.5885094404220581, "learning_rate": 9.378514533266131e-06, "loss": 0.2393, "step": 19828, "teacher_loss": 0.20053012669086456 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.43129289150238037, "learning_rate": 9.37640887765591e-06, "loss": 0.2453, "step": 19829, "teacher_loss": 0.22465723752975464 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.09199099242687225, "learning_rate": 9.374303350977772e-06, "loss": 0.1513, "step": 19830, "teacher_loss": 0.1579378843307495 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2707913815975189, "learning_rate": 9.372197953279995e-06, "loss": 0.1628, "step": 19831, "teacher_loss": 0.15074864029884338 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.2685883343219757, "learning_rate": 9.370092684610844e-06, "loss": 0.2421, "step": 19832, "teacher_loss": 0.23920132219791412 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 1.0313339233398438, "learning_rate": 9.367987545018588e-06, "loss": 0.2631, "step": 19833, "teacher_loss": 0.17778009176254272 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.4770818054676056, "learning_rate": 9.365882534551494e-06, "loss": 0.245, "step": 19834, "teacher_loss": 0.21923308074474335 }, { "compression_loss": 0.0, "epoch": 3.58, "label_loss": 0.26384660601615906, "learning_rate": 9.363777653257823e-06, "loss": 0.1777, "step": 19835, "teacher_loss": 0.16809137165546417 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2815355956554413, "learning_rate": 9.361672901185824e-06, "loss": 0.2293, "step": 19836, "teacher_loss": 0.22352814674377441 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.7137065529823303, "learning_rate": 9.35956827838377e-06, "loss": 0.3584, "step": 19837, "teacher_loss": 0.31895363330841064 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.26560086011886597, "learning_rate": 9.357463784899901e-06, "loss": 0.228, "step": 19838, "teacher_loss": 0.22382019460201263 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5089600086212158, "learning_rate": 9.35535942078247e-06, "loss": 0.2875, "step": 19839, "teacher_loss": 0.26288866996765137 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.49309974908828735, "learning_rate": 9.353255186079722e-06, "loss": 0.2206, "step": 19840, "teacher_loss": 0.1902964562177658 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.51983243227005, "learning_rate": 9.351151080839908e-06, "loss": 0.3306, "step": 19841, "teacher_loss": 0.3095444440841675 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.49582162499427795, "learning_rate": 9.349047105111258e-06, "loss": 0.2716, "step": 19842, "teacher_loss": 0.24669964611530304 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.1670864373445511, "learning_rate": 9.346943258942019e-06, "loss": 0.1483, "step": 19843, "teacher_loss": 0.14618347585201263 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.21581795811653137, "learning_rate": 9.34483954238042e-06, "loss": 0.155, "step": 19844, "teacher_loss": 0.14826829731464386 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2758815884590149, "learning_rate": 9.3427359554747e-06, "loss": 0.2416, "step": 19845, "teacher_loss": 0.2378234565258026 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.1983284056186676, "learning_rate": 9.340632498273073e-06, "loss": 0.1586, "step": 19846, "teacher_loss": 0.154202401638031 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.31562134623527527, "learning_rate": 9.338529170823787e-06, "loss": 0.3529, "step": 19847, "teacher_loss": 0.35700684785842896 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.4613686203956604, "learning_rate": 9.336425973175048e-06, "loss": 0.3214, "step": 19848, "teacher_loss": 0.3058563768863678 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.22698768973350525, "learning_rate": 9.334322905375077e-06, "loss": 0.1709, "step": 19849, "teacher_loss": 0.16466376185417175 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.16260185837745667, "learning_rate": 9.332219967472102e-06, "loss": 0.1651, "step": 19850, "teacher_loss": 0.16540908813476562 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.10824403166770935, "learning_rate": 9.33011715951433e-06, "loss": 0.1614, "step": 19851, "teacher_loss": 0.16731780767440796 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.64410799741745, "learning_rate": 9.328014481549965e-06, "loss": 0.2314, "step": 19852, "teacher_loss": 0.18554972112178802 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3166159689426422, "learning_rate": 9.325911933627228e-06, "loss": 0.184, "step": 19853, "teacher_loss": 0.16922476887702942 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3437650203704834, "learning_rate": 9.32380951579432e-06, "loss": 0.3179, "step": 19854, "teacher_loss": 0.3149966299533844 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.6198545694351196, "learning_rate": 9.321707228099437e-06, "loss": 0.6113, "step": 19855, "teacher_loss": 0.61036217212677 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2969365417957306, "learning_rate": 9.319605070590786e-06, "loss": 0.2365, "step": 19856, "teacher_loss": 0.2297685742378235 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.4402504861354828, "learning_rate": 9.317503043316559e-06, "loss": 0.4914, "step": 19857, "teacher_loss": 0.4970824718475342 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2952881455421448, "learning_rate": 9.315401146324948e-06, "loss": 0.2649, "step": 19858, "teacher_loss": 0.2615756690502167 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2437678575515747, "learning_rate": 9.31329937966415e-06, "loss": 0.1743, "step": 19859, "teacher_loss": 0.16660669445991516 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3402618169784546, "learning_rate": 9.311197743382349e-06, "loss": 0.1929, "step": 19860, "teacher_loss": 0.1765081286430359 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.4157627522945404, "learning_rate": 9.30909623752772e-06, "loss": 0.2332, "step": 19861, "teacher_loss": 0.21289071440696716 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 1.119473934173584, "learning_rate": 9.30699486214846e-06, "loss": 0.3305, "step": 19862, "teacher_loss": 0.2427879273891449 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.19123497605323792, "learning_rate": 9.304893617292737e-06, "loss": 0.1757, "step": 19863, "teacher_loss": 0.17402340471744537 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5166798830032349, "learning_rate": 9.302792503008725e-06, "loss": 0.3608, "step": 19864, "teacher_loss": 0.3434467315673828 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5520679950714111, "learning_rate": 9.300691519344602e-06, "loss": 0.23, "step": 19865, "teacher_loss": 0.1942073106765747 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.2275848388671875, "learning_rate": 9.298590666348536e-06, "loss": 0.2306, "step": 19866, "teacher_loss": 0.23096546530723572 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3816664218902588, "learning_rate": 9.29648994406869e-06, "loss": 0.2496, "step": 19867, "teacher_loss": 0.2349148392677307 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5917420387268066, "learning_rate": 9.294389352553231e-06, "loss": 0.2197, "step": 19868, "teacher_loss": 0.178411602973938 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3821257948875427, "learning_rate": 9.292288891850317e-06, "loss": 0.209, "step": 19869, "teacher_loss": 0.1897328794002533 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.54078608751297, "learning_rate": 9.290188562008112e-06, "loss": 0.2701, "step": 19870, "teacher_loss": 0.2400021255016327 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.40267056226730347, "learning_rate": 9.288088363074754e-06, "loss": 0.2357, "step": 19871, "teacher_loss": 0.21710573136806488 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.8273252248764038, "learning_rate": 9.285988295098414e-06, "loss": 0.2599, "step": 19872, "teacher_loss": 0.1968739777803421 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.4414958655834198, "learning_rate": 9.283888358127227e-06, "loss": 0.2137, "step": 19873, "teacher_loss": 0.1884109079837799 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3258158564567566, "learning_rate": 9.281788552209337e-06, "loss": 0.2718, "step": 19874, "teacher_loss": 0.2658481299877167 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.46209263801574707, "learning_rate": 9.2796888773929e-06, "loss": 0.294, "step": 19875, "teacher_loss": 0.27526938915252686 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.8330816030502319, "learning_rate": 9.277589333726044e-06, "loss": 0.2552, "step": 19876, "teacher_loss": 0.19099751114845276 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.9506896734237671, "learning_rate": 9.275489921256904e-06, "loss": 0.2608, "step": 19877, "teacher_loss": 0.18415045738220215 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5601963996887207, "learning_rate": 9.273390640033622e-06, "loss": 0.2398, "step": 19878, "teacher_loss": 0.20425289869308472 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.5329597592353821, "learning_rate": 9.271291490104323e-06, "loss": 0.2155, "step": 19879, "teacher_loss": 0.1802772879600525 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.6433287858963013, "learning_rate": 9.26919247151713e-06, "loss": 0.438, "step": 19880, "teacher_loss": 0.41513144969940186 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.545869767665863, "learning_rate": 9.267093584320175e-06, "loss": 0.2319, "step": 19881, "teacher_loss": 0.1970662623643875 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.42615216970443726, "learning_rate": 9.264994828561577e-06, "loss": 0.2315, "step": 19882, "teacher_loss": 0.20986221730709076 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.36910292506217957, "learning_rate": 9.262896204289449e-06, "loss": 0.2232, "step": 19883, "teacher_loss": 0.2069844901561737 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.9538236856460571, "learning_rate": 9.260797711551914e-06, "loss": 0.3383, "step": 19884, "teacher_loss": 0.26992595195770264 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.331133097410202, "learning_rate": 9.258699350397083e-06, "loss": 0.1709, "step": 19885, "teacher_loss": 0.15311212837696075 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.36773788928985596, "learning_rate": 9.256601120873062e-06, "loss": 0.1993, "step": 19886, "teacher_loss": 0.18053176999092102 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.43046027421951294, "learning_rate": 9.254503023027952e-06, "loss": 0.2001, "step": 19887, "teacher_loss": 0.17452047765254974 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.7361464500427246, "learning_rate": 9.252405056909865e-06, "loss": 0.4444, "step": 19888, "teacher_loss": 0.4119475781917572 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.3213765621185303, "learning_rate": 9.250307222566898e-06, "loss": 0.2163, "step": 19889, "teacher_loss": 0.20458675920963287 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.51975017786026, "learning_rate": 9.248209520047145e-06, "loss": 0.1932, "step": 19890, "teacher_loss": 0.15693068504333496 }, { "compression_loss": 0.0, "epoch": 3.59, "label_loss": 0.44473791122436523, "learning_rate": 9.246111949398708e-06, "loss": 0.2854, "step": 19891, "teacher_loss": 0.26768457889556885 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3298566937446594, "learning_rate": 9.24401451066967e-06, "loss": 0.2489, "step": 19892, "teacher_loss": 0.2398521602153778 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3874850273132324, "learning_rate": 9.24191720390812e-06, "loss": 0.1845, "step": 19893, "teacher_loss": 0.16198942065238953 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 1.0310026407241821, "learning_rate": 9.239820029162149e-06, "loss": 0.3011, "step": 19894, "teacher_loss": 0.22003161907196045 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.47316843271255493, "learning_rate": 9.237722986479836e-06, "loss": 0.1993, "step": 19895, "teacher_loss": 0.168876051902771 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5754532217979431, "learning_rate": 9.235626075909251e-06, "loss": 0.2359, "step": 19896, "teacher_loss": 0.19813281297683716 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5692824721336365, "learning_rate": 9.233529297498483e-06, "loss": 0.2061, "step": 19897, "teacher_loss": 0.16571253538131714 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6581144332885742, "learning_rate": 9.231432651295602e-06, "loss": 0.4084, "step": 19898, "teacher_loss": 0.3806276321411133 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4792437255382538, "learning_rate": 9.22933613734867e-06, "loss": 0.2515, "step": 19899, "teacher_loss": 0.22615377604961395 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3260572850704193, "learning_rate": 9.227239755705762e-06, "loss": 0.2347, "step": 19900, "teacher_loss": 0.2245473563671112 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.1858081817626953, "learning_rate": 9.225143506414938e-06, "loss": 0.1903, "step": 19901, "teacher_loss": 0.19077607989311218 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.31750157475471497, "learning_rate": 9.223047389524259e-06, "loss": 0.4107, "step": 19902, "teacher_loss": 0.42110931873321533 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6247704029083252, "learning_rate": 9.220951405081782e-06, "loss": 0.228, "step": 19903, "teacher_loss": 0.18392664194107056 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5406287908554077, "learning_rate": 9.218855553135567e-06, "loss": 0.3534, "step": 19904, "teacher_loss": 0.3326292335987091 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.519968569278717, "learning_rate": 9.216759833733657e-06, "loss": 0.2213, "step": 19905, "teacher_loss": 0.18807634711265564 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.23802652955055237, "learning_rate": 9.214664246924105e-06, "loss": 0.1857, "step": 19906, "teacher_loss": 0.17983950674533844 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4944468140602112, "learning_rate": 9.21256879275496e-06, "loss": 0.2778, "step": 19907, "teacher_loss": 0.2537689208984375 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.46359485387802124, "learning_rate": 9.210473471274262e-06, "loss": 0.2492, "step": 19908, "teacher_loss": 0.22541838884353638 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3357314467430115, "learning_rate": 9.208378282530041e-06, "loss": 0.218, "step": 19909, "teacher_loss": 0.20494595170021057 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.7142949104309082, "learning_rate": 9.206283226570352e-06, "loss": 0.272, "step": 19910, "teacher_loss": 0.2228180319070816 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.43322238326072693, "learning_rate": 9.204188303443214e-06, "loss": 0.2157, "step": 19911, "teacher_loss": 0.19153130054473877 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.7269574403762817, "learning_rate": 9.202093513196658e-06, "loss": 0.2401, "step": 19912, "teacher_loss": 0.18598949909210205 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.16784697771072388, "learning_rate": 9.199998855878719e-06, "loss": 0.1972, "step": 19913, "teacher_loss": 0.2004975974559784 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4637722969055176, "learning_rate": 9.197904331537416e-06, "loss": 0.202, "step": 19914, "teacher_loss": 0.17293208837509155 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.21799816191196442, "learning_rate": 9.195809940220768e-06, "loss": 0.1607, "step": 19915, "teacher_loss": 0.15434104204177856 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.24566230177879333, "learning_rate": 9.193715681976801e-06, "loss": 0.2661, "step": 19916, "teacher_loss": 0.2684234082698822 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5468559265136719, "learning_rate": 9.191621556853523e-06, "loss": 0.2143, "step": 19917, "teacher_loss": 0.17740073800086975 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.12039814889431, "learning_rate": 9.189527564898947e-06, "loss": 0.1622, "step": 19918, "teacher_loss": 0.1668863594532013 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.36444589495658875, "learning_rate": 9.187433706161087e-06, "loss": 0.1667, "step": 19919, "teacher_loss": 0.14473843574523926 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.20830503106117249, "learning_rate": 9.185339980687943e-06, "loss": 0.1788, "step": 19920, "teacher_loss": 0.17554441094398499 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6041305065155029, "learning_rate": 9.183246388527516e-06, "loss": 0.2897, "step": 19921, "teacher_loss": 0.25479745864868164 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6556446552276611, "learning_rate": 9.181152929727813e-06, "loss": 0.3241, "step": 19922, "teacher_loss": 0.28728777170181274 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.2636571526527405, "learning_rate": 9.179059604336833e-06, "loss": 0.1364, "step": 19923, "teacher_loss": 0.12225516140460968 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.2800932228565216, "learning_rate": 9.176966412402553e-06, "loss": 0.2066, "step": 19924, "teacher_loss": 0.1984708607196808 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.28558629751205444, "learning_rate": 9.174873353972987e-06, "loss": 0.2239, "step": 19925, "teacher_loss": 0.21703684329986572 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.35899457335472107, "learning_rate": 9.172780429096103e-06, "loss": 0.1899, "step": 19926, "teacher_loss": 0.17116200923919678 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.2219620943069458, "learning_rate": 9.170687637819897e-06, "loss": 0.2229, "step": 19927, "teacher_loss": 0.22297261655330658 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3342660963535309, "learning_rate": 9.168594980192341e-06, "loss": 0.2703, "step": 19928, "teacher_loss": 0.263232946395874 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4044509828090668, "learning_rate": 9.16650245626142e-06, "loss": 0.1765, "step": 19929, "teacher_loss": 0.15112614631652832 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3868674635887146, "learning_rate": 9.164410066075111e-06, "loss": 0.1629, "step": 19930, "teacher_loss": 0.13806472718715668 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.3807605803012848, "learning_rate": 9.162317809681378e-06, "loss": 0.2189, "step": 19931, "teacher_loss": 0.20092439651489258 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5362415909767151, "learning_rate": 9.1602256871282e-06, "loss": 0.1804, "step": 19932, "teacher_loss": 0.14086347818374634 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.27870625257492065, "learning_rate": 9.158133698463541e-06, "loss": 0.2415, "step": 19933, "teacher_loss": 0.23740097880363464 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.34084075689315796, "learning_rate": 9.156041843735352e-06, "loss": 0.1836, "step": 19934, "teacher_loss": 0.16613787412643433 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.29510897397994995, "learning_rate": 9.15395012299161e-06, "loss": 0.2091, "step": 19935, "teacher_loss": 0.19958528876304626 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4561639428138733, "learning_rate": 9.151858536280265e-06, "loss": 0.2342, "step": 19936, "teacher_loss": 0.20948924124240875 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.9261264204978943, "learning_rate": 9.149767083649265e-06, "loss": 0.4245, "step": 19937, "teacher_loss": 0.36876028776168823 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6204567551612854, "learning_rate": 9.14767576514657e-06, "loss": 0.2135, "step": 19938, "teacher_loss": 0.16823957860469818 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.31686142086982727, "learning_rate": 9.145584580820122e-06, "loss": 0.1877, "step": 19939, "teacher_loss": 0.1733289659023285 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5108184218406677, "learning_rate": 9.143493530717865e-06, "loss": 0.2267, "step": 19940, "teacher_loss": 0.19515544176101685 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.27669328451156616, "learning_rate": 9.141402614887745e-06, "loss": 0.1674, "step": 19941, "teacher_loss": 0.15526896715164185 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.465438574552536, "learning_rate": 9.1393118333777e-06, "loss": 0.202, "step": 19942, "teacher_loss": 0.17277663946151733 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.5035274028778076, "learning_rate": 9.137221186235657e-06, "loss": 0.2347, "step": 19943, "teacher_loss": 0.20480364561080933 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6934943199157715, "learning_rate": 9.13513067350956e-06, "loss": 0.3728, "step": 19944, "teacher_loss": 0.33720535039901733 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.6449599862098694, "learning_rate": 9.133040295247335e-06, "loss": 0.2066, "step": 19945, "teacher_loss": 0.15785589814186096 }, { "compression_loss": 0.0, "epoch": 3.6, "label_loss": 0.4059029221534729, "learning_rate": 9.130950051496897e-06, "loss": 0.2128, "step": 19946, "teacher_loss": 0.19134293496608734 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.547698974609375, "learning_rate": 9.128859942306184e-06, "loss": 0.2111, "step": 19947, "teacher_loss": 0.17367462813854218 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.904839038848877, "learning_rate": 9.126769967723115e-06, "loss": 0.2407, "step": 19948, "teacher_loss": 0.16695371270179749 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.4810352921485901, "learning_rate": 9.124680127795596e-06, "loss": 0.2789, "step": 19949, "teacher_loss": 0.2564719319343567 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.18367794156074524, "learning_rate": 9.122590422571545e-06, "loss": 0.2208, "step": 19950, "teacher_loss": 0.22494328022003174 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.8472585082054138, "learning_rate": 9.120500852098877e-06, "loss": 0.3032, "step": 19951, "teacher_loss": 0.24278613924980164 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.5415215492248535, "learning_rate": 9.118411416425496e-06, "loss": 0.2214, "step": 19952, "teacher_loss": 0.18588018417358398 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3888516426086426, "learning_rate": 9.116322115599304e-06, "loss": 0.1896, "step": 19953, "teacher_loss": 0.16741704940795898 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.22994567453861237, "learning_rate": 9.114232949668211e-06, "loss": 0.1882, "step": 19954, "teacher_loss": 0.1835949867963791 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.39963263273239136, "learning_rate": 9.112143918680108e-06, "loss": 0.2333, "step": 19955, "teacher_loss": 0.21482780575752258 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3057858347892761, "learning_rate": 9.11005502268289e-06, "loss": 0.1839, "step": 19956, "teacher_loss": 0.17035634815692902 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.7031893134117126, "learning_rate": 9.107966261724453e-06, "loss": 0.2282, "step": 19957, "teacher_loss": 0.17537254095077515 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6388310194015503, "learning_rate": 9.10587763585269e-06, "loss": 0.3069, "step": 19958, "teacher_loss": 0.27001726627349854 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.5127741098403931, "learning_rate": 9.10378914511547e-06, "loss": 0.2255, "step": 19959, "teacher_loss": 0.19358283281326294 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.5264831781387329, "learning_rate": 9.101700789560698e-06, "loss": 0.1808, "step": 19960, "teacher_loss": 0.14238545298576355 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 1.0926851034164429, "learning_rate": 9.099612569236239e-06, "loss": 0.3462, "step": 19961, "teacher_loss": 0.2632180154323578 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.5803426504135132, "learning_rate": 9.09752448418997e-06, "loss": 0.2774, "step": 19962, "teacher_loss": 0.2437680959701538 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.44937098026275635, "learning_rate": 9.09543653446977e-06, "loss": 0.234, "step": 19963, "teacher_loss": 0.2100919485092163 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.34498661756515503, "learning_rate": 9.09334872012351e-06, "loss": 0.383, "step": 19964, "teacher_loss": 0.3872228264808655 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.30845555663108826, "learning_rate": 9.091261041199051e-06, "loss": 0.1643, "step": 19965, "teacher_loss": 0.14832833409309387 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.8969360589981079, "learning_rate": 9.089173497744263e-06, "loss": 0.284, "step": 19966, "teacher_loss": 0.2159484326839447 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6321310997009277, "learning_rate": 9.087086089807005e-06, "loss": 0.218, "step": 19967, "teacher_loss": 0.171972393989563 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6322377324104309, "learning_rate": 9.084998817435139e-06, "loss": 0.239, "step": 19968, "teacher_loss": 0.19525766372680664 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.47490963339805603, "learning_rate": 9.082911680676507e-06, "loss": 0.2929, "step": 19969, "teacher_loss": 0.2727214992046356 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.692403256893158, "learning_rate": 9.08082467957898e-06, "loss": 0.325, "step": 19970, "teacher_loss": 0.2841499149799347 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6878918409347534, "learning_rate": 9.078737814190393e-06, "loss": 0.4139, "step": 19971, "teacher_loss": 0.38343846797943115 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.8984094262123108, "learning_rate": 9.076651084558588e-06, "loss": 0.2838, "step": 19972, "teacher_loss": 0.215561181306839 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.16383318603038788, "learning_rate": 9.074564490731424e-06, "loss": 0.2152, "step": 19973, "teacher_loss": 0.22089210152626038 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3293156623840332, "learning_rate": 9.072478032756726e-06, "loss": 0.205, "step": 19974, "teacher_loss": 0.19120121002197266 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.2958255410194397, "learning_rate": 9.070391710682334e-06, "loss": 0.1854, "step": 19975, "teacher_loss": 0.17310123145580292 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3125835657119751, "learning_rate": 9.068305524556086e-06, "loss": 0.288, "step": 19976, "teacher_loss": 0.2853020131587982 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.7090179920196533, "learning_rate": 9.066219474425808e-06, "loss": 0.395, "step": 19977, "teacher_loss": 0.36016225814819336 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.8693034648895264, "learning_rate": 9.064133560339322e-06, "loss": 0.3069, "step": 19978, "teacher_loss": 0.2444513440132141 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6638647317886353, "learning_rate": 9.062047782344461e-06, "loss": 0.3217, "step": 19979, "teacher_loss": 0.28368502855300903 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.6860822439193726, "learning_rate": 9.059962140489041e-06, "loss": 0.3384, "step": 19980, "teacher_loss": 0.2998095154762268 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3258597254753113, "learning_rate": 9.057876634820878e-06, "loss": 0.2668, "step": 19981, "teacher_loss": 0.2601909339427948 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.37443482875823975, "learning_rate": 9.05579126538779e-06, "loss": 0.2316, "step": 19982, "teacher_loss": 0.21574074029922485 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.34044164419174194, "learning_rate": 9.053706032237592e-06, "loss": 0.3333, "step": 19983, "teacher_loss": 0.3325221836566925 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.26624196767807007, "learning_rate": 9.051620935418075e-06, "loss": 0.1917, "step": 19984, "teacher_loss": 0.18342649936676025 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.2921028137207031, "learning_rate": 9.049535974977067e-06, "loss": 0.1777, "step": 19985, "teacher_loss": 0.1649664044380188 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.2905924320220947, "learning_rate": 9.047451150962354e-06, "loss": 0.2437, "step": 19986, "teacher_loss": 0.2385289967060089 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.18643257021903992, "learning_rate": 9.045366463421736e-06, "loss": 0.1808, "step": 19987, "teacher_loss": 0.1801559329032898 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.9008071422576904, "learning_rate": 9.043281912403016e-06, "loss": 0.3188, "step": 19988, "teacher_loss": 0.2541596293449402 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.5576692223548889, "learning_rate": 9.041197497953984e-06, "loss": 0.2389, "step": 19989, "teacher_loss": 0.20346516370773315 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.3989705443382263, "learning_rate": 9.039113220122426e-06, "loss": 0.2483, "step": 19990, "teacher_loss": 0.23151624202728271 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.42784440517425537, "learning_rate": 9.037029078956126e-06, "loss": 0.2173, "step": 19991, "teacher_loss": 0.19386866688728333 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.4746456742286682, "learning_rate": 9.034945074502879e-06, "loss": 0.4454, "step": 19992, "teacher_loss": 0.44210803508758545 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.31803518533706665, "learning_rate": 9.032861206810456e-06, "loss": 0.2132, "step": 19993, "teacher_loss": 0.20155873894691467 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.1412222981452942, "learning_rate": 9.03077747592663e-06, "loss": 0.1887, "step": 19994, "teacher_loss": 0.19392287731170654 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.26981955766677856, "learning_rate": 9.028693881899185e-06, "loss": 0.1784, "step": 19995, "teacher_loss": 0.16822095215320587 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.43685564398765564, "learning_rate": 9.026610424775885e-06, "loss": 0.2381, "step": 19996, "teacher_loss": 0.21597439050674438 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.38053351640701294, "learning_rate": 9.024527104604493e-06, "loss": 0.3429, "step": 19997, "teacher_loss": 0.338720440864563 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.35880979895591736, "learning_rate": 9.022443921432785e-06, "loss": 0.1782, "step": 19998, "teacher_loss": 0.15809421241283417 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.4427030086517334, "learning_rate": 9.020360875308518e-06, "loss": 0.2476, "step": 19999, "teacher_loss": 0.2259388417005539 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.22023199498653412, "learning_rate": 9.018277966279443e-06, "loss": 0.2655, "step": 20000, "teacher_loss": 0.27047526836395264 }, { "epoch": 3.61, "eval_exact_match": 80.43519394512772, "eval_f1": 87.77667688597502, "step": 20000 }, { "compression_loss": 0.0, "epoch": 3.61, "label_loss": 0.4198623299598694, "learning_rate": 9.016195194393324e-06, "loss": 0.2399, "step": 20001, "teacher_loss": 0.21993079781532288 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.5562000274658203, "learning_rate": 9.014112559697908e-06, "loss": 0.2588, "step": 20002, "teacher_loss": 0.22572064399719238 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.32476580142974854, "learning_rate": 9.01203006224094e-06, "loss": 0.2309, "step": 20003, "teacher_loss": 0.22042503952980042 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.34965893626213074, "learning_rate": 9.009947702070175e-06, "loss": 0.215, "step": 20004, "teacher_loss": 0.2000509351491928 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.1953909695148468, "learning_rate": 9.00786547923335e-06, "loss": 0.2654, "step": 20005, "teacher_loss": 0.2731941044330597 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.2841143310070038, "learning_rate": 9.0057833937782e-06, "loss": 0.2276, "step": 20006, "teacher_loss": 0.22130073606967926 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.2699645757675171, "learning_rate": 9.003701445752468e-06, "loss": 0.1719, "step": 20007, "teacher_loss": 0.1610306203365326 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.52392578125, "learning_rate": 9.001619635203889e-06, "loss": 0.3048, "step": 20008, "teacher_loss": 0.2804965674877167 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6863110065460205, "learning_rate": 8.999537962180177e-06, "loss": 0.4419, "step": 20009, "teacher_loss": 0.4147794842720032 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6442421674728394, "learning_rate": 8.99745642672908e-06, "loss": 0.2044, "step": 20010, "teacher_loss": 0.15548613667488098 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.45127272605895996, "learning_rate": 8.995375028898305e-06, "loss": 0.2959, "step": 20011, "teacher_loss": 0.278641939163208 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.41365790367126465, "learning_rate": 8.99329376873558e-06, "loss": 0.1769, "step": 20012, "teacher_loss": 0.15058261156082153 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3105650246143341, "learning_rate": 8.991212646288615e-06, "loss": 0.1974, "step": 20013, "teacher_loss": 0.18481430411338806 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.42537838220596313, "learning_rate": 8.989131661605133e-06, "loss": 0.2173, "step": 20014, "teacher_loss": 0.1942179799079895 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.7166765332221985, "learning_rate": 8.987050814732839e-06, "loss": 0.2258, "step": 20015, "teacher_loss": 0.17123952507972717 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6444005966186523, "learning_rate": 8.98497010571944e-06, "loss": 0.2643, "step": 20016, "teacher_loss": 0.22210508584976196 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.567467451095581, "learning_rate": 8.982889534612646e-06, "loss": 0.1974, "step": 20017, "teacher_loss": 0.15623821318149567 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.28660139441490173, "learning_rate": 8.980809101460158e-06, "loss": 0.226, "step": 20018, "teacher_loss": 0.2192680835723877 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.8865457773208618, "learning_rate": 8.97872880630966e-06, "loss": 0.2889, "step": 20019, "teacher_loss": 0.2225276231765747 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3245702087879181, "learning_rate": 8.976648649208866e-06, "loss": 0.2031, "step": 20020, "teacher_loss": 0.18959203362464905 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.7300923466682434, "learning_rate": 8.974568630205462e-06, "loss": 0.2439, "step": 20021, "teacher_loss": 0.1898818016052246 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.33470776677131653, "learning_rate": 8.972488749347126e-06, "loss": 0.2251, "step": 20022, "teacher_loss": 0.2129203975200653 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.19618995487689972, "learning_rate": 8.970409006681558e-06, "loss": 0.2345, "step": 20023, "teacher_loss": 0.23874971270561218 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.39174938201904297, "learning_rate": 8.968329402256432e-06, "loss": 0.2273, "step": 20024, "teacher_loss": 0.20899301767349243 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.4898705780506134, "learning_rate": 8.966249936119425e-06, "loss": 0.2364, "step": 20025, "teacher_loss": 0.2082604467868805 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.49955886602401733, "learning_rate": 8.964170608318222e-06, "loss": 0.2584, "step": 20026, "teacher_loss": 0.23161479830741882 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.26668012142181396, "learning_rate": 8.962091418900488e-06, "loss": 0.3176, "step": 20027, "teacher_loss": 0.32322514057159424 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6411541700363159, "learning_rate": 8.960012367913894e-06, "loss": 0.2583, "step": 20028, "teacher_loss": 0.21577411890029907 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6308726668357849, "learning_rate": 8.957933455406112e-06, "loss": 0.2711, "step": 20029, "teacher_loss": 0.23112300038337708 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.28858649730682373, "learning_rate": 8.955854681424797e-06, "loss": 0.2131, "step": 20030, "teacher_loss": 0.20475052297115326 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.5992348194122314, "learning_rate": 8.953776046017618e-06, "loss": 0.2238, "step": 20031, "teacher_loss": 0.18214033544063568 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.4789152443408966, "learning_rate": 8.951697549232217e-06, "loss": 0.2628, "step": 20032, "teacher_loss": 0.23875531554222107 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6461098194122314, "learning_rate": 8.949619191116268e-06, "loss": 0.2291, "step": 20033, "teacher_loss": 0.18272440135478973 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3495197594165802, "learning_rate": 8.947540971717406e-06, "loss": 0.1799, "step": 20034, "teacher_loss": 0.1610354483127594 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3763784170150757, "learning_rate": 8.94546289108328e-06, "loss": 0.2, "step": 20035, "teacher_loss": 0.18040016293525696 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.38527852296829224, "learning_rate": 8.943384949261544e-06, "loss": 0.1897, "step": 20036, "teacher_loss": 0.16793614625930786 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.23364733159542084, "learning_rate": 8.94130714629983e-06, "loss": 0.2265, "step": 20037, "teacher_loss": 0.2257448434829712 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6762915849685669, "learning_rate": 8.939229482245774e-06, "loss": 0.2612, "step": 20038, "teacher_loss": 0.215114563703537 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.48530444502830505, "learning_rate": 8.937151957147021e-06, "loss": 0.2778, "step": 20039, "teacher_loss": 0.2547089457511902 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.2480117827653885, "learning_rate": 8.935074571051194e-06, "loss": 0.1717, "step": 20040, "teacher_loss": 0.1632324457168579 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.6173408031463623, "learning_rate": 8.93299732400592e-06, "loss": 0.2926, "step": 20041, "teacher_loss": 0.2565540671348572 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.15449869632720947, "learning_rate": 8.930920216058832e-06, "loss": 0.1756, "step": 20042, "teacher_loss": 0.17798486351966858 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 1.4325318336486816, "learning_rate": 8.92884324725755e-06, "loss": 0.3504, "step": 20043, "teacher_loss": 0.2301802784204483 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.5709894895553589, "learning_rate": 8.92676641764968e-06, "loss": 0.3331, "step": 20044, "teacher_loss": 0.3066791892051697 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.9169344305992126, "learning_rate": 8.924689727282856e-06, "loss": 0.3521, "step": 20045, "teacher_loss": 0.2893841564655304 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.21883299946784973, "learning_rate": 8.922613176204685e-06, "loss": 0.206, "step": 20046, "teacher_loss": 0.20453500747680664 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.557162880897522, "learning_rate": 8.920536764462762e-06, "loss": 0.2518, "step": 20047, "teacher_loss": 0.21787531673908234 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.33095499873161316, "learning_rate": 8.918460492104712e-06, "loss": 0.1349, "step": 20048, "teacher_loss": 0.11316149681806564 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.7031446695327759, "learning_rate": 8.916384359178127e-06, "loss": 0.2771, "step": 20049, "teacher_loss": 0.22977030277252197 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.19005897641181946, "learning_rate": 8.914308365730608e-06, "loss": 0.2343, "step": 20050, "teacher_loss": 0.23919777572155 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3658381700515747, "learning_rate": 8.912232511809753e-06, "loss": 0.2543, "step": 20051, "teacher_loss": 0.24188114702701569 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.37547972798347473, "learning_rate": 8.910156797463156e-06, "loss": 0.2404, "step": 20052, "teacher_loss": 0.22537273168563843 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.7734442353248596, "learning_rate": 8.908081222738403e-06, "loss": 0.2849, "step": 20053, "teacher_loss": 0.23065117001533508 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.31264474987983704, "learning_rate": 8.906005787683083e-06, "loss": 0.1596, "step": 20054, "teacher_loss": 0.1426108181476593 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.3375699818134308, "learning_rate": 8.90393049234478e-06, "loss": 0.1823, "step": 20055, "teacher_loss": 0.16508419811725616 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 1.3302650451660156, "learning_rate": 8.90185533677108e-06, "loss": 0.7684, "step": 20056, "teacher_loss": 0.7059221863746643 }, { "compression_loss": 0.0, "epoch": 3.62, "label_loss": 0.7007139921188354, "learning_rate": 8.899780321009544e-06, "loss": 0.2647, "step": 20057, "teacher_loss": 0.2162424921989441 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.9428095817565918, "learning_rate": 8.897705445107762e-06, "loss": 0.2394, "step": 20058, "teacher_loss": 0.16121414303779602 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.19587141275405884, "learning_rate": 8.895630709113299e-06, "loss": 0.2031, "step": 20059, "teacher_loss": 0.20394980907440186 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4799688160419464, "learning_rate": 8.893556113073718e-06, "loss": 0.243, "step": 20060, "teacher_loss": 0.2167043387889862 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.08864134550094604, "learning_rate": 8.891481657036589e-06, "loss": 0.1596, "step": 20061, "teacher_loss": 0.16744858026504517 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5029218792915344, "learning_rate": 8.889407341049472e-06, "loss": 0.2393, "step": 20062, "teacher_loss": 0.21003496646881104 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.14628297090530396, "learning_rate": 8.887333165159921e-06, "loss": 0.1691, "step": 20063, "teacher_loss": 0.17162545025348663 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5763274431228638, "learning_rate": 8.885259129415497e-06, "loss": 0.1934, "step": 20064, "teacher_loss": 0.15080051124095917 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.46548789739608765, "learning_rate": 8.88318523386375e-06, "loss": 0.2315, "step": 20065, "teacher_loss": 0.20555195212364197 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.8760215044021606, "learning_rate": 8.881111478552221e-06, "loss": 0.2636, "step": 20066, "teacher_loss": 0.19559669494628906 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5683371424674988, "learning_rate": 8.879037863528464e-06, "loss": 0.2172, "step": 20067, "teacher_loss": 0.17822539806365967 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.2963174283504486, "learning_rate": 8.876964388840019e-06, "loss": 0.2069, "step": 20068, "teacher_loss": 0.19691653549671173 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.7573115825653076, "learning_rate": 8.874891054534416e-06, "loss": 0.318, "step": 20069, "teacher_loss": 0.26917916536331177 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.20222747325897217, "learning_rate": 8.872817860659203e-06, "loss": 0.185, "step": 20070, "teacher_loss": 0.18308418989181519 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.23196372389793396, "learning_rate": 8.870744807261908e-06, "loss": 0.2156, "step": 20071, "teacher_loss": 0.2137642353773117 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.36386919021606445, "learning_rate": 8.868671894390056e-06, "loss": 0.2418, "step": 20072, "teacher_loss": 0.22822481393814087 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.46272265911102295, "learning_rate": 8.86659912209117e-06, "loss": 0.2253, "step": 20073, "teacher_loss": 0.19888511300086975 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5408502817153931, "learning_rate": 8.86452649041278e-06, "loss": 0.1977, "step": 20074, "teacher_loss": 0.15957465767860413 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.41661757230758667, "learning_rate": 8.862453999402406e-06, "loss": 0.2604, "step": 20075, "teacher_loss": 0.24306653439998627 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.7930693626403809, "learning_rate": 8.860381649107553e-06, "loss": 0.313, "step": 20076, "teacher_loss": 0.2596908509731293 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.2829344868659973, "learning_rate": 8.858309439575747e-06, "loss": 0.1325, "step": 20077, "teacher_loss": 0.11576279997825623 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 1.1637897491455078, "learning_rate": 8.856237370854493e-06, "loss": 0.5858, "step": 20078, "teacher_loss": 0.5215364098548889 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4017188549041748, "learning_rate": 8.85416544299129e-06, "loss": 0.252, "step": 20079, "teacher_loss": 0.2353803515434265 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.29159560799598694, "learning_rate": 8.852093656033654e-06, "loss": 0.1824, "step": 20080, "teacher_loss": 0.17028193175792694 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.9635019898414612, "learning_rate": 8.850022010029077e-06, "loss": 0.307, "step": 20081, "teacher_loss": 0.23407316207885742 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.44538551568984985, "learning_rate": 8.84795050502505e-06, "loss": 0.1841, "step": 20082, "teacher_loss": 0.15502850711345673 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.21815159916877747, "learning_rate": 8.845879141069082e-06, "loss": 0.1667, "step": 20083, "teacher_loss": 0.16098147630691528 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.315278559923172, "learning_rate": 8.84380791820865e-06, "loss": 0.1872, "step": 20084, "teacher_loss": 0.17291617393493652 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5410944819450378, "learning_rate": 8.841736836491245e-06, "loss": 0.287, "step": 20085, "teacher_loss": 0.2588125169277191 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.6641720533370972, "learning_rate": 8.839665895964352e-06, "loss": 0.2421, "step": 20086, "teacher_loss": 0.1952308714389801 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.3963237404823303, "learning_rate": 8.837595096675451e-06, "loss": 0.2196, "step": 20087, "teacher_loss": 0.2000068575143814 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5264793634414673, "learning_rate": 8.835524438672013e-06, "loss": 0.2557, "step": 20088, "teacher_loss": 0.2256353199481964 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.2969478964805603, "learning_rate": 8.833453922001524e-06, "loss": 0.2212, "step": 20089, "teacher_loss": 0.21273405849933624 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.648070216178894, "learning_rate": 8.831383546711448e-06, "loss": 0.1893, "step": 20090, "teacher_loss": 0.1383148431777954 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4691910743713379, "learning_rate": 8.829313312849248e-06, "loss": 0.224, "step": 20091, "teacher_loss": 0.1967625916004181 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.3811272978782654, "learning_rate": 8.827243220462398e-06, "loss": 0.2162, "step": 20092, "teacher_loss": 0.19787511229515076 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5865489840507507, "learning_rate": 8.825173269598354e-06, "loss": 0.2552, "step": 20093, "teacher_loss": 0.21843412518501282 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.5812729001045227, "learning_rate": 8.823103460304573e-06, "loss": 0.263, "step": 20094, "teacher_loss": 0.2276797890663147 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.2159494161605835, "learning_rate": 8.821033792628503e-06, "loss": 0.1398, "step": 20095, "teacher_loss": 0.13137969374656677 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.6607014536857605, "learning_rate": 8.818964266617614e-06, "loss": 0.1963, "step": 20096, "teacher_loss": 0.14465680718421936 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4555264711380005, "learning_rate": 8.816894882319339e-06, "loss": 0.2772, "step": 20097, "teacher_loss": 0.2573592960834503 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.8002545833587646, "learning_rate": 8.814825639781123e-06, "loss": 0.3438, "step": 20098, "teacher_loss": 0.29311633110046387 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.21377700567245483, "learning_rate": 8.812756539050413e-06, "loss": 0.1654, "step": 20099, "teacher_loss": 0.16003045439720154 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.21958070993423462, "learning_rate": 8.810687580174646e-06, "loss": 0.1963, "step": 20100, "teacher_loss": 0.1937561184167862 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.32097071409225464, "learning_rate": 8.808618763201253e-06, "loss": 0.2769, "step": 20101, "teacher_loss": 0.27201932668685913 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.35265713930130005, "learning_rate": 8.806550088177671e-06, "loss": 0.2835, "step": 20102, "teacher_loss": 0.2758619487285614 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 1.5644676685333252, "learning_rate": 8.80448155515133e-06, "loss": 0.3634, "step": 20103, "teacher_loss": 0.22998693585395813 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.6261578798294067, "learning_rate": 8.802413164169647e-06, "loss": 0.2595, "step": 20104, "teacher_loss": 0.21875405311584473 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.3301115930080414, "learning_rate": 8.80034491528005e-06, "loss": 0.2062, "step": 20105, "teacher_loss": 0.19241949915885925 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.49898579716682434, "learning_rate": 8.798276808529961e-06, "loss": 0.2622, "step": 20106, "teacher_loss": 0.23591215908527374 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4973953366279602, "learning_rate": 8.796208843966784e-06, "loss": 0.263, "step": 20107, "teacher_loss": 0.236973375082016 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.41038110852241516, "learning_rate": 8.794141021637944e-06, "loss": 0.29, "step": 20108, "teacher_loss": 0.27661973237991333 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.3936123549938202, "learning_rate": 8.792073341590843e-06, "loss": 0.1599, "step": 20109, "teacher_loss": 0.1339282989501953 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.33661508560180664, "learning_rate": 8.790005803872884e-06, "loss": 0.2002, "step": 20110, "teacher_loss": 0.1850610375404358 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.2975943088531494, "learning_rate": 8.787938408531478e-06, "loss": 0.2094, "step": 20111, "teacher_loss": 0.1995452344417572 }, { "compression_loss": 0.0, "epoch": 3.63, "label_loss": 0.4563060700893402, "learning_rate": 8.785871155614019e-06, "loss": 0.186, "step": 20112, "teacher_loss": 0.1560056209564209 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3161929249763489, "learning_rate": 8.7838040451679e-06, "loss": 0.2158, "step": 20113, "teacher_loss": 0.20463277399539948 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.9171396493911743, "learning_rate": 8.78173707724052e-06, "loss": 0.2339, "step": 20114, "teacher_loss": 0.1579330861568451 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.39789557456970215, "learning_rate": 8.779670251879265e-06, "loss": 0.2461, "step": 20115, "teacher_loss": 0.22918111085891724 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.35162532329559326, "learning_rate": 8.777603569131526e-06, "loss": 0.1989, "step": 20116, "teacher_loss": 0.18192234635353088 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.37086138129234314, "learning_rate": 8.77553702904467e-06, "loss": 0.2186, "step": 20117, "teacher_loss": 0.20163457095623016 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.27766144275665283, "learning_rate": 8.7734706316661e-06, "loss": 0.2892, "step": 20118, "teacher_loss": 0.29052528738975525 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.243914395570755, "learning_rate": 8.771404377043177e-06, "loss": 0.2158, "step": 20119, "teacher_loss": 0.21273070573806763 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.46920087933540344, "learning_rate": 8.76933826522327e-06, "loss": 0.24, "step": 20120, "teacher_loss": 0.21455080807209015 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.25863173604011536, "learning_rate": 8.767272296253766e-06, "loss": 0.1798, "step": 20121, "teacher_loss": 0.171036958694458 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.7251174449920654, "learning_rate": 8.765206470182018e-06, "loss": 0.2916, "step": 20122, "teacher_loss": 0.24340078234672546 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2674739360809326, "learning_rate": 8.763140787055388e-06, "loss": 0.2369, "step": 20123, "teacher_loss": 0.23350904881954193 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.5866222977638245, "learning_rate": 8.761075246921246e-06, "loss": 0.2758, "step": 20124, "teacher_loss": 0.24129986763000488 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2375549077987671, "learning_rate": 8.759009849826943e-06, "loss": 0.1841, "step": 20125, "teacher_loss": 0.1781502366065979 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.671151876449585, "learning_rate": 8.756944595819827e-06, "loss": 0.3537, "step": 20126, "teacher_loss": 0.31845611333847046 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.9768250584602356, "learning_rate": 8.754879484947261e-06, "loss": 0.2996, "step": 20127, "teacher_loss": 0.22433406114578247 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.44259709119796753, "learning_rate": 8.752814517256587e-06, "loss": 0.2276, "step": 20128, "teacher_loss": 0.2037428915500641 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3680911064147949, "learning_rate": 8.750749692795139e-06, "loss": 0.2112, "step": 20129, "teacher_loss": 0.1938011646270752 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.7616530060768127, "learning_rate": 8.748685011610264e-06, "loss": 0.3618, "step": 20130, "teacher_loss": 0.3173372745513916 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.31108957529067993, "learning_rate": 8.746620473749307e-06, "loss": 0.1927, "step": 20131, "teacher_loss": 0.17951622605323792 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.5059388875961304, "learning_rate": 8.744556079259586e-06, "loss": 0.2164, "step": 20132, "teacher_loss": 0.1841844618320465 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.13759613037109375, "learning_rate": 8.74249182818844e-06, "loss": 0.1911, "step": 20133, "teacher_loss": 0.19709071516990662 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.23851510882377625, "learning_rate": 8.740427720583199e-06, "loss": 0.1868, "step": 20134, "teacher_loss": 0.18101832270622253 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.082100510597229, "learning_rate": 8.738363756491186e-06, "loss": 0.13, "step": 20135, "teacher_loss": 0.1352669596672058 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.6354144215583801, "learning_rate": 8.736299935959706e-06, "loss": 0.2475, "step": 20136, "teacher_loss": 0.20438790321350098 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.4087774157524109, "learning_rate": 8.734236259036101e-06, "loss": 0.2781, "step": 20137, "teacher_loss": 0.26354488730430603 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.6276995539665222, "learning_rate": 8.732172725767673e-06, "loss": 0.27, "step": 20138, "teacher_loss": 0.23030540347099304 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.48228561878204346, "learning_rate": 8.730109336201726e-06, "loss": 0.1914, "step": 20139, "teacher_loss": 0.1590750515460968 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.46530789136886597, "learning_rate": 8.728046090385572e-06, "loss": 0.3492, "step": 20140, "teacher_loss": 0.3363426923751831 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.4980327785015106, "learning_rate": 8.725982988366527e-06, "loss": 0.3404, "step": 20141, "teacher_loss": 0.32283854484558105 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.7775996923446655, "learning_rate": 8.723920030191874e-06, "loss": 0.3359, "step": 20142, "teacher_loss": 0.2868744134902954 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.29764047265052795, "learning_rate": 8.721857215908916e-06, "loss": 0.2195, "step": 20143, "teacher_loss": 0.21085713803768158 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.4057222306728363, "learning_rate": 8.719794545564957e-06, "loss": 0.2728, "step": 20144, "teacher_loss": 0.2580621838569641 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2733703851699829, "learning_rate": 8.717732019207274e-06, "loss": 0.214, "step": 20145, "teacher_loss": 0.20743875205516815 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.5477606058120728, "learning_rate": 8.71566963688316e-06, "loss": 0.2585, "step": 20146, "teacher_loss": 0.22630661725997925 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.25076591968536377, "learning_rate": 8.713607398639909e-06, "loss": 0.1836, "step": 20147, "teacher_loss": 0.1761234700679779 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.1703055500984192, "learning_rate": 8.711545304524781e-06, "loss": 0.1849, "step": 20148, "teacher_loss": 0.1864854246377945 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2854404151439667, "learning_rate": 8.70948335458507e-06, "loss": 0.2203, "step": 20149, "teacher_loss": 0.21306897699832916 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2666966915130615, "learning_rate": 8.70742154886805e-06, "loss": 0.2164, "step": 20150, "teacher_loss": 0.21078705787658691 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.2651047706604004, "learning_rate": 8.705359887420982e-06, "loss": 0.1753, "step": 20151, "teacher_loss": 0.16531670093536377 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3236709237098694, "learning_rate": 8.703298370291146e-06, "loss": 0.2077, "step": 20152, "teacher_loss": 0.19479495286941528 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.5478780269622803, "learning_rate": 8.701236997525792e-06, "loss": 0.2968, "step": 20153, "teacher_loss": 0.2689024806022644 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3117363154888153, "learning_rate": 8.699175769172191e-06, "loss": 0.1854, "step": 20154, "teacher_loss": 0.17131835222244263 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3794316053390503, "learning_rate": 8.697114685277603e-06, "loss": 0.2333, "step": 20155, "teacher_loss": 0.21705839037895203 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.30206358432769775, "learning_rate": 8.695053745889274e-06, "loss": 0.1828, "step": 20156, "teacher_loss": 0.16952402889728546 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3111790418624878, "learning_rate": 8.692992951054464e-06, "loss": 0.2182, "step": 20157, "teacher_loss": 0.20790278911590576 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3668109178543091, "learning_rate": 8.69093230082041e-06, "loss": 0.3025, "step": 20158, "teacher_loss": 0.29536116123199463 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3567535877227783, "learning_rate": 8.688871795234365e-06, "loss": 0.2268, "step": 20159, "teacher_loss": 0.21241214871406555 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.18598857522010803, "learning_rate": 8.686811434343574e-06, "loss": 0.1997, "step": 20160, "teacher_loss": 0.20123563706874847 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.5409897565841675, "learning_rate": 8.68475121819526e-06, "loss": 0.3076, "step": 20161, "teacher_loss": 0.28170478343963623 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3420236110687256, "learning_rate": 8.682691146836674e-06, "loss": 0.184, "step": 20162, "teacher_loss": 0.1664143204689026 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.46551820635795593, "learning_rate": 8.680631220315034e-06, "loss": 0.1692, "step": 20163, "teacher_loss": 0.13622616231441498 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.3419816493988037, "learning_rate": 8.678571438677573e-06, "loss": 0.1857, "step": 20164, "teacher_loss": 0.16830158233642578 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.33312928676605225, "learning_rate": 8.676511801971522e-06, "loss": 0.1744, "step": 20165, "teacher_loss": 0.15680186450481415 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.30069297552108765, "learning_rate": 8.674452310244091e-06, "loss": 0.2323, "step": 20166, "teacher_loss": 0.22467570006847382 }, { "compression_loss": 0.0, "epoch": 3.64, "label_loss": 0.4934171438217163, "learning_rate": 8.672392963542504e-06, "loss": 0.2266, "step": 20167, "teacher_loss": 0.19699643552303314 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.6495916843414307, "learning_rate": 8.67033376191398e-06, "loss": 0.2064, "step": 20168, "teacher_loss": 0.15714342892169952 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.34311622381210327, "learning_rate": 8.668274705405722e-06, "loss": 0.2386, "step": 20169, "teacher_loss": 0.22699680924415588 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.31838762760162354, "learning_rate": 8.666215794064939e-06, "loss": 0.2915, "step": 20170, "teacher_loss": 0.28847840428352356 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4417462944984436, "learning_rate": 8.664157027938846e-06, "loss": 0.2436, "step": 20171, "teacher_loss": 0.2215639352798462 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.9922329187393188, "learning_rate": 8.66209840707463e-06, "loss": 0.3844, "step": 20172, "teacher_loss": 0.31684058904647827 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.26555538177490234, "learning_rate": 8.660039931519495e-06, "loss": 0.2155, "step": 20173, "teacher_loss": 0.20992310345172882 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.2855551540851593, "learning_rate": 8.657981601320643e-06, "loss": 0.2624, "step": 20174, "teacher_loss": 0.25986993312835693 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.692298412322998, "learning_rate": 8.655923416525258e-06, "loss": 0.2558, "step": 20175, "teacher_loss": 0.20730829238891602 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.7641059756278992, "learning_rate": 8.653865377180517e-06, "loss": 0.2749, "step": 20176, "teacher_loss": 0.2205168455839157 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.7931911945343018, "learning_rate": 8.651807483333627e-06, "loss": 0.3376, "step": 20177, "teacher_loss": 0.28699490427970886 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.32132911682128906, "learning_rate": 8.649749735031758e-06, "loss": 0.1621, "step": 20178, "teacher_loss": 0.14437633752822876 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4211026430130005, "learning_rate": 8.647692132322084e-06, "loss": 0.2097, "step": 20179, "teacher_loss": 0.18623338639736176 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.7544137239456177, "learning_rate": 8.645634675251784e-06, "loss": 0.3068, "step": 20180, "teacher_loss": 0.2570740282535553 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.37173134088516235, "learning_rate": 8.643577363868036e-06, "loss": 0.2214, "step": 20181, "teacher_loss": 0.20471572875976562 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.6343812346458435, "learning_rate": 8.641520198217995e-06, "loss": 0.2393, "step": 20182, "teacher_loss": 0.19541388750076294 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4001957178115845, "learning_rate": 8.639463178348831e-06, "loss": 0.2453, "step": 20183, "teacher_loss": 0.2280999720096588 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.5001657009124756, "learning_rate": 8.637406304307714e-06, "loss": 0.396, "step": 20184, "teacher_loss": 0.3844684362411499 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.5640811920166016, "learning_rate": 8.635349576141795e-06, "loss": 0.2438, "step": 20185, "teacher_loss": 0.2082669585943222 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.6251089572906494, "learning_rate": 8.633292993898214e-06, "loss": 0.2573, "step": 20186, "teacher_loss": 0.21646592020988464 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3213304579257965, "learning_rate": 8.631236557624149e-06, "loss": 0.2364, "step": 20187, "teacher_loss": 0.22691258788108826 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.640985369682312, "learning_rate": 8.629180267366736e-06, "loss": 0.2176, "step": 20188, "teacher_loss": 0.170509934425354 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3191848695278168, "learning_rate": 8.627124123173112e-06, "loss": 0.2336, "step": 20189, "teacher_loss": 0.22413255274295807 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.28675830364227295, "learning_rate": 8.625068125090424e-06, "loss": 0.2099, "step": 20190, "teacher_loss": 0.20134183764457703 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.6062551736831665, "learning_rate": 8.62301227316582e-06, "loss": 0.1923, "step": 20191, "teacher_loss": 0.14634083211421967 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3339284658432007, "learning_rate": 8.620956567446417e-06, "loss": 0.2681, "step": 20192, "teacher_loss": 0.2608281970024109 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4099222719669342, "learning_rate": 8.618901007979353e-06, "loss": 0.2312, "step": 20193, "teacher_loss": 0.21135810017585754 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.22398120164871216, "learning_rate": 8.616845594811765e-06, "loss": 0.1565, "step": 20194, "teacher_loss": 0.14898596704006195 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4728623926639557, "learning_rate": 8.614790327990766e-06, "loss": 0.2602, "step": 20195, "teacher_loss": 0.23652443289756775 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3112582862377167, "learning_rate": 8.612735207563478e-06, "loss": 0.2918, "step": 20196, "teacher_loss": 0.2895861268043518 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.29711753129959106, "learning_rate": 8.610680233577029e-06, "loss": 0.2017, "step": 20197, "teacher_loss": 0.19114097952842712 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.22253045439720154, "learning_rate": 8.608625406078526e-06, "loss": 0.1352, "step": 20198, "teacher_loss": 0.1254810094833374 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.19349834322929382, "learning_rate": 8.606570725115068e-06, "loss": 0.2, "step": 20199, "teacher_loss": 0.2007497251033783 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3512103855609894, "learning_rate": 8.604516190733786e-06, "loss": 0.2537, "step": 20200, "teacher_loss": 0.24283862113952637 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.23127895593643188, "learning_rate": 8.602461802981773e-06, "loss": 0.1694, "step": 20201, "teacher_loss": 0.16256925463676453 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.5138158798217773, "learning_rate": 8.600407561906127e-06, "loss": 0.1972, "step": 20202, "teacher_loss": 0.16204431653022766 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.2443954199552536, "learning_rate": 8.598353467553946e-06, "loss": 0.1835, "step": 20203, "teacher_loss": 0.17674848437309265 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.7003247737884521, "learning_rate": 8.596299519972333e-06, "loss": 0.2857, "step": 20204, "teacher_loss": 0.2396685630083084 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.857781708240509, "learning_rate": 8.594245719208367e-06, "loss": 0.3947, "step": 20205, "teacher_loss": 0.3432343900203705 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.953424870967865, "learning_rate": 8.59219206530914e-06, "loss": 0.2608, "step": 20206, "teacher_loss": 0.18379944562911987 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.1468905806541443, "learning_rate": 8.590138558321747e-06, "loss": 0.1479, "step": 20207, "teacher_loss": 0.14801552891731262 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4537774622440338, "learning_rate": 8.58808519829325e-06, "loss": 0.2399, "step": 20208, "teacher_loss": 0.2161661982536316 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4669089913368225, "learning_rate": 8.586031985270735e-06, "loss": 0.2345, "step": 20209, "teacher_loss": 0.2086581289768219 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.17330892384052277, "learning_rate": 8.583978919301284e-06, "loss": 0.1555, "step": 20210, "teacher_loss": 0.15353038907051086 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4137945771217346, "learning_rate": 8.581926000431954e-06, "loss": 0.3127, "step": 20211, "teacher_loss": 0.30148079991340637 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.8254531621932983, "learning_rate": 8.579873228709824e-06, "loss": 0.2637, "step": 20212, "teacher_loss": 0.20128926634788513 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.45209255814552307, "learning_rate": 8.577820604181948e-06, "loss": 0.302, "step": 20213, "teacher_loss": 0.28537318110466003 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.27556315064430237, "learning_rate": 8.575768126895387e-06, "loss": 0.1987, "step": 20214, "teacher_loss": 0.19011583924293518 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.30866843461990356, "learning_rate": 8.57371579689721e-06, "loss": 0.2684, "step": 20215, "teacher_loss": 0.2638859152793884 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4336906671524048, "learning_rate": 8.571663614234458e-06, "loss": 0.2219, "step": 20216, "teacher_loss": 0.19842275977134705 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.5355225801467896, "learning_rate": 8.569611578954186e-06, "loss": 0.2924, "step": 20217, "teacher_loss": 0.2653810679912567 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.36329615116119385, "learning_rate": 8.567559691103447e-06, "loss": 0.2721, "step": 20218, "teacher_loss": 0.26202094554901123 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.46648550033569336, "learning_rate": 8.56550795072927e-06, "loss": 0.2471, "step": 20219, "teacher_loss": 0.22274476289749146 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.4365149140357971, "learning_rate": 8.563456357878713e-06, "loss": 0.1778, "step": 20220, "teacher_loss": 0.149072527885437 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.676209568977356, "learning_rate": 8.561404912598798e-06, "loss": 0.2441, "step": 20221, "teacher_loss": 0.19609782099723816 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.32545363903045654, "learning_rate": 8.559353614936565e-06, "loss": 0.2342, "step": 20222, "teacher_loss": 0.22407066822052002 }, { "compression_loss": 0.0, "epoch": 3.65, "label_loss": 0.3496916890144348, "learning_rate": 8.557302464939049e-06, "loss": 0.2225, "step": 20223, "teacher_loss": 0.2084149271249771 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.20512907207012177, "learning_rate": 8.555251462653264e-06, "loss": 0.1571, "step": 20224, "teacher_loss": 0.1517784297466278 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.20934784412384033, "learning_rate": 8.553200608126247e-06, "loss": 0.2, "step": 20225, "teacher_loss": 0.1989302784204483 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5861580967903137, "learning_rate": 8.551149901405006e-06, "loss": 0.2046, "step": 20226, "teacher_loss": 0.16221728920936584 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.7154439687728882, "learning_rate": 8.549099342536561e-06, "loss": 0.2822, "step": 20227, "teacher_loss": 0.23411481082439423 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.23381400108337402, "learning_rate": 8.547048931567936e-06, "loss": 0.2189, "step": 20228, "teacher_loss": 0.2172200083732605 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.24642375111579895, "learning_rate": 8.544998668546124e-06, "loss": 0.1974, "step": 20229, "teacher_loss": 0.19196170568466187 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.39428532123565674, "learning_rate": 8.542948553518142e-06, "loss": 0.2247, "step": 20230, "teacher_loss": 0.20582188665866852 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5662950873374939, "learning_rate": 8.540898586530996e-06, "loss": 0.3535, "step": 20231, "teacher_loss": 0.32986587285995483 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.12203732132911682, "learning_rate": 8.538848767631672e-06, "loss": 0.1436, "step": 20232, "teacher_loss": 0.14601413905620575 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.3659417927265167, "learning_rate": 8.536799096867176e-06, "loss": 0.2323, "step": 20233, "teacher_loss": 0.2174283117055893 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.466904878616333, "learning_rate": 8.534749574284505e-06, "loss": 0.2377, "step": 20234, "teacher_loss": 0.2122471034526825 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.0996318832039833, "learning_rate": 8.532700199930642e-06, "loss": 0.132, "step": 20235, "teacher_loss": 0.1356450468301773 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.9772903919219971, "learning_rate": 8.53065097385256e-06, "loss": 0.3448, "step": 20236, "teacher_loss": 0.274472177028656 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5410850644111633, "learning_rate": 8.528601896097269e-06, "loss": 0.1853, "step": 20237, "teacher_loss": 0.14581036567687988 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.43765372037887573, "learning_rate": 8.526552966711734e-06, "loss": 0.1799, "step": 20238, "teacher_loss": 0.1512630134820938 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.44078630208969116, "learning_rate": 8.524504185742923e-06, "loss": 0.2372, "step": 20239, "teacher_loss": 0.2145601212978363 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.37051481008529663, "learning_rate": 8.52245555323782e-06, "loss": 0.2246, "step": 20240, "teacher_loss": 0.20836688578128815 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.2678895592689514, "learning_rate": 8.520407069243395e-06, "loss": 0.1973, "step": 20241, "teacher_loss": 0.18946653604507446 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 1.1230154037475586, "learning_rate": 8.518358733806602e-06, "loss": 0.3173, "step": 20242, "teacher_loss": 0.22779735922813416 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.7786638736724854, "learning_rate": 8.516310546974411e-06, "loss": 0.3261, "step": 20243, "teacher_loss": 0.275812029838562 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.37805962562561035, "learning_rate": 8.514262508793784e-06, "loss": 0.2483, "step": 20244, "teacher_loss": 0.23392270505428314 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5138403177261353, "learning_rate": 8.51221461931167e-06, "loss": 0.2251, "step": 20245, "teacher_loss": 0.19301816821098328 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.34679239988327026, "learning_rate": 8.510166878575022e-06, "loss": 0.2109, "step": 20246, "teacher_loss": 0.19579347968101501 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 1.0638108253479004, "learning_rate": 8.508119286630795e-06, "loss": 0.3026, "step": 20247, "teacher_loss": 0.2179986536502838 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4252743721008301, "learning_rate": 8.506071843525931e-06, "loss": 0.3248, "step": 20248, "teacher_loss": 0.31366756558418274 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.38507816195487976, "learning_rate": 8.504024549307357e-06, "loss": 0.4053, "step": 20249, "teacher_loss": 0.4074931740760803 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.2570658028125763, "learning_rate": 8.501977404022034e-06, "loss": 0.1761, "step": 20250, "teacher_loss": 0.16707256436347961 }, { "epoch": 3.66, "eval_exact_match": 80.2081362346263, "eval_f1": 87.48776695731692, "step": 20250 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.18904536962509155, "learning_rate": 8.499930407716888e-06, "loss": 0.3088, "step": 20251, "teacher_loss": 0.3221076428890228 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.27548471093177795, "learning_rate": 8.497883560438845e-06, "loss": 0.2193, "step": 20252, "teacher_loss": 0.21304328739643097 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.33199456334114075, "learning_rate": 8.495836862234837e-06, "loss": 0.1861, "step": 20253, "teacher_loss": 0.1699065864086151 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4198136031627655, "learning_rate": 8.493790313151797e-06, "loss": 0.1786, "step": 20254, "teacher_loss": 0.15182146430015564 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4110315144062042, "learning_rate": 8.491743913236629e-06, "loss": 0.1696, "step": 20255, "teacher_loss": 0.14281292259693146 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.12331907451152802, "learning_rate": 8.489697662536264e-06, "loss": 0.1334, "step": 20256, "teacher_loss": 0.13449972867965698 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.2964077591896057, "learning_rate": 8.487651561097615e-06, "loss": 0.2314, "step": 20257, "teacher_loss": 0.22417324781417847 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.9203104376792908, "learning_rate": 8.485605608967586e-06, "loss": 0.546, "step": 20258, "teacher_loss": 0.5044133067131042 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.1376548409461975, "learning_rate": 8.48355980619309e-06, "loss": 0.2226, "step": 20259, "teacher_loss": 0.2320544421672821 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.23093575239181519, "learning_rate": 8.481514152821037e-06, "loss": 0.1756, "step": 20260, "teacher_loss": 0.16949301958084106 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.8521703481674194, "learning_rate": 8.479468648898319e-06, "loss": 0.5114, "step": 20261, "teacher_loss": 0.4735168516635895 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5308355093002319, "learning_rate": 8.47742329447183e-06, "loss": 0.2022, "step": 20262, "teacher_loss": 0.16567447781562805 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.37594202160835266, "learning_rate": 8.475378089588471e-06, "loss": 0.1997, "step": 20263, "teacher_loss": 0.18008652329444885 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4753708243370056, "learning_rate": 8.473333034295135e-06, "loss": 0.2977, "step": 20264, "teacher_loss": 0.2779938578605652 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4141441583633423, "learning_rate": 8.471288128638697e-06, "loss": 0.3199, "step": 20265, "teacher_loss": 0.3094070553779602 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.38325268030166626, "learning_rate": 8.46924337266605e-06, "loss": 0.2191, "step": 20266, "teacher_loss": 0.20088501274585724 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4207196831703186, "learning_rate": 8.467198766424078e-06, "loss": 0.216, "step": 20267, "teacher_loss": 0.19322559237480164 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.1613987684249878, "learning_rate": 8.465154309959648e-06, "loss": 0.1847, "step": 20268, "teacher_loss": 0.18726277351379395 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.4992298483848572, "learning_rate": 8.463110003319633e-06, "loss": 0.254, "step": 20269, "teacher_loss": 0.22676478326320648 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.27575981616973877, "learning_rate": 8.461065846550916e-06, "loss": 0.2024, "step": 20270, "teacher_loss": 0.19425445795059204 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5447056293487549, "learning_rate": 8.459021839700346e-06, "loss": 0.1984, "step": 20271, "teacher_loss": 0.15990683436393738 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.40601879358291626, "learning_rate": 8.456977982814796e-06, "loss": 0.2415, "step": 20272, "teacher_loss": 0.22327515482902527 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5051358342170715, "learning_rate": 8.454934275941129e-06, "loss": 0.3202, "step": 20273, "teacher_loss": 0.29966962337493896 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.6076987981796265, "learning_rate": 8.452890719126193e-06, "loss": 0.2756, "step": 20274, "teacher_loss": 0.23864729702472687 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.7815028429031372, "learning_rate": 8.450847312416845e-06, "loss": 0.3498, "step": 20275, "teacher_loss": 0.3018571734428406 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.23986394703388214, "learning_rate": 8.448804055859931e-06, "loss": 0.1485, "step": 20276, "teacher_loss": 0.13837382197380066 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.35530006885528564, "learning_rate": 8.446760949502296e-06, "loss": 0.2752, "step": 20277, "teacher_loss": 0.26631683111190796 }, { "compression_loss": 0.0, "epoch": 3.66, "label_loss": 0.5229827165603638, "learning_rate": 8.444717993390792e-06, "loss": 0.2796, "step": 20278, "teacher_loss": 0.2525658905506134 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2837910056114197, "learning_rate": 8.442675187572242e-06, "loss": 0.2379, "step": 20279, "teacher_loss": 0.23279443383216858 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.6391610503196716, "learning_rate": 8.440632532093493e-06, "loss": 0.252, "step": 20280, "teacher_loss": 0.20892734825611115 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.34770065546035767, "learning_rate": 8.43859002700138e-06, "loss": 0.194, "step": 20281, "teacher_loss": 0.17691677808761597 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.7221857309341431, "learning_rate": 8.436547672342717e-06, "loss": 0.1891, "step": 20282, "teacher_loss": 0.12988263368606567 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.11522579193115234, "learning_rate": 8.434505468164345e-06, "loss": 0.1711, "step": 20283, "teacher_loss": 0.17727959156036377 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.21450819075107574, "learning_rate": 8.432463414513072e-06, "loss": 0.1717, "step": 20284, "teacher_loss": 0.16691450774669647 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.366448312997818, "learning_rate": 8.43042151143573e-06, "loss": 0.3625, "step": 20285, "teacher_loss": 0.36201101541519165 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.6070150136947632, "learning_rate": 8.428379758979119e-06, "loss": 0.2725, "step": 20286, "teacher_loss": 0.23537677526474 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5561429262161255, "learning_rate": 8.426338157190058e-06, "loss": 0.2657, "step": 20287, "teacher_loss": 0.2333860695362091 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.30099374055862427, "learning_rate": 8.424296706115358e-06, "loss": 0.3015, "step": 20288, "teacher_loss": 0.3015149235725403 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5226436257362366, "learning_rate": 8.422255405801817e-06, "loss": 0.3397, "step": 20289, "teacher_loss": 0.3193542957305908 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.32014569640159607, "learning_rate": 8.420214256296235e-06, "loss": 0.1942, "step": 20290, "teacher_loss": 0.1801832616329193 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5011449456214905, "learning_rate": 8.41817325764542e-06, "loss": 0.24, "step": 20291, "teacher_loss": 0.21096271276474 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.33939769864082336, "learning_rate": 8.416132409896153e-06, "loss": 0.2753, "step": 20292, "teacher_loss": 0.2682017385959625 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.40753042697906494, "learning_rate": 8.41409171309523e-06, "loss": 0.2346, "step": 20293, "teacher_loss": 0.2153315246105194 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.34742921590805054, "learning_rate": 8.412051167289447e-06, "loss": 0.2304, "step": 20294, "teacher_loss": 0.21736004948616028 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.4847373366355896, "learning_rate": 8.410010772525571e-06, "loss": 0.2578, "step": 20295, "teacher_loss": 0.23258140683174133 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.3675590455532074, "learning_rate": 8.40797052885039e-06, "loss": 0.2155, "step": 20296, "teacher_loss": 0.1986561119556427 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.4584321081638336, "learning_rate": 8.405930436310688e-06, "loss": 0.2883, "step": 20297, "teacher_loss": 0.2694227695465088 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.27614128589630127, "learning_rate": 8.40389049495323e-06, "loss": 0.1981, "step": 20298, "teacher_loss": 0.1893869936466217 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2228301465511322, "learning_rate": 8.401850704824775e-06, "loss": 0.1998, "step": 20299, "teacher_loss": 0.19723562896251678 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5884919166564941, "learning_rate": 8.399811065972112e-06, "loss": 0.2554, "step": 20300, "teacher_loss": 0.21842415630817413 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.37131041288375854, "learning_rate": 8.397771578441995e-06, "loss": 0.2327, "step": 20301, "teacher_loss": 0.21731264889240265 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.7470121383666992, "learning_rate": 8.395732242281175e-06, "loss": 0.2843, "step": 20302, "teacher_loss": 0.23289614915847778 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.6012039184570312, "learning_rate": 8.393693057536412e-06, "loss": 0.4507, "step": 20303, "teacher_loss": 0.43402254581451416 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.689649224281311, "learning_rate": 8.391654024254465e-06, "loss": 0.2492, "step": 20304, "teacher_loss": 0.2002355009317398 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.14456743001937866, "learning_rate": 8.389615142482078e-06, "loss": 0.1452, "step": 20305, "teacher_loss": 0.14532293379306793 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.1396913081407547, "learning_rate": 8.387576412265994e-06, "loss": 0.1611, "step": 20306, "teacher_loss": 0.1634686291217804 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.28904545307159424, "learning_rate": 8.385537833652963e-06, "loss": 0.2849, "step": 20307, "teacher_loss": 0.2844006419181824 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2781648337841034, "learning_rate": 8.383499406689718e-06, "loss": 0.2269, "step": 20308, "teacher_loss": 0.22123923897743225 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.6340066194534302, "learning_rate": 8.381461131422984e-06, "loss": 0.2394, "step": 20309, "teacher_loss": 0.1955721378326416 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.33694514632225037, "learning_rate": 8.379423007899513e-06, "loss": 0.2403, "step": 20310, "teacher_loss": 0.22953510284423828 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5375656485557556, "learning_rate": 8.377385036166024e-06, "loss": 0.4538, "step": 20311, "teacher_loss": 0.4444499611854553 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.26863205432891846, "learning_rate": 8.375347216269236e-06, "loss": 0.1569, "step": 20312, "teacher_loss": 0.14447666704654694 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2612832188606262, "learning_rate": 8.373309548255871e-06, "loss": 0.2026, "step": 20313, "teacher_loss": 0.19604083895683289 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5189799070358276, "learning_rate": 8.371272032172658e-06, "loss": 0.2403, "step": 20314, "teacher_loss": 0.20935894548892975 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.4244531989097595, "learning_rate": 8.369234668066301e-06, "loss": 0.2819, "step": 20315, "teacher_loss": 0.266079843044281 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2923707067966461, "learning_rate": 8.36719745598351e-06, "loss": 0.2307, "step": 20316, "teacher_loss": 0.22387002408504486 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.288173645734787, "learning_rate": 8.365160395970997e-06, "loss": 0.2125, "step": 20317, "teacher_loss": 0.20408092439174652 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.3977069854736328, "learning_rate": 8.363123488075464e-06, "loss": 0.3739, "step": 20318, "teacher_loss": 0.37125885486602783 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 1.0839234590530396, "learning_rate": 8.361086732343607e-06, "loss": 0.3195, "step": 20319, "teacher_loss": 0.23459284007549286 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.21802380681037903, "learning_rate": 8.359050128822135e-06, "loss": 0.247, "step": 20320, "teacher_loss": 0.25027501583099365 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.3903188109397888, "learning_rate": 8.357013677557725e-06, "loss": 0.239, "step": 20321, "teacher_loss": 0.22220875322818756 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.5343143343925476, "learning_rate": 8.354977378597077e-06, "loss": 0.2601, "step": 20322, "teacher_loss": 0.2296162247657776 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.8922644853591919, "learning_rate": 8.352941231986881e-06, "loss": 0.4402, "step": 20323, "teacher_loss": 0.3899794816970825 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.33488988876342773, "learning_rate": 8.35090523777381e-06, "loss": 0.1856, "step": 20324, "teacher_loss": 0.16898152232170105 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.3360133171081543, "learning_rate": 8.348869396004545e-06, "loss": 0.1502, "step": 20325, "teacher_loss": 0.1295267939567566 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.418695867061615, "learning_rate": 8.346833706725763e-06, "loss": 0.2011, "step": 20326, "teacher_loss": 0.17686712741851807 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.14462333917617798, "learning_rate": 8.34479816998414e-06, "loss": 0.1617, "step": 20327, "teacher_loss": 0.16357830166816711 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.48511573672294617, "learning_rate": 8.342762785826338e-06, "loss": 0.2382, "step": 20328, "teacher_loss": 0.21080774068832397 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.4053296446800232, "learning_rate": 8.340727554299025e-06, "loss": 0.2342, "step": 20329, "teacher_loss": 0.21518591046333313 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.2639651894569397, "learning_rate": 8.33869247544887e-06, "loss": 0.1804, "step": 20330, "teacher_loss": 0.1710660755634308 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.4180266261100769, "learning_rate": 8.33665754932252e-06, "loss": 0.2205, "step": 20331, "teacher_loss": 0.19860179722309113 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.6020634174346924, "learning_rate": 8.334622775966634e-06, "loss": 0.2835, "step": 20332, "teacher_loss": 0.24805109202861786 }, { "compression_loss": 0.0, "epoch": 3.67, "label_loss": 0.8178966045379639, "learning_rate": 8.332588155427869e-06, "loss": 0.2442, "step": 20333, "teacher_loss": 0.18041634559631348 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.24088552594184875, "learning_rate": 8.330553687752861e-06, "loss": 0.1513, "step": 20334, "teacher_loss": 0.14134082198143005 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2003917694091797, "learning_rate": 8.328519372988268e-06, "loss": 0.1816, "step": 20335, "teacher_loss": 0.17948219180107117 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2967814803123474, "learning_rate": 8.32648521118072e-06, "loss": 0.2628, "step": 20336, "teacher_loss": 0.25899559259414673 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2348853200674057, "learning_rate": 8.324451202376855e-06, "loss": 0.2041, "step": 20337, "teacher_loss": 0.20064151287078857 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.21910922229290009, "learning_rate": 8.322417346623317e-06, "loss": 0.2039, "step": 20338, "teacher_loss": 0.20220354199409485 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.479958176612854, "learning_rate": 8.320383643966721e-06, "loss": 0.3048, "step": 20339, "teacher_loss": 0.2853829264640808 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5123432874679565, "learning_rate": 8.318350094453702e-06, "loss": 0.2228, "step": 20340, "teacher_loss": 0.19068023562431335 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.30526140332221985, "learning_rate": 8.31631669813089e-06, "loss": 0.1997, "step": 20341, "teacher_loss": 0.18794922530651093 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.30018049478530884, "learning_rate": 8.31428345504489e-06, "loss": 0.2734, "step": 20342, "teacher_loss": 0.27042222023010254 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.6939787864685059, "learning_rate": 8.312250365242331e-06, "loss": 0.439, "step": 20343, "teacher_loss": 0.4106296896934509 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5731590986251831, "learning_rate": 8.310217428769816e-06, "loss": 0.2604, "step": 20344, "teacher_loss": 0.22563323378562927 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.6227819919586182, "learning_rate": 8.308184645673958e-06, "loss": 0.3086, "step": 20345, "teacher_loss": 0.27365371584892273 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2726311981678009, "learning_rate": 8.306152016001368e-06, "loss": 0.2262, "step": 20346, "teacher_loss": 0.2210034281015396 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2026922106742859, "learning_rate": 8.304119539798638e-06, "loss": 0.177, "step": 20347, "teacher_loss": 0.17410127818584442 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.261749804019928, "learning_rate": 8.302087217112377e-06, "loss": 0.1814, "step": 20348, "teacher_loss": 0.17242054641246796 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4985075891017914, "learning_rate": 8.30005504798917e-06, "loss": 0.2441, "step": 20349, "teacher_loss": 0.21580630540847778 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.7892706990242004, "learning_rate": 8.298023032475613e-06, "loss": 0.2459, "step": 20350, "teacher_loss": 0.18557390570640564 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.27995237708091736, "learning_rate": 8.2959911706183e-06, "loss": 0.183, "step": 20351, "teacher_loss": 0.1721746325492859 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.30418139696121216, "learning_rate": 8.293959462463802e-06, "loss": 0.1888, "step": 20352, "teacher_loss": 0.17603248357772827 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.46759098768234253, "learning_rate": 8.29192790805871e-06, "loss": 0.2522, "step": 20353, "teacher_loss": 0.22831980884075165 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.7757827043533325, "learning_rate": 8.289896507449604e-06, "loss": 0.3346, "step": 20354, "teacher_loss": 0.2855909466743469 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5297353267669678, "learning_rate": 8.28786526068305e-06, "loss": 0.4594, "step": 20355, "teacher_loss": 0.45154690742492676 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.6049448251724243, "learning_rate": 8.285834167805617e-06, "loss": 0.3056, "step": 20356, "teacher_loss": 0.2723514437675476 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.550566554069519, "learning_rate": 8.283803228863886e-06, "loss": 0.288, "step": 20357, "teacher_loss": 0.25881457328796387 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4555302858352661, "learning_rate": 8.281772443904409e-06, "loss": 0.2217, "step": 20358, "teacher_loss": 0.19570600986480713 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.42996734380722046, "learning_rate": 8.279741812973736e-06, "loss": 0.2497, "step": 20359, "teacher_loss": 0.22970044612884521 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5414090156555176, "learning_rate": 8.277711336118446e-06, "loss": 0.1725, "step": 20360, "teacher_loss": 0.13145697116851807 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.3630410432815552, "learning_rate": 8.275681013385083e-06, "loss": 0.2105, "step": 20361, "teacher_loss": 0.19349659979343414 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.44658923149108887, "learning_rate": 8.27365084482018e-06, "loss": 0.2628, "step": 20362, "teacher_loss": 0.24239778518676758 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.21085229516029358, "learning_rate": 8.271620830470308e-06, "loss": 0.188, "step": 20363, "teacher_loss": 0.18547658622264862 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5636534690856934, "learning_rate": 8.269590970381999e-06, "loss": 0.2355, "step": 20364, "teacher_loss": 0.19901852309703827 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.26244986057281494, "learning_rate": 8.267561264601783e-06, "loss": 0.1651, "step": 20365, "teacher_loss": 0.15428493916988373 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4168131351470947, "learning_rate": 8.265531713176204e-06, "loss": 0.2612, "step": 20366, "teacher_loss": 0.24396264553070068 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.638247013092041, "learning_rate": 8.263502316151797e-06, "loss": 0.4886, "step": 20367, "teacher_loss": 0.47201007604599 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.39159074425697327, "learning_rate": 8.261473073575076e-06, "loss": 0.2228, "step": 20368, "teacher_loss": 0.20407122373580933 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4306749999523163, "learning_rate": 8.259443985492577e-06, "loss": 0.3133, "step": 20369, "teacher_loss": 0.300204336643219 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2095477432012558, "learning_rate": 8.257415051950825e-06, "loss": 0.1812, "step": 20370, "teacher_loss": 0.17806529998779297 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.5440797209739685, "learning_rate": 8.25538627299633e-06, "loss": 0.2741, "step": 20371, "teacher_loss": 0.2441214770078659 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.7079646587371826, "learning_rate": 8.253357648675594e-06, "loss": 0.2744, "step": 20372, "teacher_loss": 0.22624942660331726 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.46335119009017944, "learning_rate": 8.251329179035151e-06, "loss": 0.2922, "step": 20373, "teacher_loss": 0.2731907069683075 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.32143378257751465, "learning_rate": 8.249300864121497e-06, "loss": 0.2561, "step": 20374, "teacher_loss": 0.24881336092948914 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.29442209005355835, "learning_rate": 8.247272703981127e-06, "loss": 0.3145, "step": 20375, "teacher_loss": 0.3166942000389099 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4065555930137634, "learning_rate": 8.245244698660553e-06, "loss": 0.2559, "step": 20376, "teacher_loss": 0.23910623788833618 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.23264452815055847, "learning_rate": 8.243216848206267e-06, "loss": 0.1833, "step": 20377, "teacher_loss": 0.17780563235282898 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.24286848306655884, "learning_rate": 8.241189152664756e-06, "loss": 0.2228, "step": 20378, "teacher_loss": 0.22058901190757751 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.47323596477508545, "learning_rate": 8.239161612082515e-06, "loss": 0.2998, "step": 20379, "teacher_loss": 0.2805844843387604 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.25970566272735596, "learning_rate": 8.237134226506033e-06, "loss": 0.1895, "step": 20380, "teacher_loss": 0.18165087699890137 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 1.0713167190551758, "learning_rate": 8.235106995981783e-06, "loss": 0.273, "step": 20381, "teacher_loss": 0.1843479573726654 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2944996953010559, "learning_rate": 8.233079920556246e-06, "loss": 0.2429, "step": 20382, "teacher_loss": 0.23720312118530273 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.2966660261154175, "learning_rate": 8.231053000275904e-06, "loss": 0.2405, "step": 20383, "teacher_loss": 0.23430955410003662 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.4842275083065033, "learning_rate": 8.229026235187215e-06, "loss": 0.1763, "step": 20384, "teacher_loss": 0.14207223057746887 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.3229365348815918, "learning_rate": 8.226999625336663e-06, "loss": 0.2425, "step": 20385, "teacher_loss": 0.2335261106491089 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.3411158323287964, "learning_rate": 8.224973170770695e-06, "loss": 0.2045, "step": 20386, "teacher_loss": 0.1892724335193634 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.782532811164856, "learning_rate": 8.222946871535786e-06, "loss": 0.3481, "step": 20387, "teacher_loss": 0.2998234033584595 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.7450612783432007, "learning_rate": 8.220920727678383e-06, "loss": 0.344, "step": 20388, "teacher_loss": 0.2994205355644226 }, { "compression_loss": 0.0, "epoch": 3.68, "label_loss": 0.1958162784576416, "learning_rate": 8.218894739244939e-06, "loss": 0.1665, "step": 20389, "teacher_loss": 0.16325756907463074 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3126941919326782, "learning_rate": 8.216868906281918e-06, "loss": 0.229, "step": 20390, "teacher_loss": 0.21969658136367798 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.22135841846466064, "learning_rate": 8.214843228835746e-06, "loss": 0.1789, "step": 20391, "teacher_loss": 0.17416563630104065 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.31070998311042786, "learning_rate": 8.212817706952878e-06, "loss": 0.2127, "step": 20392, "teacher_loss": 0.20176814496517181 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4560564458370209, "learning_rate": 8.210792340679756e-06, "loss": 0.2079, "step": 20393, "teacher_loss": 0.18028277158737183 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.6389520764350891, "learning_rate": 8.208767130062805e-06, "loss": 0.3146, "step": 20394, "teacher_loss": 0.278572142124176 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4027354419231415, "learning_rate": 8.206742075148462e-06, "loss": 0.2546, "step": 20395, "teacher_loss": 0.2381429523229599 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3905818462371826, "learning_rate": 8.204717175983163e-06, "loss": 0.304, "step": 20396, "teacher_loss": 0.2943894565105438 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.28477054834365845, "learning_rate": 8.20269243261332e-06, "loss": 0.1656, "step": 20397, "teacher_loss": 0.15231354534626007 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.6624535322189331, "learning_rate": 8.200667845085365e-06, "loss": 0.3406, "step": 20398, "teacher_loss": 0.30482351779937744 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.38137638568878174, "learning_rate": 8.198643413445705e-06, "loss": 0.1973, "step": 20399, "teacher_loss": 0.17686955630779266 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3406970500946045, "learning_rate": 8.19661913774076e-06, "loss": 0.2355, "step": 20400, "teacher_loss": 0.22382305562496185 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.7200182676315308, "learning_rate": 8.194595018016949e-06, "loss": 0.3264, "step": 20401, "teacher_loss": 0.2826971709728241 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3059428930282593, "learning_rate": 8.19257105432066e-06, "loss": 0.3012, "step": 20402, "teacher_loss": 0.3006381392478943 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3222476541996002, "learning_rate": 8.190547246698311e-06, "loss": 0.1706, "step": 20403, "teacher_loss": 0.1537092924118042 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.5360685586929321, "learning_rate": 8.1885235951963e-06, "loss": 0.1937, "step": 20404, "teacher_loss": 0.1556239277124405 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3255910873413086, "learning_rate": 8.186500099861019e-06, "loss": 0.2305, "step": 20405, "teacher_loss": 0.21998965740203857 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.37914547324180603, "learning_rate": 8.184476760738867e-06, "loss": 0.2086, "step": 20406, "teacher_loss": 0.18962402641773224 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.49698537588119507, "learning_rate": 8.182453577876224e-06, "loss": 0.2056, "step": 20407, "teacher_loss": 0.17320981621742249 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.13500294089317322, "learning_rate": 8.180430551319486e-06, "loss": 0.1808, "step": 20408, "teacher_loss": 0.18585175275802612 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.12996836006641388, "learning_rate": 8.178407681115023e-06, "loss": 0.1603, "step": 20409, "teacher_loss": 0.16366402804851532 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4867998957633972, "learning_rate": 8.176384967309218e-06, "loss": 0.2598, "step": 20410, "teacher_loss": 0.23454934358596802 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.42236441373825073, "learning_rate": 8.174362409948456e-06, "loss": 0.2816, "step": 20411, "teacher_loss": 0.26599812507629395 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3552388846874237, "learning_rate": 8.172340009079091e-06, "loss": 0.2534, "step": 20412, "teacher_loss": 0.2420380711555481 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4141189157962799, "learning_rate": 8.170317764747501e-06, "loss": 0.2162, "step": 20413, "teacher_loss": 0.19424453377723694 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.2065640091896057, "learning_rate": 8.168295677000054e-06, "loss": 0.151, "step": 20414, "teacher_loss": 0.14484231173992157 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.6622151136398315, "learning_rate": 8.166273745883098e-06, "loss": 0.3225, "step": 20415, "teacher_loss": 0.28471681475639343 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.42239901423454285, "learning_rate": 8.164251971442997e-06, "loss": 0.2291, "step": 20416, "teacher_loss": 0.2076021432876587 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.42003461718559265, "learning_rate": 8.16223035372611e-06, "loss": 0.2675, "step": 20417, "teacher_loss": 0.25051814317703247 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3556000590324402, "learning_rate": 8.160208892778775e-06, "loss": 0.2581, "step": 20418, "teacher_loss": 0.2472292184829712 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.7390826940536499, "learning_rate": 8.158187588647341e-06, "loss": 0.2276, "step": 20419, "teacher_loss": 0.17079684138298035 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3526287078857422, "learning_rate": 8.156166441378161e-06, "loss": 0.2206, "step": 20420, "teacher_loss": 0.20597627758979797 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.47965970635414124, "learning_rate": 8.154145451017565e-06, "loss": 0.1932, "step": 20421, "teacher_loss": 0.16135311126708984 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.5041465163230896, "learning_rate": 8.152124617611876e-06, "loss": 0.2461, "step": 20422, "teacher_loss": 0.21740445494651794 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.20443934202194214, "learning_rate": 8.150103941207451e-06, "loss": 0.1898, "step": 20423, "teacher_loss": 0.1882147490978241 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4370315670967102, "learning_rate": 8.148083421850606e-06, "loss": 0.2358, "step": 20424, "teacher_loss": 0.2134111523628235 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.49659407138824463, "learning_rate": 8.146063059587657e-06, "loss": 0.314, "step": 20425, "teacher_loss": 0.29369789361953735 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.7438884973526001, "learning_rate": 8.144042854464936e-06, "loss": 0.347, "step": 20426, "teacher_loss": 0.30293387174606323 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.44723615050315857, "learning_rate": 8.142022806528761e-06, "loss": 0.2054, "step": 20427, "teacher_loss": 0.17856638133525848 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3505808115005493, "learning_rate": 8.140002915825437e-06, "loss": 0.2015, "step": 20428, "teacher_loss": 0.18493297696113586 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.22884199023246765, "learning_rate": 8.137983182401279e-06, "loss": 0.1508, "step": 20429, "teacher_loss": 0.14209480583667755 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.4753621816635132, "learning_rate": 8.135963606302597e-06, "loss": 0.2926, "step": 20430, "teacher_loss": 0.2723028361797333 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.30431801080703735, "learning_rate": 8.13394418757569e-06, "loss": 0.2518, "step": 20431, "teacher_loss": 0.24599260091781616 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.9572106003761292, "learning_rate": 8.131924926266848e-06, "loss": 0.3521, "step": 20432, "teacher_loss": 0.2849164605140686 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.2716759443283081, "learning_rate": 8.129905822422388e-06, "loss": 0.2579, "step": 20433, "teacher_loss": 0.25639498233795166 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.28855806589126587, "learning_rate": 8.127886876088589e-06, "loss": 0.1704, "step": 20434, "teacher_loss": 0.15729355812072754 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.3208698630332947, "learning_rate": 8.125868087311731e-06, "loss": 0.1982, "step": 20435, "teacher_loss": 0.18455222249031067 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.5896419882774353, "learning_rate": 8.123849456138114e-06, "loss": 0.2337, "step": 20436, "teacher_loss": 0.1941101849079132 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.6188450455665588, "learning_rate": 8.121830982614014e-06, "loss": 0.5363, "step": 20437, "teacher_loss": 0.527134895324707 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.8942652940750122, "learning_rate": 8.119812666785704e-06, "loss": 0.2672, "step": 20438, "teacher_loss": 0.19750502705574036 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.47525739669799805, "learning_rate": 8.117794508699462e-06, "loss": 0.24, "step": 20439, "teacher_loss": 0.21380913257598877 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.11387049406766891, "learning_rate": 8.115776508401564e-06, "loss": 0.187, "step": 20440, "teacher_loss": 0.19512397050857544 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.36860546469688416, "learning_rate": 8.113758665938264e-06, "loss": 0.1866, "step": 20441, "teacher_loss": 0.16636237502098083 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.7275445461273193, "learning_rate": 8.111740981355834e-06, "loss": 0.2781, "step": 20442, "teacher_loss": 0.2282119244337082 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.6481698155403137, "learning_rate": 8.109723454700533e-06, "loss": 0.2839, "step": 20443, "teacher_loss": 0.24345257878303528 }, { "compression_loss": 0.0, "epoch": 3.69, "label_loss": 0.23270158469676971, "learning_rate": 8.10770608601861e-06, "loss": 0.1955, "step": 20444, "teacher_loss": 0.19136837124824524 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3991568386554718, "learning_rate": 8.105688875356324e-06, "loss": 0.2221, "step": 20445, "teacher_loss": 0.2023891657590866 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.2909599840641022, "learning_rate": 8.103671822759928e-06, "loss": 0.2311, "step": 20446, "teacher_loss": 0.22443795204162598 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3990752398967743, "learning_rate": 8.10165492827566e-06, "loss": 0.3091, "step": 20447, "teacher_loss": 0.29912543296813965 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.2488056868314743, "learning_rate": 8.099638191949758e-06, "loss": 0.1994, "step": 20448, "teacher_loss": 0.19393882155418396 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.41436147689819336, "learning_rate": 8.097621613828462e-06, "loss": 0.2019, "step": 20449, "teacher_loss": 0.1782858520746231 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.32092559337615967, "learning_rate": 8.095605193958013e-06, "loss": 0.1798, "step": 20450, "teacher_loss": 0.16414344310760498 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.23810219764709473, "learning_rate": 8.093588932384631e-06, "loss": 0.2024, "step": 20451, "teacher_loss": 0.19839242100715637 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4326157867908478, "learning_rate": 8.09157282915455e-06, "loss": 0.3352, "step": 20452, "teacher_loss": 0.32442739605903625 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.68401700258255, "learning_rate": 8.089556884313997e-06, "loss": 0.1915, "step": 20453, "teacher_loss": 0.13678179681301117 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.260090708732605, "learning_rate": 8.08754109790918e-06, "loss": 0.1551, "step": 20454, "teacher_loss": 0.14346373081207275 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5561800003051758, "learning_rate": 8.085525469986321e-06, "loss": 0.2263, "step": 20455, "teacher_loss": 0.1896296739578247 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.37078726291656494, "learning_rate": 8.083510000591638e-06, "loss": 0.38, "step": 20456, "teacher_loss": 0.3810608386993408 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5847737789154053, "learning_rate": 8.081494689771325e-06, "loss": 0.2577, "step": 20457, "teacher_loss": 0.22139260172843933 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.2347414791584015, "learning_rate": 8.079479537571608e-06, "loss": 0.1842, "step": 20458, "teacher_loss": 0.17853516340255737 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.46124717593193054, "learning_rate": 8.077464544038666e-06, "loss": 0.3103, "step": 20459, "teacher_loss": 0.29348236322402954 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.7428712844848633, "learning_rate": 8.07544970921871e-06, "loss": 0.338, "step": 20460, "teacher_loss": 0.29296165704727173 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5688939690589905, "learning_rate": 8.073435033157934e-06, "loss": 0.2441, "step": 20461, "teacher_loss": 0.20806053280830383 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.43966642022132874, "learning_rate": 8.071420515902522e-06, "loss": 0.2762, "step": 20462, "teacher_loss": 0.25801873207092285 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3009549677371979, "learning_rate": 8.069406157498664e-06, "loss": 0.1771, "step": 20463, "teacher_loss": 0.16330209374427795 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.6500614881515503, "learning_rate": 8.067391957992551e-06, "loss": 0.3557, "step": 20464, "teacher_loss": 0.3230045437812805 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.19565460085868835, "learning_rate": 8.065377917430348e-06, "loss": 0.2492, "step": 20465, "teacher_loss": 0.2551359534263611 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3056241571903229, "learning_rate": 8.063364035858238e-06, "loss": 0.2543, "step": 20466, "teacher_loss": 0.24858754873275757 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.2654286026954651, "learning_rate": 8.0613503133224e-06, "loss": 0.1768, "step": 20467, "teacher_loss": 0.16696037352085114 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.7197321653366089, "learning_rate": 8.059336749868991e-06, "loss": 0.28, "step": 20468, "teacher_loss": 0.23116309940814972 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.494012713432312, "learning_rate": 8.057323345544185e-06, "loss": 0.2069, "step": 20469, "teacher_loss": 0.17501267790794373 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.9360468983650208, "learning_rate": 8.055310100394136e-06, "loss": 0.2705, "step": 20470, "teacher_loss": 0.196604922413826 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4719622731208801, "learning_rate": 8.053297014465008e-06, "loss": 0.1503, "step": 20471, "teacher_loss": 0.1146031841635704 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3433011472225189, "learning_rate": 8.051284087802949e-06, "loss": 0.1677, "step": 20472, "teacher_loss": 0.14820224046707153 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.881848931312561, "learning_rate": 8.049271320454114e-06, "loss": 0.2858, "step": 20473, "teacher_loss": 0.21952764689922333 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5663677453994751, "learning_rate": 8.047258712464652e-06, "loss": 0.3023, "step": 20474, "teacher_loss": 0.272938996553421 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4826279878616333, "learning_rate": 8.045246263880695e-06, "loss": 0.3438, "step": 20475, "teacher_loss": 0.32841426134109497 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4675447940826416, "learning_rate": 8.043233974748392e-06, "loss": 0.22, "step": 20476, "teacher_loss": 0.1925058513879776 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4167604148387909, "learning_rate": 8.04122184511388e-06, "loss": 0.2743, "step": 20477, "teacher_loss": 0.25842082500457764 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 1.0200319290161133, "learning_rate": 8.039209875023285e-06, "loss": 0.2375, "step": 20478, "teacher_loss": 0.15051937103271484 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.601428747177124, "learning_rate": 8.037198064522734e-06, "loss": 0.2172, "step": 20479, "teacher_loss": 0.17456106841564178 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.6416711807250977, "learning_rate": 8.035186413658364e-06, "loss": 0.2504, "step": 20480, "teacher_loss": 0.20696741342544556 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.333924263715744, "learning_rate": 8.033174922476288e-06, "loss": 0.1863, "step": 20481, "teacher_loss": 0.1699182242155075 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 1.2532771825790405, "learning_rate": 8.031163591022613e-06, "loss": 0.4716, "step": 20482, "teacher_loss": 0.38480043411254883 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4358372092247009, "learning_rate": 8.029152419343472e-06, "loss": 0.2429, "step": 20483, "teacher_loss": 0.22141847014427185 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.35643208026885986, "learning_rate": 8.02714140748497e-06, "loss": 0.2264, "step": 20484, "teacher_loss": 0.21195971965789795 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4319394826889038, "learning_rate": 8.025130555493195e-06, "loss": 0.2865, "step": 20485, "teacher_loss": 0.270307332277298 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.25577813386917114, "learning_rate": 8.023119863414276e-06, "loss": 0.1547, "step": 20486, "teacher_loss": 0.14348259568214417 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.39948272705078125, "learning_rate": 8.021109331294303e-06, "loss": 0.2236, "step": 20487, "teacher_loss": 0.20410682260990143 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5584471225738525, "learning_rate": 8.019098959179361e-06, "loss": 0.2196, "step": 20488, "teacher_loss": 0.1819203794002533 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.15208375453948975, "learning_rate": 8.017088747115554e-06, "loss": 0.2366, "step": 20489, "teacher_loss": 0.2459491640329361 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5692252516746521, "learning_rate": 8.015078695148968e-06, "loss": 0.3436, "step": 20490, "teacher_loss": 0.31855615973472595 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.47208714485168457, "learning_rate": 8.01306880332568e-06, "loss": 0.1907, "step": 20491, "teacher_loss": 0.15944108366966248 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.395516961812973, "learning_rate": 8.011059071691779e-06, "loss": 0.2957, "step": 20492, "teacher_loss": 0.2845984697341919 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3867572546005249, "learning_rate": 8.009049500293344e-06, "loss": 0.2827, "step": 20493, "teacher_loss": 0.271151602268219 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.4019503891468048, "learning_rate": 8.007040089176443e-06, "loss": 0.2116, "step": 20494, "teacher_loss": 0.19040986895561218 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.32331639528274536, "learning_rate": 8.005030838387137e-06, "loss": 0.2701, "step": 20495, "teacher_loss": 0.26420462131500244 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.5309414863586426, "learning_rate": 8.003021747971513e-06, "loss": 0.2136, "step": 20496, "teacher_loss": 0.17832162976264954 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.6562752723693848, "learning_rate": 8.001012817975622e-06, "loss": 0.3282, "step": 20497, "teacher_loss": 0.2917705774307251 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.3987181782722473, "learning_rate": 7.999004048445518e-06, "loss": 0.1818, "step": 20498, "teacher_loss": 0.15769723057746887 }, { "compression_loss": 0.0, "epoch": 3.7, "label_loss": 0.2560056447982788, "learning_rate": 7.996995439427259e-06, "loss": 0.2161, "step": 20499, "teacher_loss": 0.21162426471710205 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.25209033489227295, "learning_rate": 7.994986990966905e-06, "loss": 0.2251, "step": 20500, "teacher_loss": 0.2221122682094574 }, { "epoch": 3.71, "eval_exact_match": 80.34058656575213, "eval_f1": 87.61933188818082, "step": 20500 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.29122698307037354, "learning_rate": 7.992978703110492e-06, "loss": 0.2498, "step": 20501, "teacher_loss": 0.2451673299074173 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6233056783676147, "learning_rate": 7.99097057590407e-06, "loss": 0.2651, "step": 20502, "teacher_loss": 0.22530491650104523 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.39383214712142944, "learning_rate": 7.988962609393682e-06, "loss": 0.2576, "step": 20503, "teacher_loss": 0.24242591857910156 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5649991035461426, "learning_rate": 7.986954803625357e-06, "loss": 0.2137, "step": 20504, "teacher_loss": 0.1746564507484436 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5237628221511841, "learning_rate": 7.984947158645131e-06, "loss": 0.236, "step": 20505, "teacher_loss": 0.20397555828094482 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.34743866324424744, "learning_rate": 7.982939674499042e-06, "loss": 0.1408, "step": 20506, "teacher_loss": 0.11787965893745422 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.45824772119522095, "learning_rate": 7.980932351233102e-06, "loss": 0.2294, "step": 20507, "teacher_loss": 0.20394228398799896 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.3159356713294983, "learning_rate": 7.978925188893344e-06, "loss": 0.1924, "step": 20508, "teacher_loss": 0.17868517339229584 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.747072696685791, "learning_rate": 7.976918187525775e-06, "loss": 0.3485, "step": 20509, "teacher_loss": 0.304210364818573 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.3645329773426056, "learning_rate": 7.974911347176422e-06, "loss": 0.3194, "step": 20510, "teacher_loss": 0.3144038915634155 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.2930063009262085, "learning_rate": 7.972904667891285e-06, "loss": 0.205, "step": 20511, "teacher_loss": 0.19519363343715668 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 1.087515115737915, "learning_rate": 7.970898149716375e-06, "loss": 0.2569, "step": 20512, "teacher_loss": 0.16464795172214508 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.32083261013031006, "learning_rate": 7.9688917926977e-06, "loss": 0.1657, "step": 20513, "teacher_loss": 0.14846599102020264 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6017273664474487, "learning_rate": 7.966885596881252e-06, "loss": 0.2634, "step": 20514, "teacher_loss": 0.22584998607635498 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.3221001923084259, "learning_rate": 7.964879562313029e-06, "loss": 0.3057, "step": 20515, "teacher_loss": 0.303842157125473 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.33737727999687195, "learning_rate": 7.962873689039033e-06, "loss": 0.207, "step": 20516, "teacher_loss": 0.19256901741027832 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.20019355416297913, "learning_rate": 7.96086797710524e-06, "loss": 0.1745, "step": 20517, "teacher_loss": 0.1716061234474182 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6021059155464172, "learning_rate": 7.958862426557636e-06, "loss": 0.253, "step": 20518, "teacher_loss": 0.21420764923095703 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.29106423258781433, "learning_rate": 7.956857037442215e-06, "loss": 0.2019, "step": 20519, "teacher_loss": 0.192010298371315 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5370165109634399, "learning_rate": 7.954851809804938e-06, "loss": 0.2338, "step": 20520, "teacher_loss": 0.2000667005777359 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.214020237326622, "learning_rate": 7.952846743691794e-06, "loss": 0.1847, "step": 20521, "teacher_loss": 0.18139652907848358 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.40170636773109436, "learning_rate": 7.95084183914874e-06, "loss": 0.2252, "step": 20522, "teacher_loss": 0.2055548131465912 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.701139509677887, "learning_rate": 7.948837096221747e-06, "loss": 0.2793, "step": 20523, "teacher_loss": 0.23241330683231354 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4164860248565674, "learning_rate": 7.946832514956785e-06, "loss": 0.2416, "step": 20524, "teacher_loss": 0.2222120463848114 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6345788240432739, "learning_rate": 7.944828095399802e-06, "loss": 0.3216, "step": 20525, "teacher_loss": 0.2868001461029053 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.38317978382110596, "learning_rate": 7.942823837596757e-06, "loss": 0.2723, "step": 20526, "teacher_loss": 0.2599836587905884 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6318536400794983, "learning_rate": 7.94081974159361e-06, "loss": 0.3076, "step": 20527, "teacher_loss": 0.2715749740600586 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.29888448119163513, "learning_rate": 7.938815807436294e-06, "loss": 0.2304, "step": 20528, "teacher_loss": 0.2227899730205536 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.2001003623008728, "learning_rate": 7.936812035170764e-06, "loss": 0.2493, "step": 20529, "teacher_loss": 0.2547980546951294 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.859743058681488, "learning_rate": 7.93480842484296e-06, "loss": 0.255, "step": 20530, "teacher_loss": 0.18780821561813354 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5746020078659058, "learning_rate": 7.932804976498817e-06, "loss": 0.4405, "step": 20531, "teacher_loss": 0.42557328939437866 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.359576940536499, "learning_rate": 7.930801690184265e-06, "loss": 0.2297, "step": 20532, "teacher_loss": 0.21526750922203064 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.44806063175201416, "learning_rate": 7.928798565945232e-06, "loss": 0.2501, "step": 20533, "teacher_loss": 0.22809848189353943 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6142005324363708, "learning_rate": 7.926795603827655e-06, "loss": 0.2143, "step": 20534, "teacher_loss": 0.16982506215572357 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4922184944152832, "learning_rate": 7.92479280387744e-06, "loss": 0.2158, "step": 20535, "teacher_loss": 0.18510878086090088 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.41362863779067993, "learning_rate": 7.922790166140516e-06, "loss": 0.3335, "step": 20536, "teacher_loss": 0.3245932459831238 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5203946828842163, "learning_rate": 7.9207876906628e-06, "loss": 0.1985, "step": 20537, "teacher_loss": 0.1627473533153534 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4432861804962158, "learning_rate": 7.91878537749019e-06, "loss": 0.207, "step": 20538, "teacher_loss": 0.18079635500907898 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6002190709114075, "learning_rate": 7.916783226668602e-06, "loss": 0.2991, "step": 20539, "teacher_loss": 0.2655889689922333 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.16795042157173157, "learning_rate": 7.914781238243945e-06, "loss": 0.1573, "step": 20540, "teacher_loss": 0.15615800023078918 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4106191098690033, "learning_rate": 7.912779412262105e-06, "loss": 0.1852, "step": 20541, "teacher_loss": 0.16012956202030182 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4219675064086914, "learning_rate": 7.910777748768986e-06, "loss": 0.2838, "step": 20542, "teacher_loss": 0.2684195041656494 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5335960388183594, "learning_rate": 7.908776247810482e-06, "loss": 0.2045, "step": 20543, "teacher_loss": 0.16792264580726624 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.3978830575942993, "learning_rate": 7.90677490943248e-06, "loss": 0.2129, "step": 20544, "teacher_loss": 0.19235210120677948 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.5176531076431274, "learning_rate": 7.904773733680847e-06, "loss": 0.2869, "step": 20545, "teacher_loss": 0.261284202337265 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.2776418924331665, "learning_rate": 7.902772720601498e-06, "loss": 0.1778, "step": 20546, "teacher_loss": 0.1666685789823532 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6594192981719971, "learning_rate": 7.900771870240286e-06, "loss": 0.2932, "step": 20547, "teacher_loss": 0.2525408864021301 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.3557298481464386, "learning_rate": 7.898771182643087e-06, "loss": 0.1839, "step": 20548, "teacher_loss": 0.16476333141326904 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.2247595489025116, "learning_rate": 7.896770657855774e-06, "loss": 0.2201, "step": 20549, "teacher_loss": 0.2195352166891098 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.7136620283126831, "learning_rate": 7.89477029592422e-06, "loss": 0.3431, "step": 20550, "teacher_loss": 0.3018948435783386 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.48758912086486816, "learning_rate": 7.892770096894274e-06, "loss": 0.2377, "step": 20551, "teacher_loss": 0.2099277526140213 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.6213744878768921, "learning_rate": 7.8907700608118e-06, "loss": 0.3432, "step": 20552, "teacher_loss": 0.3123405873775482 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.49746182560920715, "learning_rate": 7.888770187722663e-06, "loss": 0.2357, "step": 20553, "teacher_loss": 0.20666344463825226 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.34353911876678467, "learning_rate": 7.886770477672703e-06, "loss": 0.2366, "step": 20554, "teacher_loss": 0.22466708719730377 }, { "compression_loss": 0.0, "epoch": 3.71, "label_loss": 0.4736207127571106, "learning_rate": 7.884770930707757e-06, "loss": 0.2277, "step": 20555, "teacher_loss": 0.20042431354522705 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.40838760137557983, "learning_rate": 7.88277154687369e-06, "loss": 0.2281, "step": 20556, "teacher_loss": 0.20807047188282013 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5997644066810608, "learning_rate": 7.88077232621634e-06, "loss": 0.2878, "step": 20557, "teacher_loss": 0.253089040517807 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.47178590297698975, "learning_rate": 7.878773268781526e-06, "loss": 0.1858, "step": 20558, "teacher_loss": 0.15402144193649292 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.4570574164390564, "learning_rate": 7.87677437461509e-06, "loss": 0.2323, "step": 20559, "teacher_loss": 0.20727333426475525 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.551131546497345, "learning_rate": 7.874775643762868e-06, "loss": 0.2804, "step": 20560, "teacher_loss": 0.25032323598861694 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.33984100818634033, "learning_rate": 7.872777076270669e-06, "loss": 0.2632, "step": 20561, "teacher_loss": 0.25465041399002075 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5567530393600464, "learning_rate": 7.870778672184328e-06, "loss": 0.2427, "step": 20562, "teacher_loss": 0.20775152742862701 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.35740435123443604, "learning_rate": 7.86878043154966e-06, "loss": 0.2136, "step": 20563, "teacher_loss": 0.19762060046195984 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.40884658694267273, "learning_rate": 7.866782354412471e-06, "loss": 0.3155, "step": 20564, "teacher_loss": 0.305100679397583 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.268097460269928, "learning_rate": 7.864784440818578e-06, "loss": 0.2241, "step": 20565, "teacher_loss": 0.2192152738571167 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.4740188717842102, "learning_rate": 7.862786690813789e-06, "loss": 0.2478, "step": 20566, "teacher_loss": 0.22263957560062408 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.43564820289611816, "learning_rate": 7.860789104443897e-06, "loss": 0.3101, "step": 20567, "teacher_loss": 0.2961238622665405 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 1.0727430582046509, "learning_rate": 7.858791681754707e-06, "loss": 0.4854, "step": 20568, "teacher_loss": 0.42014801502227783 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.6135869026184082, "learning_rate": 7.85679442279202e-06, "loss": 0.323, "step": 20569, "teacher_loss": 0.290690541267395 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.3265843391418457, "learning_rate": 7.854797327601614e-06, "loss": 0.2562, "step": 20570, "teacher_loss": 0.2483462244272232 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5780649185180664, "learning_rate": 7.852800396229286e-06, "loss": 0.2737, "step": 20571, "teacher_loss": 0.23989325761795044 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.40878117084503174, "learning_rate": 7.850803628720814e-06, "loss": 0.3163, "step": 20572, "teacher_loss": 0.3060475289821625 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.388255774974823, "learning_rate": 7.848807025121985e-06, "loss": 0.2648, "step": 20573, "teacher_loss": 0.2510680556297302 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.3706388473510742, "learning_rate": 7.846810585478565e-06, "loss": 0.1865, "step": 20574, "teacher_loss": 0.1660592257976532 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.4351791739463806, "learning_rate": 7.844814309836334e-06, "loss": 0.275, "step": 20575, "teacher_loss": 0.2572292685508728 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.23523324728012085, "learning_rate": 7.84281819824106e-06, "loss": 0.1884, "step": 20576, "teacher_loss": 0.18315139412879944 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.6651009321212769, "learning_rate": 7.840822250738503e-06, "loss": 0.3068, "step": 20577, "teacher_loss": 0.26703932881355286 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5947476625442505, "learning_rate": 7.838826467374426e-06, "loss": 0.2314, "step": 20578, "teacher_loss": 0.1910579800605774 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.3304585814476013, "learning_rate": 7.836830848194597e-06, "loss": 0.2398, "step": 20579, "teacher_loss": 0.2297612428665161 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.6935474872589111, "learning_rate": 7.83483539324475e-06, "loss": 0.2373, "step": 20580, "teacher_loss": 0.18655851483345032 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.7470519542694092, "learning_rate": 7.832840102570655e-06, "loss": 0.3217, "step": 20581, "teacher_loss": 0.2744293510913849 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.45566248893737793, "learning_rate": 7.83084497621804e-06, "loss": 0.2921, "step": 20582, "teacher_loss": 0.2738826274871826 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.23319372534751892, "learning_rate": 7.828850014232656e-06, "loss": 0.1893, "step": 20583, "teacher_loss": 0.18445329368114471 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.6765554547309875, "learning_rate": 7.826855216660247e-06, "loss": 0.3197, "step": 20584, "teacher_loss": 0.2800275683403015 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.2504131495952606, "learning_rate": 7.824860583546536e-06, "loss": 0.1816, "step": 20585, "teacher_loss": 0.1739581823348999 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5527567863464355, "learning_rate": 7.82286611493726e-06, "loss": 0.2781, "step": 20586, "teacher_loss": 0.24757763743400574 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.32621315121650696, "learning_rate": 7.820871810878151e-06, "loss": 0.2305, "step": 20587, "teacher_loss": 0.21982556581497192 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.3054104745388031, "learning_rate": 7.81887767141492e-06, "loss": 0.2513, "step": 20588, "teacher_loss": 0.24533875286579132 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.4608084559440613, "learning_rate": 7.816883696593297e-06, "loss": 0.3438, "step": 20589, "teacher_loss": 0.3308277726173401 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.7486361265182495, "learning_rate": 7.814889886458999e-06, "loss": 0.5455, "step": 20590, "teacher_loss": 0.5228923559188843 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.2961157560348511, "learning_rate": 7.812896241057728e-06, "loss": 0.2293, "step": 20591, "teacher_loss": 0.22187002003192902 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.46913662552833557, "learning_rate": 7.810902760435198e-06, "loss": 0.2862, "step": 20592, "teacher_loss": 0.26582786440849304 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5202335715293884, "learning_rate": 7.808909444637119e-06, "loss": 0.2352, "step": 20593, "teacher_loss": 0.20353004336357117 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.25242379307746887, "learning_rate": 7.806916293709188e-06, "loss": 0.3006, "step": 20594, "teacher_loss": 0.3059537410736084 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.36521148681640625, "learning_rate": 7.804923307697092e-06, "loss": 0.2378, "step": 20595, "teacher_loss": 0.22366078197956085 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.24222902953624725, "learning_rate": 7.802930486646534e-06, "loss": 0.1539, "step": 20596, "teacher_loss": 0.14406165480613708 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.2768211364746094, "learning_rate": 7.800937830603208e-06, "loss": 0.1774, "step": 20597, "teacher_loss": 0.16636043787002563 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.4194197356700897, "learning_rate": 7.798945339612787e-06, "loss": 0.2837, "step": 20598, "teacher_loss": 0.2685825824737549 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.48154306411743164, "learning_rate": 7.796953013720961e-06, "loss": 0.2673, "step": 20599, "teacher_loss": 0.24351122975349426 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.439342200756073, "learning_rate": 7.794960852973413e-06, "loss": 0.2898, "step": 20600, "teacher_loss": 0.27313506603240967 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.39674484729766846, "learning_rate": 7.792968857415803e-06, "loss": 0.1639, "step": 20601, "teacher_loss": 0.13806486129760742 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.5691717267036438, "learning_rate": 7.790977027093808e-06, "loss": 0.3161, "step": 20602, "teacher_loss": 0.28793853521347046 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.21447071433067322, "learning_rate": 7.788985362053105e-06, "loss": 0.2644, "step": 20603, "teacher_loss": 0.26990869641304016 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.7089633941650391, "learning_rate": 7.786993862339347e-06, "loss": 0.2653, "step": 20604, "teacher_loss": 0.21601277589797974 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.8200685977935791, "learning_rate": 7.785002527998182e-06, "loss": 0.3423, "step": 20605, "teacher_loss": 0.2891594469547272 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.2618734538555145, "learning_rate": 7.78301135907529e-06, "loss": 0.2311, "step": 20606, "teacher_loss": 0.2276328057050705 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.7955800294876099, "learning_rate": 7.781020355616309e-06, "loss": 0.3536, "step": 20607, "teacher_loss": 0.30448412895202637 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.612231969833374, "learning_rate": 7.779029517666873e-06, "loss": 0.2833, "step": 20608, "teacher_loss": 0.2466975748538971 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.48443174362182617, "learning_rate": 7.777038845272656e-06, "loss": 0.4876, "step": 20609, "teacher_loss": 0.48799023032188416 }, { "compression_loss": 0.0, "epoch": 3.72, "label_loss": 0.3143419027328491, "learning_rate": 7.775048338479282e-06, "loss": 0.2213, "step": 20610, "teacher_loss": 0.2109655737876892 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.8374941945075989, "learning_rate": 7.773057997332384e-06, "loss": 0.2836, "step": 20611, "teacher_loss": 0.22203822433948517 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.8722686767578125, "learning_rate": 7.771067821877594e-06, "loss": 0.2804, "step": 20612, "teacher_loss": 0.214582160115242 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4468388259410858, "learning_rate": 7.769077812160555e-06, "loss": 0.2787, "step": 20613, "teacher_loss": 0.2599703073501587 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.9476112127304077, "learning_rate": 7.767087968226875e-06, "loss": 0.2766, "step": 20614, "teacher_loss": 0.20204895734786987 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.34413790702819824, "learning_rate": 7.765098290122182e-06, "loss": 0.1868, "step": 20615, "teacher_loss": 0.1693127453327179 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.34676483273506165, "learning_rate": 7.763108777892101e-06, "loss": 0.3059, "step": 20616, "teacher_loss": 0.3013885021209717 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2990168631076813, "learning_rate": 7.76111943158224e-06, "loss": 0.1938, "step": 20617, "teacher_loss": 0.1820741593837738 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.3342002332210541, "learning_rate": 7.759130251238194e-06, "loss": 0.2255, "step": 20618, "teacher_loss": 0.21344928443431854 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.25176236033439636, "learning_rate": 7.75714123690559e-06, "loss": 0.2296, "step": 20619, "teacher_loss": 0.22711308300495148 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.46062982082366943, "learning_rate": 7.755152388630026e-06, "loss": 0.2612, "step": 20620, "teacher_loss": 0.23902156949043274 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.5796553492546082, "learning_rate": 7.75316370645709e-06, "loss": 0.2774, "step": 20621, "teacher_loss": 0.24383430182933807 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.39046692848205566, "learning_rate": 7.751175190432383e-06, "loss": 0.2098, "step": 20622, "teacher_loss": 0.18975865840911865 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.695408821105957, "learning_rate": 7.749186840601503e-06, "loss": 0.1808, "step": 20623, "teacher_loss": 0.12367603927850723 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2626107931137085, "learning_rate": 7.747198657010022e-06, "loss": 0.1788, "step": 20624, "teacher_loss": 0.16947954893112183 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6986124515533447, "learning_rate": 7.745210639703533e-06, "loss": 0.2719, "step": 20625, "teacher_loss": 0.2245016098022461 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.8883216977119446, "learning_rate": 7.743222788727616e-06, "loss": 0.3389, "step": 20626, "teacher_loss": 0.2778290808200836 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6360207796096802, "learning_rate": 7.74123510412784e-06, "loss": 0.2562, "step": 20627, "teacher_loss": 0.21398407220840454 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2795759439468384, "learning_rate": 7.739247585949779e-06, "loss": 0.2054, "step": 20628, "teacher_loss": 0.19721169769763947 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6199157238006592, "learning_rate": 7.737260234239006e-06, "loss": 0.2707, "step": 20629, "teacher_loss": 0.23194241523742676 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.31909239292144775, "learning_rate": 7.735273049041078e-06, "loss": 0.1741, "step": 20630, "teacher_loss": 0.15800046920776367 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6406176090240479, "learning_rate": 7.733286030401564e-06, "loss": 0.2035, "step": 20631, "teacher_loss": 0.1549123078584671 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2993151545524597, "learning_rate": 7.731299178366008e-06, "loss": 0.1449, "step": 20632, "teacher_loss": 0.1277957260608673 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4320480227470398, "learning_rate": 7.729312492979967e-06, "loss": 0.1793, "step": 20633, "teacher_loss": 0.1511736512184143 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4288039207458496, "learning_rate": 7.727325974289e-06, "loss": 0.2501, "step": 20634, "teacher_loss": 0.23027953505516052 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.3129156827926636, "learning_rate": 7.725339622338639e-06, "loss": 0.2408, "step": 20635, "teacher_loss": 0.2328205406665802 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2605130672454834, "learning_rate": 7.723353437174435e-06, "loss": 0.1698, "step": 20636, "teacher_loss": 0.1597018539905548 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.446674108505249, "learning_rate": 7.721367418841914e-06, "loss": 0.2766, "step": 20637, "teacher_loss": 0.257689893245697 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6010599136352539, "learning_rate": 7.719381567386615e-06, "loss": 0.3327, "step": 20638, "teacher_loss": 0.3028751611709595 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.352644681930542, "learning_rate": 7.717395882854075e-06, "loss": 0.2538, "step": 20639, "teacher_loss": 0.2428404688835144 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.6668270826339722, "learning_rate": 7.715410365289806e-06, "loss": 0.2157, "step": 20640, "teacher_loss": 0.1655881702899933 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.32208502292633057, "learning_rate": 7.713425014739337e-06, "loss": 0.2557, "step": 20641, "teacher_loss": 0.248269185423851 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2823813557624817, "learning_rate": 7.711439831248192e-06, "loss": 0.1525, "step": 20642, "teacher_loss": 0.13801613450050354 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4927947223186493, "learning_rate": 7.709454814861876e-06, "loss": 0.3049, "step": 20643, "teacher_loss": 0.28396880626678467 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.552959680557251, "learning_rate": 7.707469965625907e-06, "loss": 0.3861, "step": 20644, "teacher_loss": 0.3675660789012909 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.14102678000926971, "learning_rate": 7.70548528358578e-06, "loss": 0.17, "step": 20645, "teacher_loss": 0.17317049205303192 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.5988131761550903, "learning_rate": 7.703500768787008e-06, "loss": 0.2357, "step": 20646, "teacher_loss": 0.19538281857967377 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.503410816192627, "learning_rate": 7.701516421275092e-06, "loss": 0.2283, "step": 20647, "teacher_loss": 0.197728231549263 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2142864167690277, "learning_rate": 7.699532241095518e-06, "loss": 0.2193, "step": 20648, "teacher_loss": 0.2198556363582611 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.44591790437698364, "learning_rate": 7.69754822829378e-06, "loss": 0.2409, "step": 20649, "teacher_loss": 0.21816131472587585 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.40563446283340454, "learning_rate": 7.695564382915374e-06, "loss": 0.1998, "step": 20650, "teacher_loss": 0.17687420547008514 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.28687727451324463, "learning_rate": 7.69358070500577e-06, "loss": 0.2426, "step": 20651, "teacher_loss": 0.23770327866077423 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.626595139503479, "learning_rate": 7.691597194610456e-06, "loss": 0.232, "step": 20652, "teacher_loss": 0.18810918927192688 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.3314914107322693, "learning_rate": 7.689613851774911e-06, "loss": 0.1745, "step": 20653, "teacher_loss": 0.15703517198562622 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4683282971382141, "learning_rate": 7.687630676544605e-06, "loss": 0.2813, "step": 20654, "teacher_loss": 0.2605719268321991 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.9122825860977173, "learning_rate": 7.685647668964988e-06, "loss": 0.2831, "step": 20655, "teacher_loss": 0.21318268775939941 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.4241066873073578, "learning_rate": 7.683664829081557e-06, "loss": 0.2378, "step": 20656, "teacher_loss": 0.2171216458082199 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 1.0855133533477783, "learning_rate": 7.681682156939752e-06, "loss": 0.3129, "step": 20657, "teacher_loss": 0.22705848515033722 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.2217554748058319, "learning_rate": 7.67969965258503e-06, "loss": 0.1764, "step": 20658, "teacher_loss": 0.1713918000459671 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.3536851406097412, "learning_rate": 7.677717316062849e-06, "loss": 0.2638, "step": 20659, "teacher_loss": 0.2537705898284912 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.27580326795578003, "learning_rate": 7.67573514741866e-06, "loss": 0.2091, "step": 20660, "teacher_loss": 0.20165014266967773 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.5255050659179688, "learning_rate": 7.6737531466979e-06, "loss": 0.2815, "step": 20661, "teacher_loss": 0.25439292192459106 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.46773040294647217, "learning_rate": 7.671771313946015e-06, "loss": 0.223, "step": 20662, "teacher_loss": 0.1957968771457672 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.32312217354774475, "learning_rate": 7.669789649208449e-06, "loss": 0.258, "step": 20663, "teacher_loss": 0.25072628259658813 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.3443107604980469, "learning_rate": 7.667808152530622e-06, "loss": 0.1874, "step": 20664, "teacher_loss": 0.16992640495300293 }, { "compression_loss": 0.0, "epoch": 3.73, "label_loss": 0.618716835975647, "learning_rate": 7.66582682395797e-06, "loss": 0.2242, "step": 20665, "teacher_loss": 0.180341899394989 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3502388894557953, "learning_rate": 7.663845663535928e-06, "loss": 0.1764, "step": 20666, "teacher_loss": 0.15708932280540466 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.4204500913619995, "learning_rate": 7.661864671309908e-06, "loss": 0.246, "step": 20667, "teacher_loss": 0.22665318846702576 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.436565101146698, "learning_rate": 7.65988384732532e-06, "loss": 0.2139, "step": 20668, "teacher_loss": 0.18915501236915588 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3638366460800171, "learning_rate": 7.6579031916276e-06, "loss": 0.2104, "step": 20669, "teacher_loss": 0.19330652058124542 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.5326085686683655, "learning_rate": 7.65592270426215e-06, "loss": 0.1972, "step": 20670, "teacher_loss": 0.15993425250053406 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.35313689708709717, "learning_rate": 7.653942385274362e-06, "loss": 0.2106, "step": 20671, "teacher_loss": 0.19481004774570465 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.48107102513313293, "learning_rate": 7.651962234709655e-06, "loss": 0.4124, "step": 20672, "teacher_loss": 0.404815673828125 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.6386967897415161, "learning_rate": 7.649982252613428e-06, "loss": 0.2845, "step": 20673, "teacher_loss": 0.24518311023712158 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.4580540060997009, "learning_rate": 7.648002439031065e-06, "loss": 0.2202, "step": 20674, "teacher_loss": 0.19380971789360046 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.6248688101768494, "learning_rate": 7.646022794007966e-06, "loss": 0.359, "step": 20675, "teacher_loss": 0.3294230103492737 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.40935218334198, "learning_rate": 7.64404331758952e-06, "loss": 0.1811, "step": 20676, "teacher_loss": 0.15575991570949554 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.4022122919559479, "learning_rate": 7.642064009821106e-06, "loss": 0.2564, "step": 20677, "teacher_loss": 0.24018919467926025 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.34272414445877075, "learning_rate": 7.64008487074809e-06, "loss": 0.1919, "step": 20678, "teacher_loss": 0.17516404390335083 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.36190497875213623, "learning_rate": 7.638105900415878e-06, "loss": 0.232, "step": 20679, "teacher_loss": 0.21755488216876984 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.4359971284866333, "learning_rate": 7.636127098869824e-06, "loss": 0.2478, "step": 20680, "teacher_loss": 0.22691220045089722 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.42522379755973816, "learning_rate": 7.634148466155292e-06, "loss": 0.1884, "step": 20681, "teacher_loss": 0.16203323006629944 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.31441712379455566, "learning_rate": 7.632170002317649e-06, "loss": 0.2581, "step": 20682, "teacher_loss": 0.2518788278102875 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.4209899306297302, "learning_rate": 7.630191707402267e-06, "loss": 0.2544, "step": 20683, "teacher_loss": 0.23583492636680603 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.44638535380363464, "learning_rate": 7.628213581454485e-06, "loss": 0.3165, "step": 20684, "teacher_loss": 0.30206233263015747 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3177550733089447, "learning_rate": 7.626235624519665e-06, "loss": 0.2349, "step": 20685, "teacher_loss": 0.22569599747657776 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.38468509912490845, "learning_rate": 7.624257836643162e-06, "loss": 0.213, "step": 20686, "teacher_loss": 0.19397558271884918 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.41876256465911865, "learning_rate": 7.6222802178703064e-06, "loss": 0.2505, "step": 20687, "teacher_loss": 0.23175761103630066 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.2677580416202545, "learning_rate": 7.620302768246445e-06, "loss": 0.183, "step": 20688, "teacher_loss": 0.1735418140888214 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3936900794506073, "learning_rate": 7.618325487816921e-06, "loss": 0.2039, "step": 20689, "teacher_loss": 0.18284907937049866 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.49180394411087036, "learning_rate": 7.616348376627057e-06, "loss": 0.2276, "step": 20690, "teacher_loss": 0.19824816286563873 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.7609177231788635, "learning_rate": 7.614371434722187e-06, "loss": 0.3484, "step": 20691, "teacher_loss": 0.30254560708999634 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3338005542755127, "learning_rate": 7.612394662147643e-06, "loss": 0.2378, "step": 20692, "teacher_loss": 0.2271246314048767 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.6402816772460938, "learning_rate": 7.6104180589487354e-06, "loss": 0.325, "step": 20693, "teacher_loss": 0.28994888067245483 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.45723778009414673, "learning_rate": 7.608441625170791e-06, "loss": 0.2839, "step": 20694, "teacher_loss": 0.2646586298942566 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.38231393694877625, "learning_rate": 7.606465360859113e-06, "loss": 0.2183, "step": 20695, "teacher_loss": 0.20008337497711182 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.7269190549850464, "learning_rate": 7.604489266059019e-06, "loss": 0.4847, "step": 20696, "teacher_loss": 0.45775866508483887 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.36590510606765747, "learning_rate": 7.602513340815817e-06, "loss": 0.2195, "step": 20697, "teacher_loss": 0.2031954973936081 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3498404324054718, "learning_rate": 7.600537585174802e-06, "loss": 0.1947, "step": 20698, "teacher_loss": 0.1774539351463318 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3963054418563843, "learning_rate": 7.598561999181278e-06, "loss": 0.2276, "step": 20699, "teacher_loss": 0.20884516835212708 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.43781328201293945, "learning_rate": 7.596586582880535e-06, "loss": 0.287, "step": 20700, "teacher_loss": 0.2702672779560089 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3699612617492676, "learning_rate": 7.5946113363178615e-06, "loss": 0.1846, "step": 20701, "teacher_loss": 0.16404679417610168 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.29414916038513184, "learning_rate": 7.592636259538556e-06, "loss": 0.2136, "step": 20702, "teacher_loss": 0.20467005670070648 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.19703754782676697, "learning_rate": 7.5906613525878845e-06, "loss": 0.1851, "step": 20703, "teacher_loss": 0.18382486701011658 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.3419671952724457, "learning_rate": 7.588686615511141e-06, "loss": 0.295, "step": 20704, "teacher_loss": 0.28976479172706604 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.5558841228485107, "learning_rate": 7.5867120483535865e-06, "loss": 0.2365, "step": 20705, "teacher_loss": 0.20102283358573914 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.2001648098230362, "learning_rate": 7.584737651160498e-06, "loss": 0.1912, "step": 20706, "teacher_loss": 0.19014906883239746 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.2834233343601227, "learning_rate": 7.5827634239771495e-06, "loss": 0.1782, "step": 20707, "teacher_loss": 0.16655324399471283 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 1.2182042598724365, "learning_rate": 7.580789366848794e-06, "loss": 0.2955, "step": 20708, "teacher_loss": 0.19293151795864105 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.8388482332229614, "learning_rate": 7.578815479820692e-06, "loss": 0.2043, "step": 20709, "teacher_loss": 0.13382437825202942 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.15370246767997742, "learning_rate": 7.576841762938108e-06, "loss": 0.1682, "step": 20710, "teacher_loss": 0.16980718076229095 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.7117568254470825, "learning_rate": 7.574868216246279e-06, "loss": 0.244, "step": 20711, "teacher_loss": 0.19198210537433624 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.17348745465278625, "learning_rate": 7.572894839790462e-06, "loss": 0.1884, "step": 20712, "teacher_loss": 0.19007861614227295 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.9110016822814941, "learning_rate": 7.570921633615904e-06, "loss": 0.3511, "step": 20713, "teacher_loss": 0.2889332175254822 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.6120614409446716, "learning_rate": 7.568948597767833e-06, "loss": 0.2009, "step": 20714, "teacher_loss": 0.1552184671163559 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.2810188829898834, "learning_rate": 7.566975732291491e-06, "loss": 0.2074, "step": 20715, "teacher_loss": 0.19925513863563538 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.49374228715896606, "learning_rate": 7.565003037232119e-06, "loss": 0.2664, "step": 20716, "teacher_loss": 0.24116943776607513 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.25371938943862915, "learning_rate": 7.563030512634932e-06, "loss": 0.2118, "step": 20717, "teacher_loss": 0.20717597007751465 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.7613872289657593, "learning_rate": 7.561058158545153e-06, "loss": 0.2676, "step": 20718, "teacher_loss": 0.21278022229671478 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.14412908256053925, "learning_rate": 7.55908597500801e-06, "loss": 0.1399, "step": 20719, "teacher_loss": 0.13938085734844208 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.38786977529525757, "learning_rate": 7.557113962068721e-06, "loss": 0.3816, "step": 20720, "teacher_loss": 0.38089269399642944 }, { "compression_loss": 0.0, "epoch": 3.74, "label_loss": 0.26191383600234985, "learning_rate": 7.555142119772488e-06, "loss": 0.1478, "step": 20721, "teacher_loss": 0.13510656356811523 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4136655330657959, "learning_rate": 7.553170448164524e-06, "loss": 0.276, "step": 20722, "teacher_loss": 0.260692298412323 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.25625112652778625, "learning_rate": 7.551198947290043e-06, "loss": 0.1773, "step": 20723, "teacher_loss": 0.16848014295101166 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.23818835616111755, "learning_rate": 7.549227617194231e-06, "loss": 0.1623, "step": 20724, "teacher_loss": 0.15389969944953918 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.40371131896972656, "learning_rate": 7.547256457922291e-06, "loss": 0.2064, "step": 20725, "teacher_loss": 0.18444772064685822 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.3765753507614136, "learning_rate": 7.54528546951942e-06, "loss": 0.2419, "step": 20726, "teacher_loss": 0.22689582407474518 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.5859529376029968, "learning_rate": 7.5433146520308045e-06, "loss": 0.2196, "step": 20727, "teacher_loss": 0.17893315851688385 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.244625985622406, "learning_rate": 7.541344005501617e-06, "loss": 0.1744, "step": 20728, "teacher_loss": 0.16655270755290985 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2897653579711914, "learning_rate": 7.539373529977059e-06, "loss": 0.142, "step": 20729, "teacher_loss": 0.12561453878879547 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4354771673679352, "learning_rate": 7.5374032255022975e-06, "loss": 0.2498, "step": 20730, "teacher_loss": 0.2292238026857376 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.3942147493362427, "learning_rate": 7.535433092122496e-06, "loss": 0.2816, "step": 20731, "teacher_loss": 0.26903682947158813 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.244733527302742, "learning_rate": 7.533463129882844e-06, "loss": 0.2135, "step": 20732, "teacher_loss": 0.21001559495925903 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4936160147190094, "learning_rate": 7.531493338828499e-06, "loss": 0.3877, "step": 20733, "teacher_loss": 0.3759553134441376 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6860644817352295, "learning_rate": 7.529523719004612e-06, "loss": 0.3148, "step": 20734, "teacher_loss": 0.2735133171081543 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.624211311340332, "learning_rate": 7.527554270456349e-06, "loss": 0.272, "step": 20735, "teacher_loss": 0.23286965489387512 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4572480320930481, "learning_rate": 7.525584993228869e-06, "loss": 0.1802, "step": 20736, "teacher_loss": 0.14945730566978455 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.32000505924224854, "learning_rate": 7.52361588736731e-06, "loss": 0.1868, "step": 20737, "teacher_loss": 0.17198756337165833 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.5569369792938232, "learning_rate": 7.521646952916823e-06, "loss": 0.2913, "step": 20738, "teacher_loss": 0.26179802417755127 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.36998504400253296, "learning_rate": 7.519678189922555e-06, "loss": 0.2243, "step": 20739, "teacher_loss": 0.20809680223464966 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4210659861564636, "learning_rate": 7.517709598429641e-06, "loss": 0.2203, "step": 20740, "teacher_loss": 0.19799906015396118 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2743741273880005, "learning_rate": 7.5157411784832e-06, "loss": 0.1695, "step": 20741, "teacher_loss": 0.15788379311561584 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.49082091450691223, "learning_rate": 7.513772930128386e-06, "loss": 0.2051, "step": 20742, "teacher_loss": 0.1733470857143402 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6030537486076355, "learning_rate": 7.511804853410314e-06, "loss": 0.281, "step": 20743, "teacher_loss": 0.24524493515491486 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4425712823867798, "learning_rate": 7.509836948374102e-06, "loss": 0.2721, "step": 20744, "teacher_loss": 0.25315406918525696 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.43324679136276245, "learning_rate": 7.507869215064871e-06, "loss": 0.1806, "step": 20745, "teacher_loss": 0.1525239795446396 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.20522859692573547, "learning_rate": 7.50590165352774e-06, "loss": 0.2204, "step": 20746, "teacher_loss": 0.22207316756248474 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.13888667523860931, "learning_rate": 7.503934263807813e-06, "loss": 0.1978, "step": 20747, "teacher_loss": 0.20436108112335205 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.25033554434776306, "learning_rate": 7.5019670459501974e-06, "loss": 0.1598, "step": 20748, "teacher_loss": 0.14969266951084137 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2894870638847351, "learning_rate": 7.500000000000004e-06, "loss": 0.2212, "step": 20749, "teacher_loss": 0.2136184275150299 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 1.0707168579101562, "learning_rate": 7.498033126002317e-06, "loss": 0.335, "step": 20750, "teacher_loss": 0.2532368302345276 }, { "epoch": 3.75, "eval_exact_match": 80.40681173131505, "eval_f1": 87.72404554738125, "step": 20750 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.20284032821655273, "learning_rate": 7.496066424002239e-06, "loss": 0.1546, "step": 20751, "teacher_loss": 0.1491994708776474 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.26079559326171875, "learning_rate": 7.4940998940448654e-06, "loss": 0.1975, "step": 20752, "teacher_loss": 0.19049356877803802 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2134438455104828, "learning_rate": 7.492133536175272e-06, "loss": 0.2185, "step": 20753, "teacher_loss": 0.2190854847431183 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6453027725219727, "learning_rate": 7.490167350438553e-06, "loss": 0.2175, "step": 20754, "teacher_loss": 0.16995762288570404 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6194392442703247, "learning_rate": 7.4882013368797745e-06, "loss": 0.2662, "step": 20755, "teacher_loss": 0.22695858776569366 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.3406556248664856, "learning_rate": 7.486235495544019e-06, "loss": 0.2577, "step": 20756, "teacher_loss": 0.2484908401966095 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.601486086845398, "learning_rate": 7.484269826476361e-06, "loss": 0.2502, "step": 20757, "teacher_loss": 0.21122130751609802 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.42010968923568726, "learning_rate": 7.482304329721857e-06, "loss": 0.1927, "step": 20758, "teacher_loss": 0.167444109916687 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.5542106628417969, "learning_rate": 7.480339005325576e-06, "loss": 0.341, "step": 20759, "teacher_loss": 0.3173280358314514 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6838760375976562, "learning_rate": 7.478373853332581e-06, "loss": 0.3701, "step": 20760, "teacher_loss": 0.33525902032852173 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6941088438034058, "learning_rate": 7.476408873787918e-06, "loss": 0.2933, "step": 20761, "teacher_loss": 0.2487148493528366 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.1887531280517578, "learning_rate": 7.474444066736647e-06, "loss": 0.1747, "step": 20762, "teacher_loss": 0.17308753728866577 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.392214298248291, "learning_rate": 7.472479432223806e-06, "loss": 0.24, "step": 20763, "teacher_loss": 0.22308039665222168 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2423935979604721, "learning_rate": 7.470514970294443e-06, "loss": 0.1707, "step": 20764, "teacher_loss": 0.1626831591129303 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.10280925035476685, "learning_rate": 7.468550680993604e-06, "loss": 0.1331, "step": 20765, "teacher_loss": 0.13645391166210175 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.3067972958087921, "learning_rate": 7.466586564366309e-06, "loss": 0.1922, "step": 20766, "teacher_loss": 0.1795041859149933 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.9471680521965027, "learning_rate": 7.464622620457605e-06, "loss": 0.2577, "step": 20767, "teacher_loss": 0.1810496747493744 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.15797367691993713, "learning_rate": 7.462658849312507e-06, "loss": 0.163, "step": 20768, "teacher_loss": 0.16355343163013458 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.5739122629165649, "learning_rate": 7.460695250976042e-06, "loss": 0.287, "step": 20769, "teacher_loss": 0.25512754917144775 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.9545706510543823, "learning_rate": 7.458731825493237e-06, "loss": 0.5573, "step": 20770, "teacher_loss": 0.5131410360336304 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.2062109261751175, "learning_rate": 7.456768572909097e-06, "loss": 0.2129, "step": 20771, "teacher_loss": 0.21362704038619995 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.9647200107574463, "learning_rate": 7.454805493268635e-06, "loss": 0.5632, "step": 20772, "teacher_loss": 0.5185519456863403 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.268298476934433, "learning_rate": 7.4528425866168705e-06, "loss": 0.2095, "step": 20773, "teacher_loss": 0.2029745727777481 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.969589114189148, "learning_rate": 7.450879852998791e-06, "loss": 0.4408, "step": 20774, "teacher_loss": 0.3820270299911499 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.4565858542919159, "learning_rate": 7.4489172924594014e-06, "loss": 0.2322, "step": 20775, "teacher_loss": 0.20724791288375854 }, { "compression_loss": 0.0, "epoch": 3.75, "label_loss": 0.6731314659118652, "learning_rate": 7.446954905043707e-06, "loss": 0.2596, "step": 20776, "teacher_loss": 0.21364635229110718 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4056048095226288, "learning_rate": 7.444992690796691e-06, "loss": 0.2527, "step": 20777, "teacher_loss": 0.23567092418670654 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2888379693031311, "learning_rate": 7.4430306497633286e-06, "loss": 0.3097, "step": 20778, "teacher_loss": 0.31204405426979065 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5134663581848145, "learning_rate": 7.441068781988628e-06, "loss": 0.3031, "step": 20779, "teacher_loss": 0.27977266907691956 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.3733491897583008, "learning_rate": 7.439107087517559e-06, "loss": 0.2276, "step": 20780, "teacher_loss": 0.21140369772911072 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.23875029385089874, "learning_rate": 7.437145566395088e-06, "loss": 0.205, "step": 20781, "teacher_loss": 0.2012852132320404 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.46868765354156494, "learning_rate": 7.435184218666195e-06, "loss": 0.2676, "step": 20782, "teacher_loss": 0.24521180987358093 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4392552077770233, "learning_rate": 7.433223044375854e-06, "loss": 0.2336, "step": 20783, "teacher_loss": 0.21076600253582 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.25900956988334656, "learning_rate": 7.431262043569016e-06, "loss": 0.2107, "step": 20784, "teacher_loss": 0.205328106880188 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.49829140305519104, "learning_rate": 7.429301216290648e-06, "loss": 0.2545, "step": 20785, "teacher_loss": 0.2273646593093872 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.9398878216743469, "learning_rate": 7.42734056258571e-06, "loss": 0.372, "step": 20786, "teacher_loss": 0.3088604211807251 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4491899013519287, "learning_rate": 7.425380082499144e-06, "loss": 0.24, "step": 20787, "teacher_loss": 0.2167256772518158 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4651971757411957, "learning_rate": 7.4234197760759015e-06, "loss": 0.286, "step": 20788, "teacher_loss": 0.26613929867744446 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.7286639213562012, "learning_rate": 7.421459643360934e-06, "loss": 0.2367, "step": 20789, "teacher_loss": 0.18205049633979797 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4562970995903015, "learning_rate": 7.419499684399175e-06, "loss": 0.2122, "step": 20790, "teacher_loss": 0.18506991863250732 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4934269189834595, "learning_rate": 7.417539899235549e-06, "loss": 0.2909, "step": 20791, "teacher_loss": 0.26839479804039 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5747343897819519, "learning_rate": 7.415580287915012e-06, "loss": 0.3562, "step": 20792, "teacher_loss": 0.3319389522075653 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.3341788053512573, "learning_rate": 7.413620850482479e-06, "loss": 0.246, "step": 20793, "teacher_loss": 0.23622292280197144 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.3715561330318451, "learning_rate": 7.411661586982871e-06, "loss": 0.2777, "step": 20794, "teacher_loss": 0.2672439217567444 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4468238353729248, "learning_rate": 7.409702497461109e-06, "loss": 0.2056, "step": 20795, "teacher_loss": 0.1787426769733429 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2073078453540802, "learning_rate": 7.407743581962119e-06, "loss": 0.1677, "step": 20796, "teacher_loss": 0.1633288860321045 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.6839919090270996, "learning_rate": 7.4057848405307995e-06, "loss": 0.3752, "step": 20797, "teacher_loss": 0.3409017324447632 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4029559791088104, "learning_rate": 7.403826273212066e-06, "loss": 0.2701, "step": 20798, "teacher_loss": 0.2552831172943115 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2680244445800781, "learning_rate": 7.401867880050827e-06, "loss": 0.1918, "step": 20799, "teacher_loss": 0.18330876529216766 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4653906226158142, "learning_rate": 7.39990966109197e-06, "loss": 0.3439, "step": 20800, "teacher_loss": 0.330363392829895 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.451619029045105, "learning_rate": 7.397951616380401e-06, "loss": 0.2554, "step": 20801, "teacher_loss": 0.23364028334617615 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.28162825107574463, "learning_rate": 7.395993745961012e-06, "loss": 0.1898, "step": 20802, "teacher_loss": 0.17959347367286682 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.7520164251327515, "learning_rate": 7.3940360498786904e-06, "loss": 0.2888, "step": 20803, "teacher_loss": 0.23734937608242035 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2984544634819031, "learning_rate": 7.392078528178312e-06, "loss": 0.1921, "step": 20804, "teacher_loss": 0.1802406907081604 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.25831353664398193, "learning_rate": 7.390121180904763e-06, "loss": 0.2208, "step": 20805, "teacher_loss": 0.21664166450500488 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5697411298751831, "learning_rate": 7.388164008102926e-06, "loss": 0.4947, "step": 20806, "teacher_loss": 0.48634475469589233 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5164893269538879, "learning_rate": 7.38620700981766e-06, "loss": 0.2506, "step": 20807, "teacher_loss": 0.2210976928472519 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.28908205032348633, "learning_rate": 7.384250186093841e-06, "loss": 0.1901, "step": 20808, "teacher_loss": 0.17913052439689636 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.6624298095703125, "learning_rate": 7.3822935369763375e-06, "loss": 0.2286, "step": 20809, "teacher_loss": 0.1804494559764862 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.28073588013648987, "learning_rate": 7.380337062509998e-06, "loss": 0.2653, "step": 20810, "teacher_loss": 0.26360517740249634 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.349109947681427, "learning_rate": 7.378380762739685e-06, "loss": 0.1806, "step": 20811, "teacher_loss": 0.1619122326374054 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2896420955657959, "learning_rate": 7.376424637710257e-06, "loss": 0.2882, "step": 20812, "teacher_loss": 0.28800883889198303 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5932540893554688, "learning_rate": 7.374468687466548e-06, "loss": 0.2707, "step": 20813, "teacher_loss": 0.2348783016204834 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.7519567012786865, "learning_rate": 7.372512912053411e-06, "loss": 0.2854, "step": 20814, "teacher_loss": 0.23355445265769958 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.17227578163146973, "learning_rate": 7.370557311515689e-06, "loss": 0.1767, "step": 20815, "teacher_loss": 0.17721986770629883 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5862759351730347, "learning_rate": 7.368601885898208e-06, "loss": 0.367, "step": 20816, "teacher_loss": 0.34264010190963745 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.19032594561576843, "learning_rate": 7.366646635245812e-06, "loss": 0.17, "step": 20817, "teacher_loss": 0.1677112877368927 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.40794655680656433, "learning_rate": 7.3646915596033165e-06, "loss": 0.2471, "step": 20818, "teacher_loss": 0.2292812317609787 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.45370203256607056, "learning_rate": 7.3627366590155515e-06, "loss": 0.2238, "step": 20819, "teacher_loss": 0.19825172424316406 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.6985903978347778, "learning_rate": 7.360781933527343e-06, "loss": 0.3495, "step": 20820, "teacher_loss": 0.31075701117515564 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.310874879360199, "learning_rate": 7.358827383183497e-06, "loss": 0.1894, "step": 20821, "teacher_loss": 0.17586715519428253 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.24492311477661133, "learning_rate": 7.356873008028834e-06, "loss": 0.2219, "step": 20822, "teacher_loss": 0.2193339616060257 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.2357134222984314, "learning_rate": 7.354918808108152e-06, "loss": 0.1816, "step": 20823, "teacher_loss": 0.17560246586799622 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5981073379516602, "learning_rate": 7.352964783466261e-06, "loss": 0.3156, "step": 20824, "teacher_loss": 0.2841559946537018 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.23481610417366028, "learning_rate": 7.351010934147965e-06, "loss": 0.2163, "step": 20825, "teacher_loss": 0.214248925447464 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.4718189835548401, "learning_rate": 7.34905726019805e-06, "loss": 0.2146, "step": 20826, "teacher_loss": 0.18602976202964783 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.46568188071250916, "learning_rate": 7.34710376166132e-06, "loss": 0.2754, "step": 20827, "teacher_loss": 0.25426435470581055 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.5828574299812317, "learning_rate": 7.34515043858255e-06, "loss": 0.2448, "step": 20828, "teacher_loss": 0.20720547437667847 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.41344600915908813, "learning_rate": 7.343197291006531e-06, "loss": 0.2334, "step": 20829, "teacher_loss": 0.21338143944740295 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.43301451206207275, "learning_rate": 7.341244318978046e-06, "loss": 0.2463, "step": 20830, "teacher_loss": 0.2255607545375824 }, { "compression_loss": 0.0, "epoch": 3.76, "label_loss": 0.36874908208847046, "learning_rate": 7.339291522541861e-06, "loss": 0.2803, "step": 20831, "teacher_loss": 0.2705221176147461 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.22135399281978607, "learning_rate": 7.337338901742754e-06, "loss": 0.1627, "step": 20832, "teacher_loss": 0.15615665912628174 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6512225866317749, "learning_rate": 7.3353864566254975e-06, "loss": 0.2871, "step": 20833, "teacher_loss": 0.2466907501220703 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.24554145336151123, "learning_rate": 7.3334341872348445e-06, "loss": 0.2228, "step": 20834, "teacher_loss": 0.22029517590999603 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4795461595058441, "learning_rate": 7.33148209361556e-06, "loss": 0.4373, "step": 20835, "teacher_loss": 0.4326130151748657 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 1.1043647527694702, "learning_rate": 7.329530175812406e-06, "loss": 0.3464, "step": 20836, "teacher_loss": 0.2621748447418213 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.8489279747009277, "learning_rate": 7.327578433870121e-06, "loss": 0.2893, "step": 20837, "teacher_loss": 0.22709746658802032 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.48972082138061523, "learning_rate": 7.32562686783346e-06, "loss": 0.3263, "step": 20838, "teacher_loss": 0.30818068981170654 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.37717700004577637, "learning_rate": 7.323675477747171e-06, "loss": 0.2243, "step": 20839, "teacher_loss": 0.20733988285064697 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3475998044013977, "learning_rate": 7.321724263655989e-06, "loss": 0.2944, "step": 20840, "teacher_loss": 0.2885274887084961 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.09062592685222626, "learning_rate": 7.319773225604638e-06, "loss": 0.1162, "step": 20841, "teacher_loss": 0.11907285451889038 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.7083640098571777, "learning_rate": 7.317822363637872e-06, "loss": 0.2279, "step": 20842, "teacher_loss": 0.17451521754264832 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.29006364941596985, "learning_rate": 7.315871677800406e-06, "loss": 0.2013, "step": 20843, "teacher_loss": 0.19147752225399017 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.34421366453170776, "learning_rate": 7.3139211681369586e-06, "loss": 0.2004, "step": 20844, "teacher_loss": 0.18447571992874146 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.600397527217865, "learning_rate": 7.311970834692255e-06, "loss": 0.2618, "step": 20845, "teacher_loss": 0.22417312860488892 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6416093111038208, "learning_rate": 7.3100206775110165e-06, "loss": 0.3441, "step": 20846, "teacher_loss": 0.310992956161499 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.13265115022659302, "learning_rate": 7.30807069663794e-06, "loss": 0.2227, "step": 20847, "teacher_loss": 0.23272722959518433 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.37863844633102417, "learning_rate": 7.306120892117743e-06, "loss": 0.3172, "step": 20848, "teacher_loss": 0.3104074001312256 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2519739866256714, "learning_rate": 7.304171263995132e-06, "loss": 0.2236, "step": 20849, "teacher_loss": 0.22041761875152588 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3375350832939148, "learning_rate": 7.302221812314801e-06, "loss": 0.2161, "step": 20850, "teacher_loss": 0.20255626738071442 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2439296990633011, "learning_rate": 7.300272537121433e-06, "loss": 0.2147, "step": 20851, "teacher_loss": 0.21144267916679382 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.7562127113342285, "learning_rate": 7.2983234384597404e-06, "loss": 0.2484, "step": 20852, "teacher_loss": 0.19194942712783813 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4540614187717438, "learning_rate": 7.2963745163744026e-06, "loss": 0.2384, "step": 20853, "teacher_loss": 0.2144690752029419 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6738770604133606, "learning_rate": 7.294425770910088e-06, "loss": 0.2537, "step": 20854, "teacher_loss": 0.20696386694908142 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6841431856155396, "learning_rate": 7.292477202111501e-06, "loss": 0.3347, "step": 20855, "teacher_loss": 0.29586371779441833 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4516450762748718, "learning_rate": 7.290528810023302e-06, "loss": 0.2078, "step": 20856, "teacher_loss": 0.1807451844215393 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.23400571942329407, "learning_rate": 7.288580594690157e-06, "loss": 0.2043, "step": 20857, "teacher_loss": 0.20101910829544067 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.12988528609275818, "learning_rate": 7.28663255615674e-06, "loss": 0.1384, "step": 20858, "teacher_loss": 0.13936671614646912 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.49110034108161926, "learning_rate": 7.284684694467717e-06, "loss": 0.222, "step": 20859, "teacher_loss": 0.19211995601654053 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.24366320669651031, "learning_rate": 7.282737009667738e-06, "loss": 0.3268, "step": 20860, "teacher_loss": 0.3360125422477722 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.17032060027122498, "learning_rate": 7.280789501801461e-06, "loss": 0.1447, "step": 20861, "teacher_loss": 0.14180141687393188 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2818715572357178, "learning_rate": 7.2788421709135445e-06, "loss": 0.1726, "step": 20862, "teacher_loss": 0.16046112775802612 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.38449782133102417, "learning_rate": 7.276895017048621e-06, "loss": 0.255, "step": 20863, "teacher_loss": 0.24063386023044586 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6231509447097778, "learning_rate": 7.2749480402513394e-06, "loss": 0.3675, "step": 20864, "teacher_loss": 0.33914023637771606 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.6110333204269409, "learning_rate": 7.273001240566343e-06, "loss": 0.2753, "step": 20865, "teacher_loss": 0.23794592916965485 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.45223790407180786, "learning_rate": 7.271054618038264e-06, "loss": 0.2339, "step": 20866, "teacher_loss": 0.20965829491615295 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4540024995803833, "learning_rate": 7.269108172711722e-06, "loss": 0.2203, "step": 20867, "teacher_loss": 0.19429796934127808 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.40884512662887573, "learning_rate": 7.2671619046313525e-06, "loss": 0.2367, "step": 20868, "teacher_loss": 0.21762433648109436 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2868151366710663, "learning_rate": 7.265215813841782e-06, "loss": 0.2914, "step": 20869, "teacher_loss": 0.2919510006904602 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.1320994794368744, "learning_rate": 7.263269900387618e-06, "loss": 0.1359, "step": 20870, "teacher_loss": 0.13634318113327026 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.5066434741020203, "learning_rate": 7.261324164313477e-06, "loss": 0.2777, "step": 20871, "teacher_loss": 0.252309650182724 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2875855267047882, "learning_rate": 7.259378605663979e-06, "loss": 0.1866, "step": 20872, "teacher_loss": 0.17539288103580475 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.26620596647262573, "learning_rate": 7.257433224483714e-06, "loss": 0.1747, "step": 20873, "teacher_loss": 0.16452372074127197 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.16928622126579285, "learning_rate": 7.255488020817293e-06, "loss": 0.1459, "step": 20874, "teacher_loss": 0.14334167540073395 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 1.0552732944488525, "learning_rate": 7.253542994709316e-06, "loss": 0.3185, "step": 20875, "teacher_loss": 0.2366788387298584 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.30774903297424316, "learning_rate": 7.251598146204371e-06, "loss": 0.2427, "step": 20876, "teacher_loss": 0.2354506105184555 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.2725062072277069, "learning_rate": 7.249653475347054e-06, "loss": 0.2206, "step": 20877, "teacher_loss": 0.21486398577690125 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4646933078765869, "learning_rate": 7.247708982181939e-06, "loss": 0.2537, "step": 20878, "teacher_loss": 0.2302594929933548 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3467743396759033, "learning_rate": 7.245764666753617e-06, "loss": 0.2544, "step": 20879, "teacher_loss": 0.24415363371372223 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.5541468262672424, "learning_rate": 7.243820529106667e-06, "loss": 0.1854, "step": 20880, "teacher_loss": 0.14439867436885834 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.5157880187034607, "learning_rate": 7.241876569285651e-06, "loss": 0.2461, "step": 20881, "teacher_loss": 0.2161901891231537 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3942872881889343, "learning_rate": 7.239932787335147e-06, "loss": 0.2899, "step": 20882, "teacher_loss": 0.278264582157135 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.37743228673934937, "learning_rate": 7.237989183299724e-06, "loss": 0.1953, "step": 20883, "teacher_loss": 0.1750173419713974 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.765160322189331, "learning_rate": 7.236045757223931e-06, "loss": 0.4015, "step": 20884, "teacher_loss": 0.36105063557624817 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.4167137145996094, "learning_rate": 7.234102509152336e-06, "loss": 0.2671, "step": 20885, "teacher_loss": 0.25046032667160034 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3473365306854248, "learning_rate": 7.232159439129484e-06, "loss": 0.271, "step": 20886, "teacher_loss": 0.26248565316200256 }, { "compression_loss": 0.0, "epoch": 3.77, "label_loss": 0.3602268397808075, "learning_rate": 7.230216547199925e-06, "loss": 0.19, "step": 20887, "teacher_loss": 0.17105732858181 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5267833471298218, "learning_rate": 7.228273833408211e-06, "loss": 0.2156, "step": 20888, "teacher_loss": 0.18102025985717773 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6122809052467346, "learning_rate": 7.226331297798872e-06, "loss": 0.2963, "step": 20889, "teacher_loss": 0.261149525642395 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.51596599817276, "learning_rate": 7.2243889404164555e-06, "loss": 0.1937, "step": 20890, "teacher_loss": 0.15793634951114655 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.38295289874076843, "learning_rate": 7.222446761305483e-06, "loss": 0.2461, "step": 20891, "teacher_loss": 0.2308589220046997 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.2524401843547821, "learning_rate": 7.2205047605104865e-06, "loss": 0.1371, "step": 20892, "teacher_loss": 0.12423904240131378 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.4336429238319397, "learning_rate": 7.218562938075999e-06, "loss": 0.2132, "step": 20893, "teacher_loss": 0.18868887424468994 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.3345625400543213, "learning_rate": 7.216621294046527e-06, "loss": 0.2492, "step": 20894, "teacher_loss": 0.23971731960773468 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.742740273475647, "learning_rate": 7.214679828466593e-06, "loss": 0.3532, "step": 20895, "teacher_loss": 0.3098759055137634 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.2610739469528198, "learning_rate": 7.212738541380714e-06, "loss": 0.2035, "step": 20896, "teacher_loss": 0.19704784452915192 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6191931962966919, "learning_rate": 7.210797432833388e-06, "loss": 0.2289, "step": 20897, "teacher_loss": 0.18557915091514587 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.9858016967773438, "learning_rate": 7.208856502869122e-06, "loss": 0.3076, "step": 20898, "teacher_loss": 0.23226192593574524 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6767506003379822, "learning_rate": 7.206915751532425e-06, "loss": 0.5641, "step": 20899, "teacher_loss": 0.551571786403656 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.24747434258460999, "learning_rate": 7.204975178867783e-06, "loss": 0.1922, "step": 20900, "teacher_loss": 0.18601638078689575 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.49998316168785095, "learning_rate": 7.203034784919681e-06, "loss": 0.2239, "step": 20901, "teacher_loss": 0.1931706666946411 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6947510242462158, "learning_rate": 7.201094569732623e-06, "loss": 0.2851, "step": 20902, "teacher_loss": 0.23953962326049805 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.33247101306915283, "learning_rate": 7.199154533351086e-06, "loss": 0.2172, "step": 20903, "teacher_loss": 0.20434662699699402 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.3905603587627411, "learning_rate": 7.197214675819536e-06, "loss": 0.2172, "step": 20904, "teacher_loss": 0.19794002175331116 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.601241409778595, "learning_rate": 7.1952749971824714e-06, "loss": 0.2237, "step": 20905, "teacher_loss": 0.1817316710948944 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.7119176983833313, "learning_rate": 7.193335497484349e-06, "loss": 0.2628, "step": 20906, "teacher_loss": 0.21287992596626282 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6558371782302856, "learning_rate": 7.1913961767696344e-06, "loss": 0.2668, "step": 20907, "teacher_loss": 0.22353026270866394 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5078572034835815, "learning_rate": 7.189457035082794e-06, "loss": 0.2156, "step": 20908, "teacher_loss": 0.18311774730682373 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.8642302751541138, "learning_rate": 7.187518072468291e-06, "loss": 0.2797, "step": 20909, "teacher_loss": 0.2147475779056549 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.4342668056488037, "learning_rate": 7.185579288970571e-06, "loss": 0.176, "step": 20910, "teacher_loss": 0.1472967565059662 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.4895259737968445, "learning_rate": 7.183640684634089e-06, "loss": 0.2315, "step": 20911, "teacher_loss": 0.20279614627361298 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.49807092547416687, "learning_rate": 7.181702259503296e-06, "loss": 0.2732, "step": 20912, "teacher_loss": 0.2482655942440033 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 1.2886818647384644, "learning_rate": 7.179764013622631e-06, "loss": 0.3024, "step": 20913, "teacher_loss": 0.19279402494430542 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.711980402469635, "learning_rate": 7.1778259470365185e-06, "loss": 0.2405, "step": 20914, "teacher_loss": 0.18815943598747253 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.15923148393630981, "learning_rate": 7.175888059789418e-06, "loss": 0.1291, "step": 20915, "teacher_loss": 0.12570597231388092 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5589304566383362, "learning_rate": 7.173950351925746e-06, "loss": 0.3095, "step": 20916, "teacher_loss": 0.28174859285354614 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6440385580062866, "learning_rate": 7.172012823489921e-06, "loss": 0.2838, "step": 20917, "teacher_loss": 0.24373877048492432 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6720754504203796, "learning_rate": 7.170075474526373e-06, "loss": 0.2991, "step": 20918, "teacher_loss": 0.2576434016227722 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6613928079605103, "learning_rate": 7.1681383050795275e-06, "loss": 0.2648, "step": 20919, "teacher_loss": 0.22068515419960022 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5318841338157654, "learning_rate": 7.166201315193781e-06, "loss": 0.2313, "step": 20920, "teacher_loss": 0.1978864073753357 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6168327927589417, "learning_rate": 7.164264504913552e-06, "loss": 0.2754, "step": 20921, "teacher_loss": 0.23745819926261902 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.32774221897125244, "learning_rate": 7.162327874283251e-06, "loss": 0.1931, "step": 20922, "teacher_loss": 0.17813856899738312 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.22952650487422943, "learning_rate": 7.160391423347267e-06, "loss": 0.1566, "step": 20923, "teacher_loss": 0.14851021766662598 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.2600582242012024, "learning_rate": 7.158455152150004e-06, "loss": 0.1664, "step": 20924, "teacher_loss": 0.15599417686462402 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.3207114040851593, "learning_rate": 7.156519060735861e-06, "loss": 0.2424, "step": 20925, "teacher_loss": 0.23366346955299377 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.20522890985012054, "learning_rate": 7.154583149149212e-06, "loss": 0.3245, "step": 20926, "teacher_loss": 0.3377407193183899 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5513725280761719, "learning_rate": 7.152647417434456e-06, "loss": 0.3456, "step": 20927, "teacher_loss": 0.3227009177207947 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.2717287838459015, "learning_rate": 7.150711865635962e-06, "loss": 0.2548, "step": 20928, "teacher_loss": 0.25286778807640076 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.8975619077682495, "learning_rate": 7.148776493798118e-06, "loss": 0.2593, "step": 20929, "teacher_loss": 0.188373863697052 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.29291635751724243, "learning_rate": 7.146841301965284e-06, "loss": 0.185, "step": 20930, "teacher_loss": 0.17304736375808716 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.44363588094711304, "learning_rate": 7.144906290181832e-06, "loss": 0.201, "step": 20931, "teacher_loss": 0.174034982919693 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5962947010993958, "learning_rate": 7.142971458492136e-06, "loss": 0.1849, "step": 20932, "teacher_loss": 0.13924050331115723 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5456302165985107, "learning_rate": 7.14103680694054e-06, "loss": 0.3215, "step": 20933, "teacher_loss": 0.29663681983947754 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.1243998259305954, "learning_rate": 7.139102335571409e-06, "loss": 0.1244, "step": 20934, "teacher_loss": 0.1244472861289978 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.2211330085992813, "learning_rate": 7.137168044429099e-06, "loss": 0.1481, "step": 20935, "teacher_loss": 0.13996800780296326 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6663065552711487, "learning_rate": 7.135233933557945e-06, "loss": 0.3028, "step": 20936, "teacher_loss": 0.2623865008354187 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.5568263530731201, "learning_rate": 7.133300003002298e-06, "loss": 0.2409, "step": 20937, "teacher_loss": 0.20585143566131592 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.6896703839302063, "learning_rate": 7.131366252806501e-06, "loss": 0.2174, "step": 20938, "teacher_loss": 0.1649707555770874 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.4019356369972229, "learning_rate": 7.12943268301488e-06, "loss": 0.2026, "step": 20939, "teacher_loss": 0.180498868227005 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.4665055572986603, "learning_rate": 7.127499293671776e-06, "loss": 0.2278, "step": 20940, "teacher_loss": 0.20129835605621338 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.7757083773612976, "learning_rate": 7.125566084821504e-06, "loss": 0.2774, "step": 20941, "teacher_loss": 0.2219896763563156 }, { "compression_loss": 0.0, "epoch": 3.78, "label_loss": 0.9921225309371948, "learning_rate": 7.123633056508393e-06, "loss": 0.3373, "step": 20942, "teacher_loss": 0.2645341753959656 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.2844240665435791, "learning_rate": 7.1217002087767655e-06, "loss": 0.2403, "step": 20943, "teacher_loss": 0.23539794981479645 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.44497138261795044, "learning_rate": 7.119767541670928e-06, "loss": 0.1854, "step": 20944, "teacher_loss": 0.15654712915420532 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 1.0019617080688477, "learning_rate": 7.117835055235195e-06, "loss": 0.3468, "step": 20945, "teacher_loss": 0.2740297019481659 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.6010870933532715, "learning_rate": 7.115902749513877e-06, "loss": 0.2205, "step": 20946, "teacher_loss": 0.17817196249961853 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.6027884483337402, "learning_rate": 7.113970624551266e-06, "loss": 0.33, "step": 20947, "teacher_loss": 0.29969367384910583 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.33525532484054565, "learning_rate": 7.112038680391671e-06, "loss": 0.2524, "step": 20948, "teacher_loss": 0.24323342740535736 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4300152063369751, "learning_rate": 7.110106917079373e-06, "loss": 0.3034, "step": 20949, "teacher_loss": 0.28936296701431274 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.813190758228302, "learning_rate": 7.108175334658675e-06, "loss": 0.3276, "step": 20950, "teacher_loss": 0.2736360430717468 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.27036961913108826, "learning_rate": 7.106243933173849e-06, "loss": 0.1644, "step": 20951, "teacher_loss": 0.1526794284582138 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4308510422706604, "learning_rate": 7.104312712669183e-06, "loss": 0.2561, "step": 20952, "teacher_loss": 0.23672431707382202 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.33989912271499634, "learning_rate": 7.102381673188961e-06, "loss": 0.2492, "step": 20953, "teacher_loss": 0.23914164304733276 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5601613521575928, "learning_rate": 7.100450814777443e-06, "loss": 0.2172, "step": 20954, "teacher_loss": 0.1790561079978943 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.22413137555122375, "learning_rate": 7.098520137478902e-06, "loss": 0.1791, "step": 20955, "teacher_loss": 0.17414340376853943 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5522635579109192, "learning_rate": 7.096589641337611e-06, "loss": 0.3217, "step": 20956, "teacher_loss": 0.2960282862186432 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.29903745651245117, "learning_rate": 7.094659326397818e-06, "loss": 0.2091, "step": 20957, "teacher_loss": 0.1991077959537506 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.3318334221839905, "learning_rate": 7.092729192703786e-06, "loss": 0.1981, "step": 20958, "teacher_loss": 0.1832163780927658 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.14552034437656403, "learning_rate": 7.0907992402997715e-06, "loss": 0.1534, "step": 20959, "teacher_loss": 0.1542649269104004 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.23334616422653198, "learning_rate": 7.088869469230013e-06, "loss": 0.1518, "step": 20960, "teacher_loss": 0.1427372395992279 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.9505484104156494, "learning_rate": 7.086939879538756e-06, "loss": 0.3765, "step": 20961, "teacher_loss": 0.3126862943172455 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.28648293018341064, "learning_rate": 7.085010471270252e-06, "loss": 0.1921, "step": 20962, "teacher_loss": 0.1816261112689972 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4025924801826477, "learning_rate": 7.083081244468725e-06, "loss": 0.2165, "step": 20963, "teacher_loss": 0.1958579421043396 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4363923668861389, "learning_rate": 7.081152199178399e-06, "loss": 0.2188, "step": 20964, "teacher_loss": 0.19462257623672485 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.39485037326812744, "learning_rate": 7.07922333544352e-06, "loss": 0.222, "step": 20965, "teacher_loss": 0.2027878761291504 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.376198947429657, "learning_rate": 7.077294653308305e-06, "loss": 0.249, "step": 20966, "teacher_loss": 0.23481214046478271 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.34262073040008545, "learning_rate": 7.075366152816963e-06, "loss": 0.2261, "step": 20967, "teacher_loss": 0.21318262815475464 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.6181092262268066, "learning_rate": 7.0734378340137166e-06, "loss": 0.259, "step": 20968, "teacher_loss": 0.2190513014793396 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.41529470682144165, "learning_rate": 7.071509696942781e-06, "loss": 0.235, "step": 20969, "teacher_loss": 0.2150205671787262 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5941544771194458, "learning_rate": 7.069581741648351e-06, "loss": 0.2399, "step": 20970, "teacher_loss": 0.2004947066307068 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.397749662399292, "learning_rate": 7.067653968174636e-06, "loss": 0.3289, "step": 20971, "teacher_loss": 0.3212510347366333 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5265370011329651, "learning_rate": 7.065726376565839e-06, "loss": 0.2372, "step": 20972, "teacher_loss": 0.20500922203063965 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.22794246673583984, "learning_rate": 7.063798966866147e-06, "loss": 0.1873, "step": 20973, "teacher_loss": 0.1827775537967682 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.41547369956970215, "learning_rate": 7.06187173911974e-06, "loss": 0.2052, "step": 20974, "teacher_loss": 0.18178215622901917 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.48197731375694275, "learning_rate": 7.059944693370825e-06, "loss": 0.2123, "step": 20975, "teacher_loss": 0.18234783411026 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.15815985202789307, "learning_rate": 7.058017829663573e-06, "loss": 0.1688, "step": 20976, "teacher_loss": 0.17002446949481964 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.270396888256073, "learning_rate": 7.05609114804215e-06, "loss": 0.2024, "step": 20977, "teacher_loss": 0.19486625492572784 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.705931544303894, "learning_rate": 7.054164648550749e-06, "loss": 0.276, "step": 20978, "teacher_loss": 0.22827589511871338 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.2824687659740448, "learning_rate": 7.052238331233533e-06, "loss": 0.2089, "step": 20979, "teacher_loss": 0.20077916979789734 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.9517744183540344, "learning_rate": 7.050312196134655e-06, "loss": 0.3425, "step": 20980, "teacher_loss": 0.2747650742530823 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.36999624967575073, "learning_rate": 7.048386243298286e-06, "loss": 0.1804, "step": 20981, "teacher_loss": 0.15937253832817078 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.44541680812835693, "learning_rate": 7.046460472768586e-06, "loss": 0.1996, "step": 20982, "teacher_loss": 0.1723167598247528 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.6684302091598511, "learning_rate": 7.044534884589694e-06, "loss": 0.294, "step": 20983, "teacher_loss": 0.2524460554122925 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4397996664047241, "learning_rate": 7.042609478805766e-06, "loss": 0.1994, "step": 20984, "teacher_loss": 0.1727360188961029 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5474628806114197, "learning_rate": 7.0406842554609515e-06, "loss": 0.3218, "step": 20985, "teacher_loss": 0.29667186737060547 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5723136067390442, "learning_rate": 7.038759214599378e-06, "loss": 0.2689, "step": 20986, "teacher_loss": 0.23519571125507355 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.24437585473060608, "learning_rate": 7.036834356265186e-06, "loss": 0.2143, "step": 20987, "teacher_loss": 0.21095268428325653 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.38687533140182495, "learning_rate": 7.034909680502514e-06, "loss": 0.1942, "step": 20988, "teacher_loss": 0.17277246713638306 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.27932319045066833, "learning_rate": 7.032985187355481e-06, "loss": 0.193, "step": 20989, "teacher_loss": 0.18339186906814575 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.4516412913799286, "learning_rate": 7.0310608768682065e-06, "loss": 0.2218, "step": 20990, "teacher_loss": 0.19622871279716492 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.32434624433517456, "learning_rate": 7.029136749084815e-06, "loss": 0.1787, "step": 20991, "teacher_loss": 0.16256055235862732 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.517440676689148, "learning_rate": 7.027212804049424e-06, "loss": 0.253, "step": 20992, "teacher_loss": 0.223586767911911 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5337303876876831, "learning_rate": 7.025289041806134e-06, "loss": 0.3084, "step": 20993, "teacher_loss": 0.2833779454231262 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.5473419427871704, "learning_rate": 7.023365462399058e-06, "loss": 0.2054, "step": 20994, "teacher_loss": 0.1674402356147766 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.27413833141326904, "learning_rate": 7.021442065872303e-06, "loss": 0.2013, "step": 20995, "teacher_loss": 0.19316229224205017 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.2361699640750885, "learning_rate": 7.019518852269953e-06, "loss": 0.1616, "step": 20996, "teacher_loss": 0.15330219268798828 }, { "compression_loss": 0.0, "epoch": 3.79, "label_loss": 0.8159303069114685, "learning_rate": 7.017595821636109e-06, "loss": 0.3536, "step": 20997, "teacher_loss": 0.30223768949508667 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5530143976211548, "learning_rate": 7.015672974014867e-06, "loss": 0.1947, "step": 20998, "teacher_loss": 0.15485063195228577 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3127064108848572, "learning_rate": 7.013750309450299e-06, "loss": 0.1861, "step": 20999, "teacher_loss": 0.17207646369934082 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.17733411490917206, "learning_rate": 7.011827827986499e-06, "loss": 0.1373, "step": 21000, "teacher_loss": 0.13282468914985657 }, { "epoch": 3.8, "eval_exact_match": 80.09460737937559, "eval_f1": 87.52990201813554, "step": 21000 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6839249134063721, "learning_rate": 7.009905529667531e-06, "loss": 0.2579, "step": 21001, "teacher_loss": 0.21056047081947327 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5756430625915527, "learning_rate": 7.0079834145374744e-06, "loss": 0.2302, "step": 21002, "teacher_loss": 0.1918008029460907 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.7244046926498413, "learning_rate": 7.006061482640402e-06, "loss": 0.3214, "step": 21003, "teacher_loss": 0.2765883207321167 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.2491282969713211, "learning_rate": 7.004139734020366e-06, "loss": 0.2171, "step": 21004, "teacher_loss": 0.21359427273273468 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.22341501712799072, "learning_rate": 7.0022181687214335e-06, "loss": 0.1711, "step": 21005, "teacher_loss": 0.16524332761764526 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.408224493265152, "learning_rate": 7.000296786787666e-06, "loss": 0.2195, "step": 21006, "teacher_loss": 0.19849984347820282 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4759189188480377, "learning_rate": 6.9983755882631026e-06, "loss": 0.2023, "step": 21007, "teacher_loss": 0.17186099290847778 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5682344436645508, "learning_rate": 6.996454573191799e-06, "loss": 0.2976, "step": 21008, "teacher_loss": 0.2675284743309021 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.37052714824676514, "learning_rate": 6.994533741617799e-06, "loss": 0.2041, "step": 21009, "teacher_loss": 0.18565037846565247 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.23669889569282532, "learning_rate": 6.992613093585134e-06, "loss": 0.2232, "step": 21010, "teacher_loss": 0.221745103597641 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3380270302295685, "learning_rate": 6.990692629137849e-06, "loss": 0.1864, "step": 21011, "teacher_loss": 0.169529989361763 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3921695351600647, "learning_rate": 6.988772348319964e-06, "loss": 0.2546, "step": 21012, "teacher_loss": 0.23934721946716309 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.13165238499641418, "learning_rate": 6.986852251175515e-06, "loss": 0.1394, "step": 21013, "teacher_loss": 0.1402832269668579 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.47960513830184937, "learning_rate": 6.984932337748513e-06, "loss": 0.3595, "step": 21014, "teacher_loss": 0.3461185097694397 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6147972345352173, "learning_rate": 6.983012608082983e-06, "loss": 0.2809, "step": 21015, "teacher_loss": 0.24375228583812714 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6853942275047302, "learning_rate": 6.981093062222944e-06, "loss": 0.2826, "step": 21016, "teacher_loss": 0.23787739872932434 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.14575828611850739, "learning_rate": 6.979173700212393e-06, "loss": 0.1684, "step": 21017, "teacher_loss": 0.170942023396492 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.23127904534339905, "learning_rate": 6.977254522095341e-06, "loss": 0.2312, "step": 21018, "teacher_loss": 0.23120540380477905 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5682312846183777, "learning_rate": 6.975335527915794e-06, "loss": 0.2887, "step": 21019, "teacher_loss": 0.2575920820236206 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4151148498058319, "learning_rate": 6.97341671771774e-06, "loss": 0.2517, "step": 21020, "teacher_loss": 0.23349085450172424 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3168506622314453, "learning_rate": 6.971498091545174e-06, "loss": 0.2338, "step": 21021, "teacher_loss": 0.22458340227603912 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6118311285972595, "learning_rate": 6.969579649442094e-06, "loss": 0.297, "step": 21022, "teacher_loss": 0.2620731592178345 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.7122552394866943, "learning_rate": 6.9676613914524744e-06, "loss": 0.2682, "step": 21023, "teacher_loss": 0.21887817978858948 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3397955298423767, "learning_rate": 6.965743317620288e-06, "loss": 0.2172, "step": 21024, "teacher_loss": 0.20352382957935333 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4656582474708557, "learning_rate": 6.963825427989529e-06, "loss": 0.2071, "step": 21025, "teacher_loss": 0.17833131551742554 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4977899193763733, "learning_rate": 6.961907722604158e-06, "loss": 0.2538, "step": 21026, "teacher_loss": 0.22672194242477417 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.623198926448822, "learning_rate": 6.959990201508133e-06, "loss": 0.2364, "step": 21027, "teacher_loss": 0.19347065687179565 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.15967178344726562, "learning_rate": 6.958072864745441e-06, "loss": 0.1182, "step": 21028, "teacher_loss": 0.11361486464738846 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.26478299498558044, "learning_rate": 6.9561557123600244e-06, "loss": 0.1845, "step": 21029, "teacher_loss": 0.17555615305900574 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6103314161300659, "learning_rate": 6.9542387443958365e-06, "loss": 0.2461, "step": 21030, "teacher_loss": 0.20566898584365845 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.31152522563934326, "learning_rate": 6.95232196089683e-06, "loss": 0.2233, "step": 21031, "teacher_loss": 0.21348586678504944 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5567577481269836, "learning_rate": 6.95040536190696e-06, "loss": 0.2002, "step": 21032, "teacher_loss": 0.16059619188308716 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.49905839562416077, "learning_rate": 6.948488947470156e-06, "loss": 0.243, "step": 21033, "teacher_loss": 0.2146032154560089 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5236203670501709, "learning_rate": 6.94657271763036e-06, "loss": 0.3276, "step": 21034, "teacher_loss": 0.3058511018753052 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5662654042243958, "learning_rate": 6.944656672431512e-06, "loss": 0.2389, "step": 21035, "teacher_loss": 0.20250052213668823 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.29981210827827454, "learning_rate": 6.942740811917535e-06, "loss": 0.2146, "step": 21036, "teacher_loss": 0.20509478449821472 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.19058065116405487, "learning_rate": 6.940825136132342e-06, "loss": 0.2701, "step": 21037, "teacher_loss": 0.27888914942741394 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.443927526473999, "learning_rate": 6.938909645119879e-06, "loss": 0.2009, "step": 21038, "teacher_loss": 0.17391954362392426 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.49460023641586304, "learning_rate": 6.936994338924049e-06, "loss": 0.234, "step": 21039, "teacher_loss": 0.20503324270248413 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.19955509901046753, "learning_rate": 6.9350792175887576e-06, "loss": 0.1589, "step": 21040, "teacher_loss": 0.1543952226638794 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.6149266362190247, "learning_rate": 6.933164281157922e-06, "loss": 0.2972, "step": 21041, "teacher_loss": 0.2619290351867676 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.5166228413581848, "learning_rate": 6.931249529675449e-06, "loss": 0.2044, "step": 21042, "teacher_loss": 0.16966111958026886 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.739244282245636, "learning_rate": 6.9293349631852255e-06, "loss": 0.2731, "step": 21043, "teacher_loss": 0.22129297256469727 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.26722463965415955, "learning_rate": 6.927420581731155e-06, "loss": 0.2073, "step": 21044, "teacher_loss": 0.2006259560585022 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3457545042037964, "learning_rate": 6.925506385357135e-06, "loss": 0.2181, "step": 21045, "teacher_loss": 0.20396915078163147 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.3232949674129486, "learning_rate": 6.923592374107038e-06, "loss": 0.2023, "step": 21046, "teacher_loss": 0.1888878047466278 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.12580648064613342, "learning_rate": 6.921678548024754e-06, "loss": 0.1572, "step": 21047, "teacher_loss": 0.1607227921485901 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4828297793865204, "learning_rate": 6.919764907154165e-06, "loss": 0.2346, "step": 21048, "teacher_loss": 0.20697534084320068 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4111780524253845, "learning_rate": 6.917851451539137e-06, "loss": 0.1784, "step": 21049, "teacher_loss": 0.15252187848091125 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.4990905523300171, "learning_rate": 6.9159381812235486e-06, "loss": 0.2628, "step": 21050, "teacher_loss": 0.23658891022205353 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.2217494249343872, "learning_rate": 6.914025096251255e-06, "loss": 0.1448, "step": 21051, "teacher_loss": 0.13627220690250397 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.7158692479133606, "learning_rate": 6.912112196666128e-06, "loss": 0.222, "step": 21052, "teacher_loss": 0.16711562871932983 }, { "compression_loss": 0.0, "epoch": 3.8, "label_loss": 0.1085197851061821, "learning_rate": 6.9101994825120144e-06, "loss": 0.23, "step": 21053, "teacher_loss": 0.24353109300136566 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3970978856086731, "learning_rate": 6.908286953832772e-06, "loss": 0.2552, "step": 21054, "teacher_loss": 0.23943139612674713 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3145167827606201, "learning_rate": 6.906374610672257e-06, "loss": 0.1878, "step": 21055, "teacher_loss": 0.17368367314338684 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.610586941242218, "learning_rate": 6.904462453074297e-06, "loss": 0.2281, "step": 21056, "teacher_loss": 0.18563087284564972 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5640156269073486, "learning_rate": 6.902550481082743e-06, "loss": 0.2232, "step": 21057, "teacher_loss": 0.18533433973789215 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.39983314275741577, "learning_rate": 6.900638694741436e-06, "loss": 0.2388, "step": 21058, "teacher_loss": 0.22095569968223572 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5509340763092041, "learning_rate": 6.898727094094194e-06, "loss": 0.2533, "step": 21059, "teacher_loss": 0.22021010518074036 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.43815234303474426, "learning_rate": 6.896815679184851e-06, "loss": 0.2016, "step": 21060, "teacher_loss": 0.1753210723400116 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.7544982433319092, "learning_rate": 6.894904450057234e-06, "loss": 0.2432, "step": 21061, "teacher_loss": 0.18640050292015076 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.2629411220550537, "learning_rate": 6.8929934067551535e-06, "loss": 0.2205, "step": 21062, "teacher_loss": 0.21574972569942474 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.6879023909568787, "learning_rate": 6.891082549322433e-06, "loss": 0.2507, "step": 21063, "teacher_loss": 0.2021312713623047 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5753012895584106, "learning_rate": 6.8891718778028724e-06, "loss": 0.1953, "step": 21064, "teacher_loss": 0.15304477512836456 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3750990629196167, "learning_rate": 6.887261392240284e-06, "loss": 0.2342, "step": 21065, "teacher_loss": 0.21851582825183868 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3156360387802124, "learning_rate": 6.8853510926784745e-06, "loss": 0.1818, "step": 21066, "teacher_loss": 0.16691815853118896 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 1.0356909036636353, "learning_rate": 6.883440979161228e-06, "loss": 0.4858, "step": 21067, "teacher_loss": 0.424721360206604 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5096825361251831, "learning_rate": 6.881531051732347e-06, "loss": 0.2962, "step": 21068, "teacher_loss": 0.2725079655647278 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.7660454511642456, "learning_rate": 6.879621310435622e-06, "loss": 0.2488, "step": 21069, "teacher_loss": 0.1913796067237854 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5489686727523804, "learning_rate": 6.87771175531483e-06, "loss": 0.2832, "step": 21070, "teacher_loss": 0.2536696195602417 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.999770998954773, "learning_rate": 6.8758023864137555e-06, "loss": 0.3899, "step": 21071, "teacher_loss": 0.3221236765384674 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5881187319755554, "learning_rate": 6.8738932037761816e-06, "loss": 0.4927, "step": 21072, "teacher_loss": 0.4821445941925049 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.4585700035095215, "learning_rate": 6.871984207445872e-06, "loss": 0.2915, "step": 21073, "teacher_loss": 0.27295318245887756 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5144942402839661, "learning_rate": 6.870075397466588e-06, "loss": 0.2786, "step": 21074, "teacher_loss": 0.25237247347831726 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.6914947628974915, "learning_rate": 6.868166773882103e-06, "loss": 0.2414, "step": 21075, "teacher_loss": 0.19138209521770477 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.204611673951149, "learning_rate": 6.866258336736178e-06, "loss": 0.1546, "step": 21076, "teacher_loss": 0.14908358454704285 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3015681505203247, "learning_rate": 6.864350086072556e-06, "loss": 0.2178, "step": 21077, "teacher_loss": 0.20846150815486908 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.26221543550491333, "learning_rate": 6.862442021934998e-06, "loss": 0.2108, "step": 21078, "teacher_loss": 0.20504575967788696 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.957612156867981, "learning_rate": 6.860534144367249e-06, "loss": 0.513, "step": 21079, "teacher_loss": 0.4636082053184509 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.36956244707107544, "learning_rate": 6.858626453413045e-06, "loss": 0.2305, "step": 21080, "teacher_loss": 0.21506652235984802 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.9125055074691772, "learning_rate": 6.856718949116127e-06, "loss": 0.2884, "step": 21081, "teacher_loss": 0.21903762221336365 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.24365290999412537, "learning_rate": 6.854811631520235e-06, "loss": 0.1498, "step": 21082, "teacher_loss": 0.13938620686531067 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.363231897354126, "learning_rate": 6.852904500669085e-06, "loss": 0.2, "step": 21083, "teacher_loss": 0.18183794617652893 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.4659852385520935, "learning_rate": 6.850997556606409e-06, "loss": 0.1745, "step": 21084, "teacher_loss": 0.14213553071022034 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5907101631164551, "learning_rate": 6.849090799375931e-06, "loss": 0.2683, "step": 21085, "teacher_loss": 0.23244041204452515 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3744877278804779, "learning_rate": 6.847184229021365e-06, "loss": 0.2118, "step": 21086, "teacher_loss": 0.19376710057258606 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.21573379635810852, "learning_rate": 6.8452778455864106e-06, "loss": 0.2398, "step": 21087, "teacher_loss": 0.2424522042274475 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.13263298571109772, "learning_rate": 6.843371649114797e-06, "loss": 0.2118, "step": 21088, "teacher_loss": 0.22055436670780182 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.09606871008872986, "learning_rate": 6.8414656396502156e-06, "loss": 0.1418, "step": 21089, "teacher_loss": 0.1468939185142517 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.8944100141525269, "learning_rate": 6.839559817236362e-06, "loss": 0.2824, "step": 21090, "teacher_loss": 0.21438215672969818 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.7346954345703125, "learning_rate": 6.8376541819169355e-06, "loss": 0.5214, "step": 21091, "teacher_loss": 0.4976603388786316 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 1.1110830307006836, "learning_rate": 6.8357487337356304e-06, "loss": 0.6057, "step": 21092, "teacher_loss": 0.5496015548706055 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.31797337532043457, "learning_rate": 6.833843472736125e-06, "loss": 0.2363, "step": 21093, "teacher_loss": 0.22726817429065704 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.36460769176483154, "learning_rate": 6.831938398962104e-06, "loss": 0.2106, "step": 21094, "teacher_loss": 0.19353246688842773 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5137042999267578, "learning_rate": 6.830033512457255e-06, "loss": 0.2762, "step": 21095, "teacher_loss": 0.24976465106010437 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.49862152338027954, "learning_rate": 6.82812881326524e-06, "loss": 0.2361, "step": 21096, "teacher_loss": 0.20689120888710022 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.4919617772102356, "learning_rate": 6.826224301429721e-06, "loss": 0.2545, "step": 21097, "teacher_loss": 0.22813186049461365 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.4095369577407837, "learning_rate": 6.824319976994383e-06, "loss": 0.3209, "step": 21098, "teacher_loss": 0.3110586404800415 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.368999719619751, "learning_rate": 6.8224158400028765e-06, "loss": 0.3046, "step": 21099, "teacher_loss": 0.29744774103164673 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.42498618364334106, "learning_rate": 6.8205118904988455e-06, "loss": 0.1937, "step": 21100, "teacher_loss": 0.16805553436279297 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.308207631111145, "learning_rate": 6.818608128525965e-06, "loss": 0.2073, "step": 21101, "teacher_loss": 0.19603359699249268 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.3554556965827942, "learning_rate": 6.81670455412787e-06, "loss": 0.2175, "step": 21102, "teacher_loss": 0.20220491290092468 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5371696352958679, "learning_rate": 6.814801167348201e-06, "loss": 0.2344, "step": 21103, "teacher_loss": 0.20077010989189148 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.8057507872581482, "learning_rate": 6.8128979682305995e-06, "loss": 0.2935, "step": 21104, "teacher_loss": 0.23659461736679077 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5763615369796753, "learning_rate": 6.810994956818707e-06, "loss": 0.2666, "step": 21105, "teacher_loss": 0.23222331702709198 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.5847717523574829, "learning_rate": 6.809092133156142e-06, "loss": 0.2309, "step": 21106, "teacher_loss": 0.19152754545211792 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.32749539613723755, "learning_rate": 6.807189497286539e-06, "loss": 0.2192, "step": 21107, "teacher_loss": 0.20712804794311523 }, { "compression_loss": 0.0, "epoch": 3.81, "label_loss": 0.4948459565639496, "learning_rate": 6.805287049253522e-06, "loss": 0.2077, "step": 21108, "teacher_loss": 0.17575965821743011 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.44669681787490845, "learning_rate": 6.8033847891006975e-06, "loss": 0.1961, "step": 21109, "teacher_loss": 0.16829557716846466 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.860389232635498, "learning_rate": 6.801482716871685e-06, "loss": 0.3325, "step": 21110, "teacher_loss": 0.2738436460494995 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.589352011680603, "learning_rate": 6.799580832610099e-06, "loss": 0.2635, "step": 21111, "teacher_loss": 0.2272915542125702 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.4365181028842926, "learning_rate": 6.797679136359534e-06, "loss": 0.2414, "step": 21112, "teacher_loss": 0.21973586082458496 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.14987675845623016, "learning_rate": 6.795777628163599e-06, "loss": 0.2141, "step": 21113, "teacher_loss": 0.2212480902671814 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.38866978883743286, "learning_rate": 6.793876308065881e-06, "loss": 0.2111, "step": 21114, "teacher_loss": 0.1913265883922577 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.7936194539070129, "learning_rate": 6.791975176109981e-06, "loss": 0.2584, "step": 21115, "teacher_loss": 0.19888126850128174 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.2828065752983093, "learning_rate": 6.790074232339476e-06, "loss": 0.1843, "step": 21116, "teacher_loss": 0.17339789867401123 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.2894996404647827, "learning_rate": 6.788173476797954e-06, "loss": 0.1518, "step": 21117, "teacher_loss": 0.13650378584861755 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3175807297229767, "learning_rate": 6.786272909529e-06, "loss": 0.1747, "step": 21118, "teacher_loss": 0.15884855389595032 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.2899169921875, "learning_rate": 6.7843725305761746e-06, "loss": 0.2786, "step": 21119, "teacher_loss": 0.27733784914016724 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5662544965744019, "learning_rate": 6.7824723399830575e-06, "loss": 0.2838, "step": 21120, "teacher_loss": 0.2524040937423706 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.8492851257324219, "learning_rate": 6.7805723377932166e-06, "loss": 0.3373, "step": 21121, "teacher_loss": 0.2804449796676636 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.23926222324371338, "learning_rate": 6.778672524050204e-06, "loss": 0.1382, "step": 21122, "teacher_loss": 0.12692655622959137 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.39224082231521606, "learning_rate": 6.776772898797586e-06, "loss": 0.2975, "step": 21123, "teacher_loss": 0.2869310975074768 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.6006549596786499, "learning_rate": 6.774873462078907e-06, "loss": 0.2443, "step": 21124, "teacher_loss": 0.20471206307411194 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.4461630582809448, "learning_rate": 6.7729742139377186e-06, "loss": 0.2239, "step": 21125, "teacher_loss": 0.1992393285036087 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3043396472930908, "learning_rate": 6.77107515441757e-06, "loss": 0.2154, "step": 21126, "teacher_loss": 0.20556125044822693 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5684664249420166, "learning_rate": 6.769176283561992e-06, "loss": 0.2645, "step": 21127, "teacher_loss": 0.230694979429245 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.4666040539741516, "learning_rate": 6.767277601414525e-06, "loss": 0.2365, "step": 21128, "teacher_loss": 0.2109057903289795 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.20061102509498596, "learning_rate": 6.765379108018704e-06, "loss": 0.2106, "step": 21129, "teacher_loss": 0.2117154747247696 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.18540042638778687, "learning_rate": 6.763480803418048e-06, "loss": 0.1787, "step": 21130, "teacher_loss": 0.1780080497264862 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.14695845544338226, "learning_rate": 6.7615826876560806e-06, "loss": 0.1269, "step": 21131, "teacher_loss": 0.12461711466312408 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3551092743873596, "learning_rate": 6.759684760776328e-06, "loss": 0.208, "step": 21132, "teacher_loss": 0.19161498546600342 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.31561899185180664, "learning_rate": 6.757787022822293e-06, "loss": 0.2137, "step": 21133, "teacher_loss": 0.20236268639564514 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.243172287940979, "learning_rate": 6.75588947383749e-06, "loss": 0.2175, "step": 21134, "teacher_loss": 0.2146616131067276 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.20968863368034363, "learning_rate": 6.75399211386543e-06, "loss": 0.1397, "step": 21135, "teacher_loss": 0.13196861743927002 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.34811481833457947, "learning_rate": 6.752094942949608e-06, "loss": 0.1773, "step": 21136, "teacher_loss": 0.15833115577697754 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3638768196105957, "learning_rate": 6.750197961133515e-06, "loss": 0.2358, "step": 21137, "teacher_loss": 0.22155886888504028 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.20576980710029602, "learning_rate": 6.748301168460648e-06, "loss": 0.1577, "step": 21138, "teacher_loss": 0.1523124873638153 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5612190961837769, "learning_rate": 6.746404564974501e-06, "loss": 0.2397, "step": 21139, "teacher_loss": 0.2040124088525772 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.6796547770500183, "learning_rate": 6.744508150718547e-06, "loss": 0.3164, "step": 21140, "teacher_loss": 0.2760828733444214 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.15383681654930115, "learning_rate": 6.742611925736269e-06, "loss": 0.1846, "step": 21141, "teacher_loss": 0.18803825974464417 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.17699280381202698, "learning_rate": 6.740715890071148e-06, "loss": 0.1369, "step": 21142, "teacher_loss": 0.13239236176013947 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.2258513867855072, "learning_rate": 6.738820043766644e-06, "loss": 0.1992, "step": 21143, "teacher_loss": 0.19621768593788147 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.12503355741500854, "learning_rate": 6.736924386866228e-06, "loss": 0.1514, "step": 21144, "teacher_loss": 0.15431565046310425 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.23177844285964966, "learning_rate": 6.735028919413367e-06, "loss": 0.1706, "step": 21145, "teacher_loss": 0.16376319527626038 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.35602760314941406, "learning_rate": 6.733133641451513e-06, "loss": 0.181, "step": 21146, "teacher_loss": 0.16154512763023376 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5941517949104309, "learning_rate": 6.7312385530241095e-06, "loss": 0.1915, "step": 21147, "teacher_loss": 0.1467203050851822 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.545018196105957, "learning_rate": 6.7293436541746254e-06, "loss": 0.2556, "step": 21148, "teacher_loss": 0.22344353795051575 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.27495136857032776, "learning_rate": 6.727448944946494e-06, "loss": 0.164, "step": 21149, "teacher_loss": 0.15166683495044708 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.379241406917572, "learning_rate": 6.725554425383146e-06, "loss": 0.2026, "step": 21150, "teacher_loss": 0.18297061324119568 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.34359195828437805, "learning_rate": 6.723660095528037e-06, "loss": 0.197, "step": 21151, "teacher_loss": 0.18068063259124756 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5772611498832703, "learning_rate": 6.721765955424589e-06, "loss": 0.2756, "step": 21152, "teacher_loss": 0.24209725856781006 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.15077432990074158, "learning_rate": 6.719872005116221e-06, "loss": 0.1373, "step": 21153, "teacher_loss": 0.1357787698507309 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.4526090621948242, "learning_rate": 6.717978244646364e-06, "loss": 0.1703, "step": 21154, "teacher_loss": 0.13896337151527405 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3305814266204834, "learning_rate": 6.716084674058441e-06, "loss": 0.3119, "step": 21155, "teacher_loss": 0.3098183870315552 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.7844934463500977, "learning_rate": 6.714191293395854e-06, "loss": 0.3014, "step": 21156, "teacher_loss": 0.24774567782878876 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.3782028555870056, "learning_rate": 6.7122981027020186e-06, "loss": 0.2073, "step": 21157, "teacher_loss": 0.18836012482643127 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.7794773578643799, "learning_rate": 6.710405102020346e-06, "loss": 0.3392, "step": 21158, "teacher_loss": 0.29029643535614014 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.1706942617893219, "learning_rate": 6.7085122913942295e-06, "loss": 0.1666, "step": 21159, "teacher_loss": 0.166156604886055 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.5009013414382935, "learning_rate": 6.706619670867057e-06, "loss": 0.2085, "step": 21160, "teacher_loss": 0.1760426014661789 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.4840961694717407, "learning_rate": 6.704727240482242e-06, "loss": 0.2592, "step": 21161, "teacher_loss": 0.23425209522247314 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.6796861290931702, "learning_rate": 6.70283500028316e-06, "loss": 0.1904, "step": 21162, "teacher_loss": 0.13603845238685608 }, { "compression_loss": 0.0, "epoch": 3.82, "label_loss": 0.47639307379722595, "learning_rate": 6.70094295031319e-06, "loss": 0.2179, "step": 21163, "teacher_loss": 0.18918576836585999 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.568954348564148, "learning_rate": 6.699051090615717e-06, "loss": 0.1934, "step": 21164, "teacher_loss": 0.1516387164592743 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4993837773799896, "learning_rate": 6.697159421234119e-06, "loss": 0.3126, "step": 21165, "teacher_loss": 0.2917935252189636 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.5164945125579834, "learning_rate": 6.695267942211757e-06, "loss": 0.2167, "step": 21166, "teacher_loss": 0.18336230516433716 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.3432307839393616, "learning_rate": 6.693376653592003e-06, "loss": 0.2664, "step": 21167, "teacher_loss": 0.25784116983413696 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6550067663192749, "learning_rate": 6.691485555418224e-06, "loss": 0.2076, "step": 21168, "teacher_loss": 0.1579257845878601 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.36990267038345337, "learning_rate": 6.6895946477337635e-06, "loss": 0.2064, "step": 21169, "teacher_loss": 0.18823090195655823 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6400458812713623, "learning_rate": 6.687703930581983e-06, "loss": 0.2279, "step": 21170, "teacher_loss": 0.18209432065486908 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.8232654929161072, "learning_rate": 6.685813404006235e-06, "loss": 0.4007, "step": 21171, "teacher_loss": 0.35370147228240967 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2232530415058136, "learning_rate": 6.683923068049853e-06, "loss": 0.1781, "step": 21172, "teacher_loss": 0.17303961515426636 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.5943315029144287, "learning_rate": 6.682032922756188e-06, "loss": 0.4468, "step": 21173, "teacher_loss": 0.43046078085899353 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.13391068577766418, "learning_rate": 6.6801429681685644e-06, "loss": 0.2278, "step": 21174, "teacher_loss": 0.23825660347938538 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6444460153579712, "learning_rate": 6.678253204330317e-06, "loss": 0.2619, "step": 21175, "teacher_loss": 0.219389870762825 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.33574429154396057, "learning_rate": 6.676363631284779e-06, "loss": 0.2321, "step": 21176, "teacher_loss": 0.22062653303146362 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.5184218287467957, "learning_rate": 6.674474249075263e-06, "loss": 0.294, "step": 21177, "teacher_loss": 0.26907235383987427 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2569783926010132, "learning_rate": 6.672585057745097e-06, "loss": 0.212, "step": 21178, "teacher_loss": 0.20696985721588135 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4303276538848877, "learning_rate": 6.670696057337583e-06, "loss": 0.2021, "step": 21179, "teacher_loss": 0.1767246425151825 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.38043051958084106, "learning_rate": 6.668807247896036e-06, "loss": 0.3754, "step": 21180, "teacher_loss": 0.37483131885528564 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.19700020551681519, "learning_rate": 6.666918629463764e-06, "loss": 0.2035, "step": 21181, "teacher_loss": 0.2042715847492218 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.26751846075057983, "learning_rate": 6.665030202084061e-06, "loss": 0.1745, "step": 21182, "teacher_loss": 0.1641301065683365 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.311440110206604, "learning_rate": 6.6631419658002245e-06, "loss": 0.1908, "step": 21183, "teacher_loss": 0.1774301826953888 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.316212922334671, "learning_rate": 6.661253920655554e-06, "loss": 0.202, "step": 21184, "teacher_loss": 0.18932712078094482 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.29894915223121643, "learning_rate": 6.659366066693324e-06, "loss": 0.1777, "step": 21185, "teacher_loss": 0.16421669721603394 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.459450364112854, "learning_rate": 6.657478403956831e-06, "loss": 0.21, "step": 21186, "teacher_loss": 0.18226423859596252 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4174155592918396, "learning_rate": 6.6555909324893386e-06, "loss": 0.2314, "step": 21187, "teacher_loss": 0.2107028365135193 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.24748720228672028, "learning_rate": 6.653703652334128e-06, "loss": 0.1958, "step": 21188, "teacher_loss": 0.19006532430648804 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.3583128750324249, "learning_rate": 6.6518165635344766e-06, "loss": 0.2353, "step": 21189, "teacher_loss": 0.22163251042366028 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2770041525363922, "learning_rate": 6.649929666133634e-06, "loss": 0.2207, "step": 21190, "teacher_loss": 0.2144688218832016 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.5123680830001831, "learning_rate": 6.648042960174871e-06, "loss": 0.2118, "step": 21191, "teacher_loss": 0.178436741232872 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6827540397644043, "learning_rate": 6.646156445701448e-06, "loss": 0.2334, "step": 21192, "teacher_loss": 0.18350112438201904 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.550858736038208, "learning_rate": 6.644270122756605e-06, "loss": 0.326, "step": 21193, "teacher_loss": 0.30100661516189575 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4779033064842224, "learning_rate": 6.6423839913835985e-06, "loss": 0.2924, "step": 21194, "teacher_loss": 0.2718360424041748 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2881973385810852, "learning_rate": 6.640498051625673e-06, "loss": 0.2144, "step": 21195, "teacher_loss": 0.2062150537967682 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2724769115447998, "learning_rate": 6.638612303526066e-06, "loss": 0.2146, "step": 21196, "teacher_loss": 0.20814786851406097 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.31559669971466064, "learning_rate": 6.636726747128003e-06, "loss": 0.1691, "step": 21197, "teacher_loss": 0.15287527441978455 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4456956386566162, "learning_rate": 6.634841382474721e-06, "loss": 0.2652, "step": 21198, "teacher_loss": 0.24512681365013123 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.3128053545951843, "learning_rate": 6.632956209609452e-06, "loss": 0.1949, "step": 21199, "teacher_loss": 0.1818285882472992 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2124515324831009, "learning_rate": 6.6310712285754074e-06, "loss": 0.1407, "step": 21200, "teacher_loss": 0.13277366757392883 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.24811293184757233, "learning_rate": 6.629186439415807e-06, "loss": 0.132, "step": 21201, "teacher_loss": 0.11911018192768097 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.32954642176628113, "learning_rate": 6.62730184217387e-06, "loss": 0.1883, "step": 21202, "teacher_loss": 0.17257171869277954 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.47639939188957214, "learning_rate": 6.625417436892794e-06, "loss": 0.2801, "step": 21203, "teacher_loss": 0.25829243659973145 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.3459671437740326, "learning_rate": 6.623533223615787e-06, "loss": 0.1941, "step": 21204, "teacher_loss": 0.17720988392829895 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4597497582435608, "learning_rate": 6.6216492023860546e-06, "loss": 0.255, "step": 21205, "teacher_loss": 0.23227080702781677 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.25717926025390625, "learning_rate": 6.619765373246782e-06, "loss": 0.2017, "step": 21206, "teacher_loss": 0.1955321580171585 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.29475924372673035, "learning_rate": 6.6178817362411634e-06, "loss": 0.1847, "step": 21207, "teacher_loss": 0.1724410355091095 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4035297632217407, "learning_rate": 6.615998291412391e-06, "loss": 0.3622, "step": 21208, "teacher_loss": 0.3575735092163086 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.23649314045906067, "learning_rate": 6.6141150388036425e-06, "loss": 0.1641, "step": 21209, "teacher_loss": 0.15609237551689148 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.10772210359573364, "learning_rate": 6.612231978458083e-06, "loss": 0.1671, "step": 21210, "teacher_loss": 0.17372208833694458 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6323534846305847, "learning_rate": 6.610349110418907e-06, "loss": 0.2256, "step": 21211, "teacher_loss": 0.18045005202293396 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.17334814369678497, "learning_rate": 6.60846643472927e-06, "loss": 0.1481, "step": 21212, "teacher_loss": 0.14526762068271637 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.4721274673938751, "learning_rate": 6.606583951432336e-06, "loss": 0.3164, "step": 21213, "teacher_loss": 0.2990991175174713 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.40152931213378906, "learning_rate": 6.604701660571267e-06, "loss": 0.2491, "step": 21214, "teacher_loss": 0.23215769231319427 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.6213292479515076, "learning_rate": 6.602819562189223e-06, "loss": 0.2539, "step": 21215, "teacher_loss": 0.213044673204422 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.581149160861969, "learning_rate": 6.600937656329345e-06, "loss": 0.4377, "step": 21216, "teacher_loss": 0.42180147767066956 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.7182506322860718, "learning_rate": 6.599055943034786e-06, "loss": 0.3285, "step": 21217, "teacher_loss": 0.2851560711860657 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.2575365900993347, "learning_rate": 6.59717442234869e-06, "loss": 0.192, "step": 21218, "teacher_loss": 0.18473955988883972 }, { "compression_loss": 0.0, "epoch": 3.83, "label_loss": 0.3901729881763458, "learning_rate": 6.595293094314194e-06, "loss": 0.3165, "step": 21219, "teacher_loss": 0.3083469271659851 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.12353768944740295, "learning_rate": 6.593411958974417e-06, "loss": 0.156, "step": 21220, "teacher_loss": 0.15963146090507507 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4487302899360657, "learning_rate": 6.591531016372509e-06, "loss": 0.1903, "step": 21221, "teacher_loss": 0.16162961721420288 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.7151828408241272, "learning_rate": 6.5896502665515874e-06, "loss": 0.2405, "step": 21222, "teacher_loss": 0.18773436546325684 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.39963942766189575, "learning_rate": 6.5877697095547574e-06, "loss": 0.2176, "step": 21223, "teacher_loss": 0.1973292976617813 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.8283702731132507, "learning_rate": 6.585889345425158e-06, "loss": 0.2375, "step": 21224, "teacher_loss": 0.17180094122886658 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.7733970880508423, "learning_rate": 6.584009174205888e-06, "loss": 0.2622, "step": 21225, "teacher_loss": 0.20539385080337524 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.25096195936203003, "learning_rate": 6.582129195940051e-06, "loss": 0.188, "step": 21226, "teacher_loss": 0.18101000785827637 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3072476387023926, "learning_rate": 6.580249410670753e-06, "loss": 0.2049, "step": 21227, "teacher_loss": 0.19352814555168152 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.6021425724029541, "learning_rate": 6.5783698184410975e-06, "loss": 0.2364, "step": 21228, "teacher_loss": 0.19581303000450134 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4362931251525879, "learning_rate": 6.576490419294167e-06, "loss": 0.2698, "step": 21229, "teacher_loss": 0.2512766122817993 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.48105841875076294, "learning_rate": 6.574611213273056e-06, "loss": 0.2737, "step": 21230, "teacher_loss": 0.25064852833747864 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4490678906440735, "learning_rate": 6.5727322004208565e-06, "loss": 0.3315, "step": 21231, "teacher_loss": 0.3184799253940582 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3275010287761688, "learning_rate": 6.570853380780636e-06, "loss": 0.2409, "step": 21232, "teacher_loss": 0.23130479454994202 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.32303979992866516, "learning_rate": 6.568974754395473e-06, "loss": 0.2022, "step": 21233, "teacher_loss": 0.18879126012325287 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.5101224184036255, "learning_rate": 6.567096321308449e-06, "loss": 0.2153, "step": 21234, "teacher_loss": 0.18253692984580994 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.2579573392868042, "learning_rate": 6.565218081562616e-06, "loss": 0.1492, "step": 21235, "teacher_loss": 0.1370660811662674 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4701187312602997, "learning_rate": 6.56334003520105e-06, "loss": 0.2028, "step": 21236, "teacher_loss": 0.1731138825416565 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.7712534666061401, "learning_rate": 6.561462182266797e-06, "loss": 0.3855, "step": 21237, "teacher_loss": 0.342593252658844 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3943137526512146, "learning_rate": 6.559584522802917e-06, "loss": 0.1945, "step": 21238, "teacher_loss": 0.17228448390960693 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.5542356967926025, "learning_rate": 6.5577070568524625e-06, "loss": 0.2757, "step": 21239, "teacher_loss": 0.24477559328079224 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3369714021682739, "learning_rate": 6.555829784458469e-06, "loss": 0.2125, "step": 21240, "teacher_loss": 0.19869573414325714 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.46898338198661804, "learning_rate": 6.553952705663987e-06, "loss": 0.2675, "step": 21241, "teacher_loss": 0.24510252475738525 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.30435502529144287, "learning_rate": 6.552075820512042e-06, "loss": 0.1983, "step": 21242, "teacher_loss": 0.18647930026054382 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.31451988220214844, "learning_rate": 6.55019912904567e-06, "loss": 0.2599, "step": 21243, "teacher_loss": 0.2538384199142456 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3133608400821686, "learning_rate": 6.548322631307902e-06, "loss": 0.2555, "step": 21244, "teacher_loss": 0.2490580528974533 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.17214983701705933, "learning_rate": 6.546446327341752e-06, "loss": 0.2, "step": 21245, "teacher_loss": 0.20313741266727448 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.474582701921463, "learning_rate": 6.544570217190249e-06, "loss": 0.2494, "step": 21246, "teacher_loss": 0.2243245542049408 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.38884395360946655, "learning_rate": 6.542694300896395e-06, "loss": 0.1991, "step": 21247, "teacher_loss": 0.17803409695625305 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3393251895904541, "learning_rate": 6.540818578503204e-06, "loss": 0.1801, "step": 21248, "teacher_loss": 0.16244950890541077 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.31911084055900574, "learning_rate": 6.538943050053685e-06, "loss": 0.1826, "step": 21249, "teacher_loss": 0.16745352745056152 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.5622643828392029, "learning_rate": 6.5370677155908296e-06, "loss": 0.2686, "step": 21250, "teacher_loss": 0.23598787188529968 }, { "epoch": 3.84, "eval_exact_match": 79.93377483443709, "eval_f1": 87.44578526535899, "step": 21250 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3693341612815857, "learning_rate": 6.535192575157638e-06, "loss": 0.2087, "step": 21251, "teacher_loss": 0.19081524014472961 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3323872685432434, "learning_rate": 6.5333176287971094e-06, "loss": 0.1988, "step": 21252, "teacher_loss": 0.18392062187194824 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.46378836035728455, "learning_rate": 6.531442876552216e-06, "loss": 0.2246, "step": 21253, "teacher_loss": 0.1980430632829666 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3981441259384155, "learning_rate": 6.529568318465948e-06, "loss": 0.2749, "step": 21254, "teacher_loss": 0.2611829936504364 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.8474220633506775, "learning_rate": 6.527693954581288e-06, "loss": 0.2616, "step": 21255, "teacher_loss": 0.19652634859085083 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4249844253063202, "learning_rate": 6.5258197849411985e-06, "loss": 0.2097, "step": 21256, "teacher_loss": 0.1857403814792633 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.3462231755256653, "learning_rate": 6.523945809588655e-06, "loss": 0.2009, "step": 21257, "teacher_loss": 0.1847608983516693 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.38004302978515625, "learning_rate": 6.522072028566626e-06, "loss": 0.2491, "step": 21258, "teacher_loss": 0.2345041185617447 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.44807034730911255, "learning_rate": 6.520198441918068e-06, "loss": 0.2683, "step": 21259, "teacher_loss": 0.24829131364822388 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.41762250661849976, "learning_rate": 6.518325049685931e-06, "loss": 0.2699, "step": 21260, "teacher_loss": 0.25345468521118164 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.8911525011062622, "learning_rate": 6.516451851913169e-06, "loss": 0.301, "step": 21261, "teacher_loss": 0.23543407022953033 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.5937632918357849, "learning_rate": 6.5145788486427385e-06, "loss": 0.1986, "step": 21262, "teacher_loss": 0.15471762418746948 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.800538957118988, "learning_rate": 6.5127060399175695e-06, "loss": 0.2953, "step": 21263, "teacher_loss": 0.23913763463497162 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.20043110847473145, "learning_rate": 6.510833425780604e-06, "loss": 0.1748, "step": 21264, "teacher_loss": 0.17196637392044067 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4386744499206543, "learning_rate": 6.508961006274781e-06, "loss": 0.2532, "step": 21265, "teacher_loss": 0.23259669542312622 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.5904057025909424, "learning_rate": 6.50708878144302e-06, "loss": 0.2666, "step": 21266, "teacher_loss": 0.23065067827701569 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4290776550769806, "learning_rate": 6.505216751328251e-06, "loss": 0.26, "step": 21267, "teacher_loss": 0.241220623254776 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.2511195242404938, "learning_rate": 6.503344915973396e-06, "loss": 0.1958, "step": 21268, "teacher_loss": 0.1896679401397705 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.21728770434856415, "learning_rate": 6.50147327542137e-06, "loss": 0.2148, "step": 21269, "teacher_loss": 0.2145073413848877 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.7306472063064575, "learning_rate": 6.4996018297150705e-06, "loss": 0.2975, "step": 21270, "teacher_loss": 0.2493753433227539 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.35846054553985596, "learning_rate": 6.4977305788974266e-06, "loss": 0.2118, "step": 21271, "teacher_loss": 0.1955542117357254 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.4081936478614807, "learning_rate": 6.49585952301133e-06, "loss": 0.2623, "step": 21272, "teacher_loss": 0.24612005054950714 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.47310203313827515, "learning_rate": 6.493988662099664e-06, "loss": 0.316, "step": 21273, "teacher_loss": 0.298520565032959 }, { "compression_loss": 0.0, "epoch": 3.84, "label_loss": 0.7038092613220215, "learning_rate": 6.492117996205349e-06, "loss": 0.2376, "step": 21274, "teacher_loss": 0.18574732542037964 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3826104402542114, "learning_rate": 6.490247525371259e-06, "loss": 0.2746, "step": 21275, "teacher_loss": 0.26265203952789307 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.9430464506149292, "learning_rate": 6.488377249640277e-06, "loss": 0.2187, "step": 21276, "teacher_loss": 0.13820284605026245 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7118151187896729, "learning_rate": 6.486507169055283e-06, "loss": 0.2432, "step": 21277, "teacher_loss": 0.19114740192890167 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 1.2927591800689697, "learning_rate": 6.4846372836591614e-06, "loss": 0.5825, "step": 21278, "teacher_loss": 0.5035779476165771 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.25623345375061035, "learning_rate": 6.48276759349477e-06, "loss": 0.1681, "step": 21279, "teacher_loss": 0.15832293033599854 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4538447856903076, "learning_rate": 6.480898098604982e-06, "loss": 0.2756, "step": 21280, "teacher_loss": 0.2558346092700958 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.44045794010162354, "learning_rate": 6.479028799032664e-06, "loss": 0.2403, "step": 21281, "teacher_loss": 0.21802747249603271 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.29872065782546997, "learning_rate": 6.47715969482067e-06, "loss": 0.2826, "step": 21282, "teacher_loss": 0.28076332807540894 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.5400710105895996, "learning_rate": 6.47529078601184e-06, "loss": 0.2674, "step": 21283, "teacher_loss": 0.23709625005722046 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.8857929706573486, "learning_rate": 6.473422072649044e-06, "loss": 0.3003, "step": 21284, "teacher_loss": 0.2352481335401535 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.2900814414024353, "learning_rate": 6.471553554775116e-06, "loss": 0.1892, "step": 21285, "teacher_loss": 0.17800059914588928 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7973989844322205, "learning_rate": 6.469685232432891e-06, "loss": 0.3332, "step": 21286, "teacher_loss": 0.281582236289978 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7149227857589722, "learning_rate": 6.467817105665207e-06, "loss": 0.2395, "step": 21287, "teacher_loss": 0.18672248721122742 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7514445781707764, "learning_rate": 6.465949174514902e-06, "loss": 0.4698, "step": 21288, "teacher_loss": 0.43846404552459717 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 1.015103816986084, "learning_rate": 6.464081439024792e-06, "loss": 0.2986, "step": 21289, "teacher_loss": 0.21903246641159058 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4348888397216797, "learning_rate": 6.4622138992377e-06, "loss": 0.2618, "step": 21290, "teacher_loss": 0.24254050850868225 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.17433705925941467, "learning_rate": 6.460346555196453e-06, "loss": 0.1432, "step": 21291, "teacher_loss": 0.13970379531383514 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.2623525559902191, "learning_rate": 6.45847940694385e-06, "loss": 0.1488, "step": 21292, "teacher_loss": 0.13616153597831726 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6825239658355713, "learning_rate": 6.4566124545227055e-06, "loss": 0.2716, "step": 21293, "teacher_loss": 0.22592885792255402 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6175816655158997, "learning_rate": 6.454745697975829e-06, "loss": 0.3044, "step": 21294, "teacher_loss": 0.26958343386650085 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.247538223862648, "learning_rate": 6.4528791373460076e-06, "loss": 0.2223, "step": 21295, "teacher_loss": 0.21953842043876648 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6099358797073364, "learning_rate": 6.451012772676047e-06, "loss": 0.2547, "step": 21296, "teacher_loss": 0.2151889204978943 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.26875975728034973, "learning_rate": 6.449146604008727e-06, "loss": 0.1818, "step": 21297, "teacher_loss": 0.17213866114616394 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.42377060651779175, "learning_rate": 6.44728063138684e-06, "loss": 0.1877, "step": 21298, "teacher_loss": 0.16150692105293274 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.45346811413764954, "learning_rate": 6.44541485485317e-06, "loss": 0.2364, "step": 21299, "teacher_loss": 0.21231895685195923 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7622081637382507, "learning_rate": 6.443549274450485e-06, "loss": 0.2955, "step": 21300, "teacher_loss": 0.24369731545448303 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.7685917615890503, "learning_rate": 6.44168389022156e-06, "loss": 0.2936, "step": 21301, "teacher_loss": 0.24083341658115387 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4388599097728729, "learning_rate": 6.439818702209171e-06, "loss": 0.2334, "step": 21302, "teacher_loss": 0.2106252908706665 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6749705076217651, "learning_rate": 6.437953710456068e-06, "loss": 0.297, "step": 21303, "teacher_loss": 0.25495949387550354 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.34914135932922363, "learning_rate": 6.436088915005021e-06, "loss": 0.1789, "step": 21304, "teacher_loss": 0.15993613004684448 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.296528160572052, "learning_rate": 6.434224315898775e-06, "loss": 0.1575, "step": 21305, "teacher_loss": 0.14201316237449646 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.392334908246994, "learning_rate": 6.432359913180084e-06, "loss": 0.2573, "step": 21306, "teacher_loss": 0.24233193695545197 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6466615796089172, "learning_rate": 6.430495706891698e-06, "loss": 0.2676, "step": 21307, "teacher_loss": 0.22547055780887604 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3968576490879059, "learning_rate": 6.428631697076347e-06, "loss": 0.2107, "step": 21308, "teacher_loss": 0.18996086716651917 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.21296623349189758, "learning_rate": 6.42676788377678e-06, "loss": 0.1977, "step": 21309, "teacher_loss": 0.19597885012626648 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.25313711166381836, "learning_rate": 6.424904267035715e-06, "loss": 0.19, "step": 21310, "teacher_loss": 0.18298307061195374 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3535030484199524, "learning_rate": 6.423040846895884e-06, "loss": 0.1931, "step": 21311, "teacher_loss": 0.17532047629356384 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4789242446422577, "learning_rate": 6.421177623400019e-06, "loss": 0.293, "step": 21312, "teacher_loss": 0.272366464138031 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3339740037918091, "learning_rate": 6.419314596590824e-06, "loss": 0.184, "step": 21313, "teacher_loss": 0.16729968786239624 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.5131508111953735, "learning_rate": 6.41745176651102e-06, "loss": 0.2186, "step": 21314, "teacher_loss": 0.18590402603149414 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3310840129852295, "learning_rate": 6.4155891332033196e-06, "loss": 0.2974, "step": 21315, "teacher_loss": 0.29366785287857056 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.5613046884536743, "learning_rate": 6.413726696710419e-06, "loss": 0.249, "step": 21316, "teacher_loss": 0.21428847312927246 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.38214248418807983, "learning_rate": 6.41186445707502e-06, "loss": 0.2635, "step": 21317, "teacher_loss": 0.25026655197143555 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.5044153332710266, "learning_rate": 6.410002414339826e-06, "loss": 0.3077, "step": 21318, "teacher_loss": 0.2858063578605652 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.5468268990516663, "learning_rate": 6.408140568547524e-06, "loss": 0.2552, "step": 21319, "teacher_loss": 0.22278521955013275 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.6593981981277466, "learning_rate": 6.406278919740788e-06, "loss": 0.2298, "step": 21320, "teacher_loss": 0.182090163230896 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3173362910747528, "learning_rate": 6.404417467962322e-06, "loss": 0.1603, "step": 21321, "teacher_loss": 0.14279726147651672 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.23680120706558228, "learning_rate": 6.402556213254792e-06, "loss": 0.1861, "step": 21322, "teacher_loss": 0.1804875135421753 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.8890953660011292, "learning_rate": 6.400695155660866e-06, "loss": 0.2989, "step": 21323, "teacher_loss": 0.23326990008354187 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4791303277015686, "learning_rate": 6.3988342952232195e-06, "loss": 0.2918, "step": 21324, "teacher_loss": 0.27100297808647156 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3956109881401062, "learning_rate": 6.396973631984519e-06, "loss": 0.2394, "step": 21325, "teacher_loss": 0.22199493646621704 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.37856486439704895, "learning_rate": 6.3951131659874145e-06, "loss": 0.2143, "step": 21326, "teacher_loss": 0.1960412859916687 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.2856766879558563, "learning_rate": 6.393252897274567e-06, "loss": 0.1705, "step": 21327, "teacher_loss": 0.15773595869541168 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.4091029763221741, "learning_rate": 6.391392825888632e-06, "loss": 0.2519, "step": 21328, "teacher_loss": 0.23444359004497528 }, { "compression_loss": 0.0, "epoch": 3.85, "label_loss": 0.3835133910179138, "learning_rate": 6.389532951872242e-06, "loss": 0.2369, "step": 21329, "teacher_loss": 0.22056493163108826 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.49710577726364136, "learning_rate": 6.387673275268048e-06, "loss": 0.3325, "step": 21330, "teacher_loss": 0.31423354148864746 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.6124380826950073, "learning_rate": 6.385813796118689e-06, "loss": 0.3479, "step": 21331, "teacher_loss": 0.31848978996276855 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3856092095375061, "learning_rate": 6.383954514466793e-06, "loss": 0.2487, "step": 21332, "teacher_loss": 0.23353232443332672 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.270458459854126, "learning_rate": 6.382095430354978e-06, "loss": 0.1988, "step": 21333, "teacher_loss": 0.19085386395454407 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.4799838960170746, "learning_rate": 6.380236543825887e-06, "loss": 0.2807, "step": 21334, "teacher_loss": 0.2585732936859131 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.6220208406448364, "learning_rate": 6.3783778549221285e-06, "loss": 0.2217, "step": 21335, "teacher_loss": 0.17724472284317017 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.45883625745773315, "learning_rate": 6.376519363686312e-06, "loss": 0.2282, "step": 21336, "teacher_loss": 0.20257119834423065 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.2728968858718872, "learning_rate": 6.374661070161051e-06, "loss": 0.1736, "step": 21337, "teacher_loss": 0.1626073569059372 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.41584211587905884, "learning_rate": 6.3728029743889586e-06, "loss": 0.2366, "step": 21338, "teacher_loss": 0.21667324006557465 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.4721145033836365, "learning_rate": 6.370945076412622e-06, "loss": 0.2967, "step": 21339, "teacher_loss": 0.2772143483161926 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.44651615619659424, "learning_rate": 6.3690873762746425e-06, "loss": 0.2668, "step": 21340, "teacher_loss": 0.24687179923057556 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.37927573919296265, "learning_rate": 6.3672298740176195e-06, "loss": 0.2322, "step": 21341, "teacher_loss": 0.2158384472131729 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.654780924320221, "learning_rate": 6.365372569684126e-06, "loss": 0.2321, "step": 21342, "teacher_loss": 0.18510353565216064 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3908180892467499, "learning_rate": 6.363515463316752e-06, "loss": 0.1835, "step": 21343, "teacher_loss": 0.16049689054489136 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3326825499534607, "learning_rate": 6.361658554958082e-06, "loss": 0.2838, "step": 21344, "teacher_loss": 0.27838772535324097 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.16537687182426453, "learning_rate": 6.359801844650681e-06, "loss": 0.1517, "step": 21345, "teacher_loss": 0.15020406246185303 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.16807019710540771, "learning_rate": 6.357945332437108e-06, "loss": 0.1459, "step": 21346, "teacher_loss": 0.14348533749580383 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7297033071517944, "learning_rate": 6.35608901835995e-06, "loss": 0.3378, "step": 21347, "teacher_loss": 0.29423487186431885 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.8914479613304138, "learning_rate": 6.354232902461754e-06, "loss": 0.2454, "step": 21348, "teacher_loss": 0.17363935708999634 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.350069522857666, "learning_rate": 6.352376984785072e-06, "loss": 0.1419, "step": 21349, "teacher_loss": 0.11879783868789673 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.30284547805786133, "learning_rate": 6.350521265372458e-06, "loss": 0.1861, "step": 21350, "teacher_loss": 0.1731119453907013 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5110034346580505, "learning_rate": 6.348665744266466e-06, "loss": 0.3736, "step": 21351, "teacher_loss": 0.35834556818008423 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.42597055435180664, "learning_rate": 6.346810421509623e-06, "loss": 0.3195, "step": 21352, "teacher_loss": 0.3076527714729309 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7577675580978394, "learning_rate": 6.3449552971444735e-06, "loss": 0.2453, "step": 21353, "teacher_loss": 0.18836981058120728 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.24229231476783752, "learning_rate": 6.343100371213555e-06, "loss": 0.1843, "step": 21354, "teacher_loss": 0.17787739634513855 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.8748788237571716, "learning_rate": 6.341245643759386e-06, "loss": 0.3982, "step": 21355, "teacher_loss": 0.34520116448402405 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.6479318737983704, "learning_rate": 6.339391114824494e-06, "loss": 0.275, "step": 21356, "teacher_loss": 0.23356686532497406 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.34384024143218994, "learning_rate": 6.337536784451403e-06, "loss": 0.1711, "step": 21357, "teacher_loss": 0.15194770693778992 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5623119473457336, "learning_rate": 6.335682652682616e-06, "loss": 0.1986, "step": 21358, "teacher_loss": 0.1581922471523285 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.502917468547821, "learning_rate": 6.333828719560655e-06, "loss": 0.1933, "step": 21359, "teacher_loss": 0.15886220335960388 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.2588789165019989, "learning_rate": 6.331974985128014e-06, "loss": 0.1729, "step": 21360, "teacher_loss": 0.16331566870212555 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5945191383361816, "learning_rate": 6.330121449427198e-06, "loss": 0.2449, "step": 21361, "teacher_loss": 0.2061018943786621 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3617771565914154, "learning_rate": 6.328268112500709e-06, "loss": 0.1848, "step": 21362, "teacher_loss": 0.16519123315811157 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7821840047836304, "learning_rate": 6.326414974391026e-06, "loss": 0.2769, "step": 21363, "teacher_loss": 0.22077956795692444 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.43017128109931946, "learning_rate": 6.324562035140651e-06, "loss": 0.1804, "step": 21364, "teacher_loss": 0.1526479870080948 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3873622715473175, "learning_rate": 6.322709294792051e-06, "loss": 0.184, "step": 21365, "teacher_loss": 0.16139712929725647 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5269544720649719, "learning_rate": 6.320856753387712e-06, "loss": 0.2897, "step": 21366, "teacher_loss": 0.2633237838745117 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.25156816840171814, "learning_rate": 6.319004410970112e-06, "loss": 0.1453, "step": 21367, "teacher_loss": 0.13350479304790497 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7302528023719788, "learning_rate": 6.317152267581706e-06, "loss": 0.6018, "step": 21368, "teacher_loss": 0.5875493288040161 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.3036249279975891, "learning_rate": 6.315300323264974e-06, "loss": 0.1993, "step": 21369, "teacher_loss": 0.18773885071277618 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.45646196603775024, "learning_rate": 6.31344857806236e-06, "loss": 0.199, "step": 21370, "teacher_loss": 0.17034044861793518 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5490099191665649, "learning_rate": 6.311597032016328e-06, "loss": 0.2668, "step": 21371, "teacher_loss": 0.23541852831840515 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.43646240234375, "learning_rate": 6.3097456851693316e-06, "loss": 0.21, "step": 21372, "teacher_loss": 0.18478283286094666 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7461293935775757, "learning_rate": 6.307894537563807e-06, "loss": 0.2903, "step": 21373, "teacher_loss": 0.23969972133636475 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.7128242254257202, "learning_rate": 6.3060435892422005e-06, "loss": 0.2532, "step": 21374, "teacher_loss": 0.20215964317321777 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.26486867666244507, "learning_rate": 6.3041928402469525e-06, "loss": 0.2536, "step": 21375, "teacher_loss": 0.25234732031822205 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.2693418860435486, "learning_rate": 6.302342290620488e-06, "loss": 0.2109, "step": 21376, "teacher_loss": 0.2043648064136505 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.2843697667121887, "learning_rate": 6.3004919404052376e-06, "loss": 0.2217, "step": 21377, "teacher_loss": 0.2147739827632904 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5605969429016113, "learning_rate": 6.298641789643629e-06, "loss": 0.3499, "step": 21378, "teacher_loss": 0.3265239894390106 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.20030370354652405, "learning_rate": 6.29679183837807e-06, "loss": 0.1543, "step": 21379, "teacher_loss": 0.1492004096508026 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.4723861813545227, "learning_rate": 6.294942086650981e-06, "loss": 0.235, "step": 21380, "teacher_loss": 0.20859014987945557 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.6509625315666199, "learning_rate": 6.2930925345047784e-06, "loss": 0.2998, "step": 21381, "teacher_loss": 0.2607758045196533 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.23299269378185272, "learning_rate": 6.291243181981857e-06, "loss": 0.2184, "step": 21382, "teacher_loss": 0.21673761308193207 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.10066620260477066, "learning_rate": 6.289394029124608e-06, "loss": 0.2047, "step": 21383, "teacher_loss": 0.2163110375404358 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.5919342041015625, "learning_rate": 6.287545075975451e-06, "loss": 0.2081, "step": 21384, "teacher_loss": 0.16545751690864563 }, { "compression_loss": 0.0, "epoch": 3.86, "label_loss": 0.6712451577186584, "learning_rate": 6.285696322576764e-06, "loss": 0.2244, "step": 21385, "teacher_loss": 0.17476502060890198 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.34813517332077026, "learning_rate": 6.283847768970927e-06, "loss": 0.1884, "step": 21386, "teacher_loss": 0.17064355313777924 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.23295201361179352, "learning_rate": 6.281999415200329e-06, "loss": 0.231, "step": 21387, "teacher_loss": 0.23081165552139282 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.34332409501075745, "learning_rate": 6.280151261307351e-06, "loss": 0.2099, "step": 21388, "teacher_loss": 0.19511368870735168 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.4440908133983612, "learning_rate": 6.278303307334357e-06, "loss": 0.1991, "step": 21389, "teacher_loss": 0.1718633770942688 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.24108481407165527, "learning_rate": 6.276455553323717e-06, "loss": 0.1692, "step": 21390, "teacher_loss": 0.16126412153244019 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.43409717082977295, "learning_rate": 6.274607999317804e-06, "loss": 0.2205, "step": 21391, "teacher_loss": 0.19671514630317688 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5630717277526855, "learning_rate": 6.27276064535897e-06, "loss": 0.229, "step": 21392, "teacher_loss": 0.19190245866775513 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.24490173161029816, "learning_rate": 6.270913491489556e-06, "loss": 0.2057, "step": 21393, "teacher_loss": 0.2013475000858307 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.4531782865524292, "learning_rate": 6.269066537751936e-06, "loss": 0.5419, "step": 21394, "teacher_loss": 0.5517560243606567 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.2280678004026413, "learning_rate": 6.267219784188443e-06, "loss": 0.1915, "step": 21395, "teacher_loss": 0.18748915195465088 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.9340258836746216, "learning_rate": 6.265373230841409e-06, "loss": 0.2911, "step": 21396, "teacher_loss": 0.21962030231952667 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5182957649230957, "learning_rate": 6.2635268777531896e-06, "loss": 0.4731, "step": 21397, "teacher_loss": 0.4681251049041748 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.641831636428833, "learning_rate": 6.261680724966104e-06, "loss": 0.2431, "step": 21398, "teacher_loss": 0.1987903118133545 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3692972660064697, "learning_rate": 6.259834772522477e-06, "loss": 0.1932, "step": 21399, "teacher_loss": 0.17364542186260223 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.32787269353866577, "learning_rate": 6.257989020464633e-06, "loss": 0.1981, "step": 21400, "teacher_loss": 0.18366721272468567 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.2867777943611145, "learning_rate": 6.2561434688348965e-06, "loss": 0.2023, "step": 21401, "teacher_loss": 0.19293692708015442 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3665568232536316, "learning_rate": 6.25429811767557e-06, "loss": 0.2142, "step": 21402, "teacher_loss": 0.19722774624824524 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.6526110768318176, "learning_rate": 6.252452967028965e-06, "loss": 0.3142, "step": 21403, "teacher_loss": 0.27665427327156067 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.16048680245876312, "learning_rate": 6.250608016937394e-06, "loss": 0.1532, "step": 21404, "teacher_loss": 0.15236012637615204 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.41316312551498413, "learning_rate": 6.248763267443144e-06, "loss": 0.2407, "step": 21405, "teacher_loss": 0.22158949077129364 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.49262863397598267, "learning_rate": 6.246918718588513e-06, "loss": 0.2785, "step": 21406, "teacher_loss": 0.2547357678413391 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5956610441207886, "learning_rate": 6.245074370415799e-06, "loss": 0.2521, "step": 21407, "teacher_loss": 0.213896244764328 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3322790563106537, "learning_rate": 6.243230222967282e-06, "loss": 0.2078, "step": 21408, "teacher_loss": 0.19392454624176025 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5611691474914551, "learning_rate": 6.241386276285236e-06, "loss": 0.274, "step": 21409, "teacher_loss": 0.24212554097175598 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.7483846545219421, "learning_rate": 6.239542530411941e-06, "loss": 0.2189, "step": 21410, "teacher_loss": 0.16011865437030792 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5295722484588623, "learning_rate": 6.237698985389678e-06, "loss": 0.2254, "step": 21411, "teacher_loss": 0.19156429171562195 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3596239686012268, "learning_rate": 6.2358556412607e-06, "loss": 0.1845, "step": 21412, "teacher_loss": 0.16499629616737366 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.10400324314832687, "learning_rate": 6.2340124980672775e-06, "loss": 0.129, "step": 21413, "teacher_loss": 0.13176730275154114 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3605911433696747, "learning_rate": 6.2321695558516705e-06, "loss": 0.163, "step": 21414, "teacher_loss": 0.1410941481590271 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.21043020486831665, "learning_rate": 6.230326814656124e-06, "loss": 0.1989, "step": 21415, "teacher_loss": 0.19761121273040771 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5109516382217407, "learning_rate": 6.22848427452289e-06, "loss": 0.2639, "step": 21416, "teacher_loss": 0.23645088076591492 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.808468222618103, "learning_rate": 6.226641935494218e-06, "loss": 0.3006, "step": 21417, "teacher_loss": 0.24420185387134552 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.48544827103614807, "learning_rate": 6.22479979761234e-06, "loss": 0.2029, "step": 21418, "teacher_loss": 0.17149657011032104 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5281727313995361, "learning_rate": 6.222957860919496e-06, "loss": 0.2252, "step": 21419, "teacher_loss": 0.19158722460269928 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.4462193250656128, "learning_rate": 6.221116125457907e-06, "loss": 0.2328, "step": 21420, "teacher_loss": 0.20905530452728271 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.17726922035217285, "learning_rate": 6.219274591269807e-06, "loss": 0.1494, "step": 21421, "teacher_loss": 0.14630496501922607 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.35523146390914917, "learning_rate": 6.217433258397419e-06, "loss": 0.1905, "step": 21422, "teacher_loss": 0.17218877375125885 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.45175766944885254, "learning_rate": 6.21559212688295e-06, "loss": 0.2733, "step": 21423, "teacher_loss": 0.2534489035606384 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.41182851791381836, "learning_rate": 6.2137511967686164e-06, "loss": 0.2058, "step": 21424, "teacher_loss": 0.18291160464286804 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3489312529563904, "learning_rate": 6.211910468096631e-06, "loss": 0.1691, "step": 21425, "teacher_loss": 0.14910775423049927 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5681257247924805, "learning_rate": 6.210069940909185e-06, "loss": 0.2148, "step": 21426, "teacher_loss": 0.17553642392158508 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5742425918579102, "learning_rate": 6.208229615248486e-06, "loss": 0.2861, "step": 21427, "teacher_loss": 0.25411492586135864 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.19894939661026, "learning_rate": 6.2063894911567185e-06, "loss": 0.1735, "step": 21428, "teacher_loss": 0.17070996761322021 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.2264716625213623, "learning_rate": 6.204549568676075e-06, "loss": 0.1985, "step": 21429, "teacher_loss": 0.19544637203216553 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.7795950174331665, "learning_rate": 6.202709847848746e-06, "loss": 0.26, "step": 21430, "teacher_loss": 0.20230337977409363 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.4625800848007202, "learning_rate": 6.2008703287168975e-06, "loss": 0.2143, "step": 21431, "teacher_loss": 0.18675881624221802 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.25561806559562683, "learning_rate": 6.199031011322718e-06, "loss": 0.188, "step": 21432, "teacher_loss": 0.18051858246326447 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.29560545086860657, "learning_rate": 6.197191895708364e-06, "loss": 0.1676, "step": 21433, "teacher_loss": 0.15337711572647095 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5510247945785522, "learning_rate": 6.195352981916009e-06, "loss": 0.2682, "step": 21434, "teacher_loss": 0.23672299087047577 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.36915671825408936, "learning_rate": 6.1935142699878175e-06, "loss": 0.2312, "step": 21435, "teacher_loss": 0.2158956527709961 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.4329996109008789, "learning_rate": 6.191675759965935e-06, "loss": 0.2449, "step": 21436, "teacher_loss": 0.22402054071426392 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3481356203556061, "learning_rate": 6.189837451892519e-06, "loss": 0.1868, "step": 21437, "teacher_loss": 0.16886171698570251 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.3584153652191162, "learning_rate": 6.1879993458097215e-06, "loss": 0.1898, "step": 21438, "teacher_loss": 0.17111417651176453 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.5717709064483643, "learning_rate": 6.186161441759672e-06, "loss": 0.2785, "step": 21439, "teacher_loss": 0.24594587087631226 }, { "compression_loss": 0.0, "epoch": 3.87, "label_loss": 0.6559672355651855, "learning_rate": 6.184323739784517e-06, "loss": 0.3762, "step": 21440, "teacher_loss": 0.34516775608062744 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.1212519109249115, "learning_rate": 6.182486239926393e-06, "loss": 0.1736, "step": 21441, "teacher_loss": 0.17938290536403656 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.16818839311599731, "learning_rate": 6.180648942227424e-06, "loss": 0.1602, "step": 21442, "teacher_loss": 0.1592603325843811 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.42899173498153687, "learning_rate": 6.17881184672972e-06, "loss": 0.2292, "step": 21443, "teacher_loss": 0.20702405273914337 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.274258553981781, "learning_rate": 6.1769749534754265e-06, "loss": 0.2151, "step": 21444, "teacher_loss": 0.20849429070949554 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.2547885477542877, "learning_rate": 6.175138262506643e-06, "loss": 0.1497, "step": 21445, "teacher_loss": 0.13806220889091492 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.18894247710704803, "learning_rate": 6.17330177386547e-06, "loss": 0.2079, "step": 21446, "teacher_loss": 0.21003150939941406 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.48209500312805176, "learning_rate": 6.171465487594035e-06, "loss": 0.2056, "step": 21447, "teacher_loss": 0.17487266659736633 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.09564375132322311, "learning_rate": 6.169629403734426e-06, "loss": 0.203, "step": 21448, "teacher_loss": 0.21490024030208588 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.7101892232894897, "learning_rate": 6.1677935223287365e-06, "loss": 0.3467, "step": 21449, "teacher_loss": 0.3063589930534363 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.7390053868293762, "learning_rate": 6.16595784341906e-06, "loss": 0.4626, "step": 21450, "teacher_loss": 0.43187761306762695 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.6143171787261963, "learning_rate": 6.164122367047489e-06, "loss": 0.2819, "step": 21451, "teacher_loss": 0.24492789804935455 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.27114546298980713, "learning_rate": 6.162287093256096e-06, "loss": 0.1589, "step": 21452, "teacher_loss": 0.14645709097385406 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.24327969551086426, "learning_rate": 6.160452022086962e-06, "loss": 0.2065, "step": 21453, "teacher_loss": 0.20244361460208893 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.7598296999931335, "learning_rate": 6.158617153582169e-06, "loss": 0.476, "step": 21454, "teacher_loss": 0.4445103406906128 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.3083192706108093, "learning_rate": 6.156782487783775e-06, "loss": 0.1791, "step": 21455, "teacher_loss": 0.16478794813156128 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.16763943433761597, "learning_rate": 6.1549480247338345e-06, "loss": 0.2516, "step": 21456, "teacher_loss": 0.260894775390625 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.26820459961891174, "learning_rate": 6.153113764474428e-06, "loss": 0.1483, "step": 21457, "teacher_loss": 0.13497145473957062 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.08898498862981796, "learning_rate": 6.151279707047597e-06, "loss": 0.1167, "step": 21458, "teacher_loss": 0.11980529129505157 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.8061720728874207, "learning_rate": 6.149445852495389e-06, "loss": 0.2652, "step": 21459, "teacher_loss": 0.20512481033802032 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.4401913583278656, "learning_rate": 6.147612200859852e-06, "loss": 0.2096, "step": 21460, "teacher_loss": 0.1840098798274994 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5051714777946472, "learning_rate": 6.145778752183031e-06, "loss": 0.2232, "step": 21461, "teacher_loss": 0.1918383538722992 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.4408552050590515, "learning_rate": 6.143945506506952e-06, "loss": 0.1817, "step": 21462, "teacher_loss": 0.15292873978614807 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.6029576659202576, "learning_rate": 6.14211246387365e-06, "loss": 0.2337, "step": 21463, "teacher_loss": 0.19271501898765564 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.20352861285209656, "learning_rate": 6.14027962432516e-06, "loss": 0.2357, "step": 21464, "teacher_loss": 0.2392827570438385 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 1.1519109010696411, "learning_rate": 6.1384469879034875e-06, "loss": 0.3807, "step": 21465, "teacher_loss": 0.29503172636032104 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.7840806245803833, "learning_rate": 6.1366145546506576e-06, "loss": 0.406, "step": 21466, "teacher_loss": 0.36403924226760864 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.9405517578125, "learning_rate": 6.134782324608687e-06, "loss": 0.2789, "step": 21467, "teacher_loss": 0.2053447663784027 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.9230841398239136, "learning_rate": 6.13295029781958e-06, "loss": 0.497, "step": 21468, "teacher_loss": 0.44969502091407776 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.23191297054290771, "learning_rate": 6.131118474325326e-06, "loss": 0.2306, "step": 21469, "teacher_loss": 0.2304980754852295 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.6510720252990723, "learning_rate": 6.129286854167945e-06, "loss": 0.2657, "step": 21470, "teacher_loss": 0.22289106249809265 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.44027066230773926, "learning_rate": 6.127455437389421e-06, "loss": 0.2931, "step": 21471, "teacher_loss": 0.2767890691757202 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.27611273527145386, "learning_rate": 6.125624224031739e-06, "loss": 0.158, "step": 21472, "teacher_loss": 0.14486472308635712 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.249456524848938, "learning_rate": 6.1237932141368855e-06, "loss": 0.2192, "step": 21473, "teacher_loss": 0.21579775214195251 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.19588732719421387, "learning_rate": 6.1219624077468464e-06, "loss": 0.1868, "step": 21474, "teacher_loss": 0.18575289845466614 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.44268372654914856, "learning_rate": 6.120131804903587e-06, "loss": 0.2231, "step": 21475, "teacher_loss": 0.1986725628376007 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.8620580434799194, "learning_rate": 6.118301405649082e-06, "loss": 0.4099, "step": 21476, "teacher_loss": 0.35969996452331543 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.39401498436927795, "learning_rate": 6.116471210025302e-06, "loss": 0.2492, "step": 21477, "teacher_loss": 0.2331630140542984 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5513173341751099, "learning_rate": 6.114641218074199e-06, "loss": 0.4222, "step": 21478, "teacher_loss": 0.40780752897262573 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.39853644371032715, "learning_rate": 6.112811429837732e-06, "loss": 0.2557, "step": 21479, "teacher_loss": 0.23978257179260254 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.1522049605846405, "learning_rate": 6.110981845357858e-06, "loss": 0.1726, "step": 21480, "teacher_loss": 0.17486245930194855 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.7479170560836792, "learning_rate": 6.109152464676515e-06, "loss": 0.2199, "step": 21481, "teacher_loss": 0.1611787974834442 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.23468512296676636, "learning_rate": 6.107323287835656e-06, "loss": 0.1747, "step": 21482, "teacher_loss": 0.16800400614738464 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.6933165192604065, "learning_rate": 6.1054943148772065e-06, "loss": 0.3515, "step": 21483, "teacher_loss": 0.31354331970214844 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5207126140594482, "learning_rate": 6.1036655458431065e-06, "loss": 0.2861, "step": 21484, "teacher_loss": 0.2600601315498352 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.3583377003669739, "learning_rate": 6.101836980775286e-06, "loss": 0.2024, "step": 21485, "teacher_loss": 0.1850317120552063 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.3899415135383606, "learning_rate": 6.100008619715661e-06, "loss": 0.2073, "step": 21486, "teacher_loss": 0.1869933009147644 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.621436595916748, "learning_rate": 6.098180462706155e-06, "loss": 0.4256, "step": 21487, "teacher_loss": 0.4038439393043518 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5359862446784973, "learning_rate": 6.096352509788688e-06, "loss": 0.2321, "step": 21488, "teacher_loss": 0.1983642280101776 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.6086143851280212, "learning_rate": 6.094524761005156e-06, "loss": 0.238, "step": 21489, "teacher_loss": 0.19681468605995178 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5277094841003418, "learning_rate": 6.092697216397478e-06, "loss": 0.3022, "step": 21490, "teacher_loss": 0.2771047055721283 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5472970008850098, "learning_rate": 6.090869876007543e-06, "loss": 0.3557, "step": 21491, "teacher_loss": 0.33446234464645386 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.3187077045440674, "learning_rate": 6.089042739877255e-06, "loss": 0.1735, "step": 21492, "teacher_loss": 0.15733088552951813 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5366401076316833, "learning_rate": 6.087215808048496e-06, "loss": 0.247, "step": 21493, "teacher_loss": 0.21485620737075806 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.21971789002418518, "learning_rate": 6.085389080563155e-06, "loss": 0.2024, "step": 21494, "teacher_loss": 0.20045116543769836 }, { "compression_loss": 0.0, "epoch": 3.88, "label_loss": 0.5270112156867981, "learning_rate": 6.083562557463121e-06, "loss": 0.2658, "step": 21495, "teacher_loss": 0.23676815629005432 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.22443246841430664, "learning_rate": 6.08173623879026e-06, "loss": 0.1647, "step": 21496, "teacher_loss": 0.15806543827056885 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.388766884803772, "learning_rate": 6.079910124586448e-06, "loss": 0.224, "step": 21497, "teacher_loss": 0.20573511719703674 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.26085931062698364, "learning_rate": 6.078084214893559e-06, "loss": 0.2179, "step": 21498, "teacher_loss": 0.2131008505821228 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6068795323371887, "learning_rate": 6.076258509753444e-06, "loss": 0.2429, "step": 21499, "teacher_loss": 0.20245599746704102 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.17952454090118408, "learning_rate": 6.074433009207967e-06, "loss": 0.2165, "step": 21500, "teacher_loss": 0.22061416506767273 }, { "epoch": 3.89, "eval_exact_match": 80.60548722800378, "eval_f1": 87.91853036863203, "step": 21500 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.17898796498775482, "learning_rate": 6.0726077132989844e-06, "loss": 0.1551, "step": 21501, "teacher_loss": 0.1524914801120758 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6426769495010376, "learning_rate": 6.070782622068338e-06, "loss": 0.2848, "step": 21502, "teacher_loss": 0.24501320719718933 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.5454964637756348, "learning_rate": 6.068957735557873e-06, "loss": 0.252, "step": 21503, "teacher_loss": 0.21938394010066986 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.5013855695724487, "learning_rate": 6.0671330538094385e-06, "loss": 0.2894, "step": 21504, "teacher_loss": 0.2657957077026367 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.8262927532196045, "learning_rate": 6.065308576864859e-06, "loss": 0.3471, "step": 21505, "teacher_loss": 0.29384446144104004 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.46700114011764526, "learning_rate": 6.063484304765956e-06, "loss": 0.2606, "step": 21506, "teacher_loss": 0.23763345181941986 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 1.2278504371643066, "learning_rate": 6.061660237554576e-06, "loss": 0.309, "step": 21507, "teacher_loss": 0.206892192363739 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.4533717632293701, "learning_rate": 6.059836375272526e-06, "loss": 0.1904, "step": 21508, "teacher_loss": 0.16122302412986755 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3411828875541687, "learning_rate": 6.058012717961621e-06, "loss": 0.2128, "step": 21509, "teacher_loss": 0.1985655128955841 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.574245810508728, "learning_rate": 6.056189265663674e-06, "loss": 0.2399, "step": 21510, "teacher_loss": 0.20279093086719513 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.19872868061065674, "learning_rate": 6.054366018420499e-06, "loss": 0.2366, "step": 21511, "teacher_loss": 0.24076414108276367 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.46273842453956604, "learning_rate": 6.052542976273882e-06, "loss": 0.2612, "step": 21512, "teacher_loss": 0.23883458971977234 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.4668371081352234, "learning_rate": 6.050720139265631e-06, "loss": 0.2164, "step": 21513, "teacher_loss": 0.18861359357833862 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.690606951713562, "learning_rate": 6.048897507437543e-06, "loss": 0.3609, "step": 21514, "teacher_loss": 0.3242151141166687 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6028565168380737, "learning_rate": 6.047075080831395e-06, "loss": 0.2307, "step": 21515, "teacher_loss": 0.18936854600906372 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.19261498749256134, "learning_rate": 6.045252859488965e-06, "loss": 0.1777, "step": 21516, "teacher_loss": 0.17602989077568054 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.43819937109947205, "learning_rate": 6.043430843452049e-06, "loss": 0.1818, "step": 21517, "teacher_loss": 0.1532726287841797 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3872084617614746, "learning_rate": 6.04160903276241e-06, "loss": 0.2306, "step": 21518, "teacher_loss": 0.21323958039283752 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.4040103554725647, "learning_rate": 6.039787427461806e-06, "loss": 0.203, "step": 21519, "teacher_loss": 0.180661141872406 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.5683060884475708, "learning_rate": 6.0379660275920245e-06, "loss": 0.6271, "step": 21520, "teacher_loss": 0.6335958242416382 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3333659768104553, "learning_rate": 6.0361448331948125e-06, "loss": 0.1466, "step": 21521, "teacher_loss": 0.1258769929409027 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.19274571537971497, "learning_rate": 6.034323844311917e-06, "loss": 0.1551, "step": 21522, "teacher_loss": 0.15088623762130737 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.5214722752571106, "learning_rate": 6.0325030609850976e-06, "loss": 0.2909, "step": 21523, "teacher_loss": 0.2653363347053528 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.40102940797805786, "learning_rate": 6.030682483256101e-06, "loss": 0.3611, "step": 21524, "teacher_loss": 0.35662397742271423 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.38145458698272705, "learning_rate": 6.028862111166657e-06, "loss": 0.213, "step": 21525, "teacher_loss": 0.1943264603614807 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6443749666213989, "learning_rate": 6.027041944758511e-06, "loss": 0.2827, "step": 21526, "teacher_loss": 0.2424602508544922 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6635802984237671, "learning_rate": 6.025221984073394e-06, "loss": 0.3144, "step": 21527, "teacher_loss": 0.27563968300819397 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.4399562478065491, "learning_rate": 6.023402229153024e-06, "loss": 0.2411, "step": 21528, "teacher_loss": 0.2189868688583374 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.4511321187019348, "learning_rate": 6.02158268003913e-06, "loss": 0.2645, "step": 21529, "teacher_loss": 0.24379494786262512 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.38995757699012756, "learning_rate": 6.019763336773429e-06, "loss": 0.225, "step": 21530, "teacher_loss": 0.20668691396713257 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.21348296105861664, "learning_rate": 6.0179441993976315e-06, "loss": 0.1601, "step": 21531, "teacher_loss": 0.15411344170570374 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.6203569173812866, "learning_rate": 6.016125267953439e-06, "loss": 0.2786, "step": 21532, "teacher_loss": 0.24059879779815674 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.28592991828918457, "learning_rate": 6.0143065424825585e-06, "loss": 0.2428, "step": 21533, "teacher_loss": 0.2380424439907074 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.31097495555877686, "learning_rate": 6.012488023026694e-06, "loss": 0.1564, "step": 21534, "teacher_loss": 0.1391746997833252 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.79007887840271, "learning_rate": 6.010669709627529e-06, "loss": 0.5178, "step": 21535, "teacher_loss": 0.4875562787055969 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3001028299331665, "learning_rate": 6.0088516023267546e-06, "loss": 0.2128, "step": 21536, "teacher_loss": 0.20311081409454346 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.2174590826034546, "learning_rate": 6.007033701166061e-06, "loss": 0.2191, "step": 21537, "teacher_loss": 0.21932193636894226 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.36982911825180054, "learning_rate": 6.005216006187118e-06, "loss": 0.2851, "step": 21538, "teacher_loss": 0.27572688460350037 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.594820499420166, "learning_rate": 6.003398517431604e-06, "loss": 0.2289, "step": 21539, "teacher_loss": 0.18825441598892212 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.5105787515640259, "learning_rate": 6.001581234941194e-06, "loss": 0.2157, "step": 21540, "teacher_loss": 0.18293002247810364 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.23383063077926636, "learning_rate": 5.999764158757543e-06, "loss": 0.1922, "step": 21541, "teacher_loss": 0.18755602836608887 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.291191041469574, "learning_rate": 5.99794728892232e-06, "loss": 0.2323, "step": 21542, "teacher_loss": 0.2257024198770523 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.7122907638549805, "learning_rate": 5.996130625477171e-06, "loss": 0.5777, "step": 21543, "teacher_loss": 0.5627931356430054 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.31656503677368164, "learning_rate": 5.994314168463752e-06, "loss": 0.1761, "step": 21544, "teacher_loss": 0.16045910120010376 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3985392451286316, "learning_rate": 5.992497917923715e-06, "loss": 0.3391, "step": 21545, "teacher_loss": 0.33246538043022156 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.576453685760498, "learning_rate": 5.990681873898688e-06, "loss": 0.3614, "step": 21546, "teacher_loss": 0.3374585509300232 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.14430472254753113, "learning_rate": 5.988866036430314e-06, "loss": 0.1347, "step": 21547, "teacher_loss": 0.133660227060318 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.3273911476135254, "learning_rate": 5.987050405560233e-06, "loss": 0.1659, "step": 21548, "teacher_loss": 0.14794358611106873 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.22120609879493713, "learning_rate": 5.985234981330056e-06, "loss": 0.1605, "step": 21549, "teacher_loss": 0.1537349820137024 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.26090455055236816, "learning_rate": 5.983419763781415e-06, "loss": 0.2047, "step": 21550, "teacher_loss": 0.19847427308559418 }, { "compression_loss": 0.0, "epoch": 3.89, "label_loss": 0.34017497301101685, "learning_rate": 5.981604752955928e-06, "loss": 0.2317, "step": 21551, "teacher_loss": 0.21969184279441833 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.4736497402191162, "learning_rate": 5.979789948895203e-06, "loss": 0.2251, "step": 21552, "teacher_loss": 0.19744724035263062 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6700502634048462, "learning_rate": 5.977975351640856e-06, "loss": 0.2863, "step": 21553, "teacher_loss": 0.2436957210302353 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.4106016755104065, "learning_rate": 5.976160961234477e-06, "loss": 0.2949, "step": 21554, "teacher_loss": 0.28209900856018066 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2841656804084778, "learning_rate": 5.9743467777176795e-06, "loss": 0.429, "step": 21555, "teacher_loss": 0.44505560398101807 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.8621479868888855, "learning_rate": 5.972532801132044e-06, "loss": 0.2728, "step": 21556, "teacher_loss": 0.2072819173336029 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.36448854207992554, "learning_rate": 5.970719031519167e-06, "loss": 0.2757, "step": 21557, "teacher_loss": 0.2658294141292572 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2556465268135071, "learning_rate": 5.968905468920635e-06, "loss": 0.2029, "step": 21558, "teacher_loss": 0.19705471396446228 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.5616415739059448, "learning_rate": 5.9670921133780195e-06, "loss": 0.1884, "step": 21559, "teacher_loss": 0.14696058630943298 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.30442333221435547, "learning_rate": 5.9652789649329e-06, "loss": 0.2593, "step": 21560, "teacher_loss": 0.2543134093284607 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3682350516319275, "learning_rate": 5.963466023626851e-06, "loss": 0.1647, "step": 21561, "teacher_loss": 0.142124742269516 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2007249891757965, "learning_rate": 5.9616532895014286e-06, "loss": 0.2226, "step": 21562, "teacher_loss": 0.2250109314918518 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.21461153030395508, "learning_rate": 5.959840762598197e-06, "loss": 0.2866, "step": 21563, "teacher_loss": 0.2945913076400757 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6896828413009644, "learning_rate": 5.9580284429587185e-06, "loss": 0.2448, "step": 21564, "teacher_loss": 0.1953839361667633 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.39658069610595703, "learning_rate": 5.956216330624539e-06, "loss": 0.2461, "step": 21565, "teacher_loss": 0.22942045331001282 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6749616265296936, "learning_rate": 5.954404425637194e-06, "loss": 0.23, "step": 21566, "teacher_loss": 0.1805938184261322 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.1674351692199707, "learning_rate": 5.9525927280382455e-06, "loss": 0.1839, "step": 21567, "teacher_loss": 0.18576158583164215 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.9018399715423584, "learning_rate": 5.950781237869219e-06, "loss": 0.5499, "step": 21568, "teacher_loss": 0.5108265280723572 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.30842745304107666, "learning_rate": 5.948969955171639e-06, "loss": 0.1817, "step": 21569, "teacher_loss": 0.16763754189014435 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 1.2110718488693237, "learning_rate": 5.947158879987052e-06, "loss": 0.3147, "step": 21570, "teacher_loss": 0.21513602137565613 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.24220135807991028, "learning_rate": 5.945348012356969e-06, "loss": 0.1807, "step": 21571, "teacher_loss": 0.1738283485174179 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3856716752052307, "learning_rate": 5.943537352322903e-06, "loss": 0.1967, "step": 21572, "teacher_loss": 0.1757173240184784 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.7092587947845459, "learning_rate": 5.941726899926375e-06, "loss": 0.2608, "step": 21573, "teacher_loss": 0.2109902799129486 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.4329985976219177, "learning_rate": 5.939916655208895e-06, "loss": 0.1621, "step": 21574, "teacher_loss": 0.1320222169160843 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.49796581268310547, "learning_rate": 5.9381066182119565e-06, "loss": 0.2142, "step": 21575, "teacher_loss": 0.1826179027557373 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.25970178842544556, "learning_rate": 5.936296788977065e-06, "loss": 0.2806, "step": 21576, "teacher_loss": 0.2828960418701172 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.47879883646965027, "learning_rate": 5.93448716754572e-06, "loss": 0.1613, "step": 21577, "teacher_loss": 0.1260617971420288 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2591463327407837, "learning_rate": 5.932677753959405e-06, "loss": 0.2474, "step": 21578, "teacher_loss": 0.24609911441802979 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.17357288300991058, "learning_rate": 5.93086854825959e-06, "loss": 0.1894, "step": 21579, "teacher_loss": 0.19119200110435486 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3128025531768799, "learning_rate": 5.929059550487782e-06, "loss": 0.1902, "step": 21580, "teacher_loss": 0.1765539050102234 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3320736289024353, "learning_rate": 5.927250760685441e-06, "loss": 0.2052, "step": 21581, "teacher_loss": 0.19105927646160126 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.5902751088142395, "learning_rate": 5.9254421788940335e-06, "loss": 0.2577, "step": 21582, "teacher_loss": 0.22069881856441498 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3179823160171509, "learning_rate": 5.923633805155032e-06, "loss": 0.1593, "step": 21583, "teacher_loss": 0.14171132445335388 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.471457302570343, "learning_rate": 5.921825639509898e-06, "loss": 0.2273, "step": 21584, "teacher_loss": 0.2001430094242096 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.09451904892921448, "learning_rate": 5.920017682000078e-06, "loss": 0.1473, "step": 21585, "teacher_loss": 0.15319198369979858 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2655085325241089, "learning_rate": 5.918209932667031e-06, "loss": 0.1933, "step": 21586, "teacher_loss": 0.18524982035160065 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.238729327917099, "learning_rate": 5.916402391552207e-06, "loss": 0.1531, "step": 21587, "teacher_loss": 0.14356687664985657 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2084427922964096, "learning_rate": 5.914595058697037e-06, "loss": 0.2039, "step": 21588, "teacher_loss": 0.20335537195205688 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.5805263519287109, "learning_rate": 5.912787934142962e-06, "loss": 0.237, "step": 21589, "teacher_loss": 0.1987752914428711 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.5270664095878601, "learning_rate": 5.910981017931422e-06, "loss": 0.2363, "step": 21590, "teacher_loss": 0.2040441632270813 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.10751629620790482, "learning_rate": 5.90917431010383e-06, "loss": 0.1752, "step": 21591, "teacher_loss": 0.18277347087860107 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.503272533416748, "learning_rate": 5.907367810701615e-06, "loss": 0.2367, "step": 21592, "teacher_loss": 0.20710471272468567 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3892131447792053, "learning_rate": 5.9055615197662e-06, "loss": 0.2686, "step": 21593, "teacher_loss": 0.25521424412727356 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.4386591613292694, "learning_rate": 5.9037554373389954e-06, "loss": 0.1855, "step": 21594, "teacher_loss": 0.15737372636795044 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.8190631866455078, "learning_rate": 5.901949563461399e-06, "loss": 0.263, "step": 21595, "teacher_loss": 0.2012414187192917 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.19345691800117493, "learning_rate": 5.900143898174822e-06, "loss": 0.1821, "step": 21596, "teacher_loss": 0.18083441257476807 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.8136383295059204, "learning_rate": 5.8983384415206685e-06, "loss": 0.3277, "step": 21597, "teacher_loss": 0.27368173003196716 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.2521097958087921, "learning_rate": 5.89653319354032e-06, "loss": 0.2449, "step": 21598, "teacher_loss": 0.24408583343029022 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6602687239646912, "learning_rate": 5.894728154275173e-06, "loss": 0.3157, "step": 21599, "teacher_loss": 0.2774094343185425 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.46900177001953125, "learning_rate": 5.892923323766615e-06, "loss": 0.263, "step": 21600, "teacher_loss": 0.2401224821805954 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.479117751121521, "learning_rate": 5.891118702056017e-06, "loss": 0.2792, "step": 21601, "teacher_loss": 0.25693657994270325 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.25928887724876404, "learning_rate": 5.889314289184755e-06, "loss": 0.2592, "step": 21602, "teacher_loss": 0.2591739296913147 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6671919822692871, "learning_rate": 5.887510085194208e-06, "loss": 0.2252, "step": 21603, "teacher_loss": 0.1760404258966446 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.3644455671310425, "learning_rate": 5.885706090125728e-06, "loss": 0.2489, "step": 21604, "teacher_loss": 0.23601654171943665 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.6150169968605042, "learning_rate": 5.883902304020687e-06, "loss": 0.1691, "step": 21605, "teacher_loss": 0.11956362426280975 }, { "compression_loss": 0.0, "epoch": 3.9, "label_loss": 0.24721619486808777, "learning_rate": 5.882098726920429e-06, "loss": 0.2016, "step": 21606, "teacher_loss": 0.19655998051166534 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.4026600122451782, "learning_rate": 5.88029535886631e-06, "loss": 0.2141, "step": 21607, "teacher_loss": 0.1931876838207245 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5333244204521179, "learning_rate": 5.8784921998996836e-06, "loss": 0.3039, "step": 21608, "teacher_loss": 0.27839308977127075 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.46213236451148987, "learning_rate": 5.8766892500618755e-06, "loss": 0.2272, "step": 21609, "teacher_loss": 0.20106427371501923 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.6251480579376221, "learning_rate": 5.8748865093942315e-06, "loss": 0.2236, "step": 21610, "teacher_loss": 0.17902909219264984 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3579678535461426, "learning_rate": 5.873083977938084e-06, "loss": 0.2606, "step": 21611, "teacher_loss": 0.2498040497303009 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.711675763130188, "learning_rate": 5.8712816557347546e-06, "loss": 0.2288, "step": 21612, "teacher_loss": 0.1750963032245636 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3176117241382599, "learning_rate": 5.869479542825566e-06, "loss": 0.2373, "step": 21613, "teacher_loss": 0.22834038734436035 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.35230937600135803, "learning_rate": 5.867677639251844e-06, "loss": 0.2067, "step": 21614, "teacher_loss": 0.19054833054542542 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.43324583768844604, "learning_rate": 5.865875945054891e-06, "loss": 0.1973, "step": 21615, "teacher_loss": 0.1710677295923233 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.345924437046051, "learning_rate": 5.864074460276012e-06, "loss": 0.2297, "step": 21616, "teacher_loss": 0.21675482392311096 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.2902829349040985, "learning_rate": 5.862273184956515e-06, "loss": 0.158, "step": 21617, "teacher_loss": 0.1433359682559967 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3983793258666992, "learning_rate": 5.860472119137703e-06, "loss": 0.2064, "step": 21618, "teacher_loss": 0.18507874011993408 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5508131384849548, "learning_rate": 5.85867126286086e-06, "loss": 0.2973, "step": 21619, "teacher_loss": 0.2691289782524109 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.4712649881839752, "learning_rate": 5.856870616167277e-06, "loss": 0.1931, "step": 21620, "teacher_loss": 0.16214478015899658 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.24591794610023499, "learning_rate": 5.855070179098243e-06, "loss": 0.2419, "step": 21621, "teacher_loss": 0.2414049208164215 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3579743206501007, "learning_rate": 5.853269951695026e-06, "loss": 0.2639, "step": 21622, "teacher_loss": 0.2534680664539337 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.18500937521457672, "learning_rate": 5.851469933998907e-06, "loss": 0.1945, "step": 21623, "teacher_loss": 0.19550201296806335 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.21116414666175842, "learning_rate": 5.849670126051159e-06, "loss": 0.2002, "step": 21624, "teacher_loss": 0.19899138808250427 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.26453685760498047, "learning_rate": 5.8478705278930335e-06, "loss": 0.1918, "step": 21625, "teacher_loss": 0.18369004130363464 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5787939429283142, "learning_rate": 5.846071139565799e-06, "loss": 0.2792, "step": 21626, "teacher_loss": 0.24590596556663513 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.19722458720207214, "learning_rate": 5.844271961110713e-06, "loss": 0.2417, "step": 21627, "teacher_loss": 0.2466726005077362 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.1357852816581726, "learning_rate": 5.842472992569021e-06, "loss": 0.1818, "step": 21628, "teacher_loss": 0.18696370720863342 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5399938225746155, "learning_rate": 5.840674233981957e-06, "loss": 0.2214, "step": 21629, "teacher_loss": 0.1860440969467163 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3106970191001892, "learning_rate": 5.838875685390783e-06, "loss": 0.1739, "step": 21630, "teacher_loss": 0.1586693525314331 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.4684392809867859, "learning_rate": 5.837077346836722e-06, "loss": 0.2417, "step": 21631, "teacher_loss": 0.21649707853794098 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3990427851676941, "learning_rate": 5.835279218361001e-06, "loss": 0.1749, "step": 21632, "teacher_loss": 0.15000857412815094 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.2696496844291687, "learning_rate": 5.83348130000485e-06, "loss": 0.17, "step": 21633, "teacher_loss": 0.15894511342048645 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.7739237546920776, "learning_rate": 5.831683591809494e-06, "loss": 0.3032, "step": 21634, "teacher_loss": 0.25093701481819153 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.47465670108795166, "learning_rate": 5.829886093816143e-06, "loss": 0.2106, "step": 21635, "teacher_loss": 0.18124036490917206 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.11065588146448135, "learning_rate": 5.828088806066007e-06, "loss": 0.2025, "step": 21636, "teacher_loss": 0.21266357600688934 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 1.2231824398040771, "learning_rate": 5.826291728600303e-06, "loss": 0.2418, "step": 21637, "teacher_loss": 0.13277727365493774 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.912986159324646, "learning_rate": 5.824494861460226e-06, "loss": 0.3002, "step": 21638, "teacher_loss": 0.2321140319108963 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5347219705581665, "learning_rate": 5.8226982046869615e-06, "loss": 0.1926, "step": 21639, "teacher_loss": 0.15454307198524475 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.2978341579437256, "learning_rate": 5.820901758321721e-06, "loss": 0.2099, "step": 21640, "teacher_loss": 0.20011213421821594 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.23521390557289124, "learning_rate": 5.819105522405684e-06, "loss": 0.2276, "step": 21641, "teacher_loss": 0.22670245170593262 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.3059546947479248, "learning_rate": 5.817309496980021e-06, "loss": 0.2283, "step": 21642, "teacher_loss": 0.219665989279747 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.27937451004981995, "learning_rate": 5.815513682085931e-06, "loss": 0.3245, "step": 21643, "teacher_loss": 0.32946592569351196 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.44344207644462585, "learning_rate": 5.813718077764576e-06, "loss": 0.2901, "step": 21644, "teacher_loss": 0.27307188510894775 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.9404531121253967, "learning_rate": 5.811922684057118e-06, "loss": 0.3215, "step": 21645, "teacher_loss": 0.252686083316803 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.36817511916160583, "learning_rate": 5.810127501004726e-06, "loss": 0.1936, "step": 21646, "teacher_loss": 0.17422160506248474 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5702496767044067, "learning_rate": 5.8083325286485615e-06, "loss": 0.2656, "step": 21647, "teacher_loss": 0.2317364513874054 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.5612221956253052, "learning_rate": 5.80653776702977e-06, "loss": 0.2477, "step": 21648, "teacher_loss": 0.21284985542297363 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.327936589717865, "learning_rate": 5.804743216189504e-06, "loss": 0.1785, "step": 21649, "teacher_loss": 0.16184954345226288 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.572303295135498, "learning_rate": 5.802948876168912e-06, "loss": 0.2176, "step": 21650, "teacher_loss": 0.17815014719963074 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.2846210300922394, "learning_rate": 5.8011547470091224e-06, "loss": 0.3121, "step": 21651, "teacher_loss": 0.31512880325317383 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.20536591112613678, "learning_rate": 5.7993608287512756e-06, "loss": 0.22, "step": 21652, "teacher_loss": 0.2216547727584839 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.8416795134544373, "learning_rate": 5.797567121436505e-06, "loss": 0.365, "step": 21653, "teacher_loss": 0.31208163499832153 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.34806978702545166, "learning_rate": 5.7957736251059265e-06, "loss": 0.3385, "step": 21654, "teacher_loss": 0.3374047875404358 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.31881096959114075, "learning_rate": 5.793980339800666e-06, "loss": 0.2143, "step": 21655, "teacher_loss": 0.20268628001213074 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.16841164231300354, "learning_rate": 5.792187265561831e-06, "loss": 0.1811, "step": 21656, "teacher_loss": 0.18250912427902222 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.17222747206687927, "learning_rate": 5.79039440243054e-06, "loss": 0.1196, "step": 21657, "teacher_loss": 0.11379311978816986 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.29466214776039124, "learning_rate": 5.7886017504478896e-06, "loss": 0.2291, "step": 21658, "teacher_loss": 0.2217944860458374 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.4614105224609375, "learning_rate": 5.786809309654983e-06, "loss": 0.247, "step": 21659, "teacher_loss": 0.22321642935276031 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.4673064053058624, "learning_rate": 5.785017080092921e-06, "loss": 0.2022, "step": 21660, "teacher_loss": 0.17278021574020386 }, { "compression_loss": 0.0, "epoch": 3.91, "label_loss": 0.6787277460098267, "learning_rate": 5.783225061802786e-06, "loss": 0.2876, "step": 21661, "teacher_loss": 0.24414291977882385 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.44859573245048523, "learning_rate": 5.781433254825666e-06, "loss": 0.2384, "step": 21662, "teacher_loss": 0.2150963395833969 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3178305923938751, "learning_rate": 5.779641659202648e-06, "loss": 0.1844, "step": 21663, "teacher_loss": 0.1695285439491272 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.48061874508857727, "learning_rate": 5.777850274974797e-06, "loss": 0.2806, "step": 21664, "teacher_loss": 0.25834590196609497 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.4110620617866516, "learning_rate": 5.776059102183194e-06, "loss": 0.2221, "step": 21665, "teacher_loss": 0.20107778906822205 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.18785390257835388, "learning_rate": 5.774268140868897e-06, "loss": 0.2042, "step": 21666, "teacher_loss": 0.20604942739009857 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.5896732807159424, "learning_rate": 5.772477391072972e-06, "loss": 0.2453, "step": 21667, "teacher_loss": 0.20706912875175476 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.35474371910095215, "learning_rate": 5.77068685283648e-06, "loss": 0.2741, "step": 21668, "teacher_loss": 0.26514932513237 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.8012501001358032, "learning_rate": 5.7688965262004625e-06, "loss": 0.3274, "step": 21669, "teacher_loss": 0.2747178077697754 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.45436084270477295, "learning_rate": 5.7671064112059695e-06, "loss": 0.2584, "step": 21670, "teacher_loss": 0.23663340508937836 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.49641603231430054, "learning_rate": 5.765316507894052e-06, "loss": 0.2174, "step": 21671, "teacher_loss": 0.18639829754829407 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.6210223436355591, "learning_rate": 5.763526816305734e-06, "loss": 0.2618, "step": 21672, "teacher_loss": 0.2218715101480484 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.44714102149009705, "learning_rate": 5.761737336482054e-06, "loss": 0.3006, "step": 21673, "teacher_loss": 0.28429192304611206 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.2280219942331314, "learning_rate": 5.759948068464044e-06, "loss": 0.1613, "step": 21674, "teacher_loss": 0.15390491485595703 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.23498138785362244, "learning_rate": 5.758159012292717e-06, "loss": 0.1762, "step": 21675, "teacher_loss": 0.16965194046497345 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.33354857563972473, "learning_rate": 5.7563701680090995e-06, "loss": 0.1639, "step": 21676, "teacher_loss": 0.14507059752941132 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3858204185962677, "learning_rate": 5.754581535654196e-06, "loss": 0.1567, "step": 21677, "teacher_loss": 0.1312216967344284 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.7607284188270569, "learning_rate": 5.752793115269023e-06, "loss": 0.2977, "step": 21678, "teacher_loss": 0.2463020235300064 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.4339180588722229, "learning_rate": 5.7510049068945755e-06, "loss": 0.2399, "step": 21679, "teacher_loss": 0.21835613250732422 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.24386480450630188, "learning_rate": 5.749216910571854e-06, "loss": 0.2023, "step": 21680, "teacher_loss": 0.1976274847984314 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.5869287252426147, "learning_rate": 5.747429126341859e-06, "loss": 0.293, "step": 21681, "teacher_loss": 0.26029354333877563 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.388175904750824, "learning_rate": 5.745641554245569e-06, "loss": 0.2242, "step": 21682, "teacher_loss": 0.20600861310958862 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.27570071816444397, "learning_rate": 5.743854194323972e-06, "loss": 0.2267, "step": 21683, "teacher_loss": 0.2212313711643219 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.09506860375404358, "learning_rate": 5.742067046618052e-06, "loss": 0.1422, "step": 21684, "teacher_loss": 0.14746659994125366 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.8266760110855103, "learning_rate": 5.7402801111687725e-06, "loss": 0.2364, "step": 21685, "teacher_loss": 0.17083081603050232 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.17504079639911652, "learning_rate": 5.738493388017108e-06, "loss": 0.169, "step": 21686, "teacher_loss": 0.16837219893932343 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.19514837861061096, "learning_rate": 5.736706877204029e-06, "loss": 0.1574, "step": 21687, "teacher_loss": 0.15325510501861572 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.2866716682910919, "learning_rate": 5.734920578770486e-06, "loss": 0.2433, "step": 21688, "teacher_loss": 0.23853181302547455 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.7264032959938049, "learning_rate": 5.7331344927574274e-06, "loss": 0.3449, "step": 21689, "teacher_loss": 0.3025277256965637 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.6957714557647705, "learning_rate": 5.731348619205822e-06, "loss": 0.2894, "step": 21690, "teacher_loss": 0.2442837655544281 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3914807140827179, "learning_rate": 5.7295629581566035e-06, "loss": 0.2373, "step": 21691, "teacher_loss": 0.22014504671096802 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.38368386030197144, "learning_rate": 5.727777509650701e-06, "loss": 0.1622, "step": 21692, "teacher_loss": 0.1375441551208496 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3213942050933838, "learning_rate": 5.725992273729073e-06, "loss": 0.2255, "step": 21693, "teacher_loss": 0.21482135355472565 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3944419026374817, "learning_rate": 5.724207250432635e-06, "loss": 0.2244, "step": 21694, "teacher_loss": 0.20546674728393555 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.4395279288291931, "learning_rate": 5.72242243980231e-06, "loss": 0.1794, "step": 21695, "teacher_loss": 0.15044797956943512 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.5526009798049927, "learning_rate": 5.720637841879024e-06, "loss": 0.2504, "step": 21696, "teacher_loss": 0.21684645116329193 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3428601324558258, "learning_rate": 5.718853456703696e-06, "loss": 0.2316, "step": 21697, "teacher_loss": 0.21926641464233398 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.45548972487449646, "learning_rate": 5.717069284317225e-06, "loss": 0.1613, "step": 21698, "teacher_loss": 0.12860971689224243 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.8189011812210083, "learning_rate": 5.715285324760525e-06, "loss": 0.213, "step": 21699, "teacher_loss": 0.14563071727752686 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3457135558128357, "learning_rate": 5.7135015780745e-06, "loss": 0.2774, "step": 21700, "teacher_loss": 0.269858717918396 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.12041258811950684, "learning_rate": 5.7117180443000425e-06, "loss": 0.1715, "step": 21701, "teacher_loss": 0.17721140384674072 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.18133969604969025, "learning_rate": 5.7099347234780315e-06, "loss": 0.1473, "step": 21702, "teacher_loss": 0.1435309648513794 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3937842845916748, "learning_rate": 5.708151615649375e-06, "loss": 0.1895, "step": 21703, "teacher_loss": 0.16677403450012207 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.4104272723197937, "learning_rate": 5.706368720854943e-06, "loss": 0.2521, "step": 21704, "teacher_loss": 0.23454910516738892 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.46406644582748413, "learning_rate": 5.704586039135608e-06, "loss": 0.216, "step": 21705, "teacher_loss": 0.18847134709358215 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.4580375552177429, "learning_rate": 5.702803570532245e-06, "loss": 0.208, "step": 21706, "teacher_loss": 0.1801944077014923 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.2542845606803894, "learning_rate": 5.701021315085728e-06, "loss": 0.1832, "step": 21707, "teacher_loss": 0.17528820037841797 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.3979668617248535, "learning_rate": 5.699239272836907e-06, "loss": 0.2292, "step": 21708, "teacher_loss": 0.21046772599220276 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.30313894152641296, "learning_rate": 5.697457443826644e-06, "loss": 0.1717, "step": 21709, "teacher_loss": 0.15709473192691803 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.38411879539489746, "learning_rate": 5.695675828095797e-06, "loss": 0.2681, "step": 21710, "teacher_loss": 0.25517305731773376 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.24414892494678497, "learning_rate": 5.6938944256852025e-06, "loss": 0.1488, "step": 21711, "teacher_loss": 0.1382075846195221 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.6691270470619202, "learning_rate": 5.692113236635706e-06, "loss": 0.2393, "step": 21712, "teacher_loss": 0.19156885147094727 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.42486846446990967, "learning_rate": 5.690332260988152e-06, "loss": 0.17, "step": 21713, "teacher_loss": 0.1416792869567871 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.21350818872451782, "learning_rate": 5.688551498783364e-06, "loss": 0.2314, "step": 21714, "teacher_loss": 0.23338176310062408 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.6247075200080872, "learning_rate": 5.686770950062177e-06, "loss": 0.2873, "step": 21715, "teacher_loss": 0.2498628944158554 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.22747118771076202, "learning_rate": 5.684990614865406e-06, "loss": 0.1412, "step": 21716, "teacher_loss": 0.13165727257728577 }, { "compression_loss": 0.0, "epoch": 3.92, "label_loss": 0.6841297149658203, "learning_rate": 5.68321049323387e-06, "loss": 0.2239, "step": 21717, "teacher_loss": 0.17275935411453247 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.22954882681369781, "learning_rate": 5.681430585208391e-06, "loss": 0.1863, "step": 21718, "teacher_loss": 0.18147417902946472 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3274756371974945, "learning_rate": 5.679650890829763e-06, "loss": 0.2014, "step": 21719, "teacher_loss": 0.18739989399909973 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.33712196350097656, "learning_rate": 5.677871410138804e-06, "loss": 0.1734, "step": 21720, "teacher_loss": 0.1551661193370819 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6797663569450378, "learning_rate": 5.676092143176297e-06, "loss": 0.219, "step": 21721, "teacher_loss": 0.16781717538833618 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.34490782022476196, "learning_rate": 5.674313089983044e-06, "loss": 0.1934, "step": 21722, "teacher_loss": 0.17651715874671936 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6178730130195618, "learning_rate": 5.672534250599836e-06, "loss": 0.2021, "step": 21723, "teacher_loss": 0.15591385960578918 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6356312036514282, "learning_rate": 5.670755625067449e-06, "loss": 0.2063, "step": 21724, "teacher_loss": 0.15861597657203674 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.2869800925254822, "learning_rate": 5.668977213426664e-06, "loss": 0.2186, "step": 21725, "teacher_loss": 0.21102482080459595 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 1.0469518899917603, "learning_rate": 5.667199015718261e-06, "loss": 0.3694, "step": 21726, "teacher_loss": 0.29410645365715027 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.8406246900558472, "learning_rate": 5.665421031982997e-06, "loss": 0.2424, "step": 21727, "teacher_loss": 0.1759752780199051 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.4865397810935974, "learning_rate": 5.663643262261648e-06, "loss": 0.1862, "step": 21728, "teacher_loss": 0.1527741253376007 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.46112725138664246, "learning_rate": 5.661865706594963e-06, "loss": 0.2133, "step": 21729, "teacher_loss": 0.185746431350708 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3609007000923157, "learning_rate": 5.660088365023699e-06, "loss": 0.2796, "step": 21730, "teacher_loss": 0.2705879211425781 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.5885866284370422, "learning_rate": 5.658311237588613e-06, "loss": 0.3239, "step": 21731, "teacher_loss": 0.29449528455734253 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.17170582711696625, "learning_rate": 5.656534324330436e-06, "loss": 0.1719, "step": 21732, "teacher_loss": 0.1719554364681244 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.643259584903717, "learning_rate": 5.654757625289913e-06, "loss": 0.324, "step": 21733, "teacher_loss": 0.28848567605018616 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.7844679355621338, "learning_rate": 5.652981140507786e-06, "loss": 0.2941, "step": 21734, "teacher_loss": 0.23958855867385864 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.24841442704200745, "learning_rate": 5.651204870024772e-06, "loss": 0.1883, "step": 21735, "teacher_loss": 0.18161404132843018 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3275895118713379, "learning_rate": 5.649428813881601e-06, "loss": 0.213, "step": 21736, "teacher_loss": 0.20023277401924133 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.26092907786369324, "learning_rate": 5.647652972118998e-06, "loss": 0.1217, "step": 21737, "teacher_loss": 0.10623326152563095 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.2945171296596527, "learning_rate": 5.645877344777672e-06, "loss": 0.1851, "step": 21738, "teacher_loss": 0.1729859709739685 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.21960745751857758, "learning_rate": 5.64410193189833e-06, "loss": 0.2344, "step": 21739, "teacher_loss": 0.2360624074935913 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.5657855272293091, "learning_rate": 5.642326733521678e-06, "loss": 0.2475, "step": 21740, "teacher_loss": 0.21210221946239471 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.7064233422279358, "learning_rate": 5.640551749688424e-06, "loss": 0.3263, "step": 21741, "teacher_loss": 0.2841082811355591 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.31813669204711914, "learning_rate": 5.638776980439253e-06, "loss": 0.1862, "step": 21742, "teacher_loss": 0.17152544856071472 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.24169036746025085, "learning_rate": 5.6370024258148595e-06, "loss": 0.1888, "step": 21743, "teacher_loss": 0.18289324641227722 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.25272631645202637, "learning_rate": 5.635228085855934e-06, "loss": 0.1771, "step": 21744, "teacher_loss": 0.1687333583831787 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.2720526456832886, "learning_rate": 5.633453960603146e-06, "loss": 0.1485, "step": 21745, "teacher_loss": 0.13482016324996948 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.4527941942214966, "learning_rate": 5.631680050097176e-06, "loss": 0.2561, "step": 21746, "teacher_loss": 0.23424610495567322 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.11322474479675293, "learning_rate": 5.629906354378699e-06, "loss": 0.2063, "step": 21747, "teacher_loss": 0.2166532576084137 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.2210656702518463, "learning_rate": 5.628132873488372e-06, "loss": 0.2498, "step": 21748, "teacher_loss": 0.25294333696365356 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6846473813056946, "learning_rate": 5.626359607466861e-06, "loss": 0.2688, "step": 21749, "teacher_loss": 0.22262606024742126 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3647555410861969, "learning_rate": 5.624586556354824e-06, "loss": 0.2215, "step": 21750, "teacher_loss": 0.20553144812583923 }, { "epoch": 3.93, "eval_exact_match": 80.50141911069063, "eval_f1": 87.82737745441388, "step": 21750 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6421164274215698, "learning_rate": 5.622813720192909e-06, "loss": 0.2168, "step": 21751, "teacher_loss": 0.16956683993339539 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3941727578639984, "learning_rate": 5.621041099021751e-06, "loss": 0.2662, "step": 21752, "teacher_loss": 0.2520271837711334 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.406181275844574, "learning_rate": 5.619268692882011e-06, "loss": 0.1993, "step": 21753, "teacher_loss": 0.17625921964645386 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.9459228515625, "learning_rate": 5.6174965018143145e-06, "loss": 0.3764, "step": 21754, "teacher_loss": 0.3130866289138794 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.43356359004974365, "learning_rate": 5.61572452585929e-06, "loss": 0.3113, "step": 21755, "teacher_loss": 0.29775160551071167 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6293652057647705, "learning_rate": 5.613952765057564e-06, "loss": 0.3279, "step": 21756, "teacher_loss": 0.29445815086364746 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.42724162340164185, "learning_rate": 5.612181219449766e-06, "loss": 0.2644, "step": 21757, "teacher_loss": 0.2462974339723587 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.23358628153800964, "learning_rate": 5.610409889076502e-06, "loss": 0.1699, "step": 21758, "teacher_loss": 0.16280022263526917 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.26864466071128845, "learning_rate": 5.608638773978389e-06, "loss": 0.1288, "step": 21759, "teacher_loss": 0.11324891448020935 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.5911637544631958, "learning_rate": 5.6068678741960345e-06, "loss": 0.2722, "step": 21760, "teacher_loss": 0.23670923709869385 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.30494260787963867, "learning_rate": 5.605097189770039e-06, "loss": 0.1862, "step": 21761, "teacher_loss": 0.17301592230796814 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.4083373248577118, "learning_rate": 5.6033267207409875e-06, "loss": 0.2313, "step": 21762, "teacher_loss": 0.2116081714630127 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3688318729400635, "learning_rate": 5.6015564671494924e-06, "loss": 0.2139, "step": 21763, "teacher_loss": 0.19663822650909424 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.5753298997879028, "learning_rate": 5.599786429036129e-06, "loss": 0.3095, "step": 21764, "teacher_loss": 0.27996665239334106 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.2599407434463501, "learning_rate": 5.598016606441468e-06, "loss": 0.2092, "step": 21765, "teacher_loss": 0.20357708632946014 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.5110395550727844, "learning_rate": 5.59624699940611e-06, "loss": 0.2345, "step": 21766, "teacher_loss": 0.20372425019741058 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.7064459919929504, "learning_rate": 5.594477607970613e-06, "loss": 0.3742, "step": 21767, "teacher_loss": 0.3373359739780426 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.3331388831138611, "learning_rate": 5.59270843217554e-06, "loss": 0.1494, "step": 21768, "teacher_loss": 0.1290060132741928 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.1701088696718216, "learning_rate": 5.590939472061459e-06, "loss": 0.1274, "step": 21769, "teacher_loss": 0.12268504500389099 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.4092593193054199, "learning_rate": 5.58917072766893e-06, "loss": 0.1735, "step": 21770, "teacher_loss": 0.1472693681716919 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.34680867195129395, "learning_rate": 5.587402199038497e-06, "loss": 0.2562, "step": 21771, "teacher_loss": 0.24616605043411255 }, { "compression_loss": 0.0, "epoch": 3.93, "label_loss": 0.6532238721847534, "learning_rate": 5.585633886210711e-06, "loss": 0.2809, "step": 21772, "teacher_loss": 0.23955005407333374 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3128882646560669, "learning_rate": 5.583865789226118e-06, "loss": 0.2019, "step": 21773, "teacher_loss": 0.18954743444919586 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.41160523891448975, "learning_rate": 5.582097908125249e-06, "loss": 0.2393, "step": 21774, "teacher_loss": 0.22016194462776184 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.687391996383667, "learning_rate": 5.580330242948636e-06, "loss": 0.2868, "step": 21775, "teacher_loss": 0.24227085709571838 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.16445928812026978, "learning_rate": 5.578562793736816e-06, "loss": 0.116, "step": 21776, "teacher_loss": 0.1106310710310936 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.2942621409893036, "learning_rate": 5.576795560530297e-06, "loss": 0.2749, "step": 21777, "teacher_loss": 0.2727093994617462 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.29929181933403015, "learning_rate": 5.57502854336961e-06, "loss": 0.1435, "step": 21778, "teacher_loss": 0.1261957436800003 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.6811418533325195, "learning_rate": 5.573261742295257e-06, "loss": 0.2265, "step": 21779, "teacher_loss": 0.17594948410987854 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.29011276364326477, "learning_rate": 5.571495157347749e-06, "loss": 0.223, "step": 21780, "teacher_loss": 0.215493306517601 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3197956085205078, "learning_rate": 5.569728788567593e-06, "loss": 0.2945, "step": 21781, "teacher_loss": 0.2916387617588043 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.4756806194782257, "learning_rate": 5.567962635995276e-06, "loss": 0.2883, "step": 21782, "teacher_loss": 0.2674769461154938 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.39954444766044617, "learning_rate": 5.566196699671304e-06, "loss": 0.215, "step": 21783, "teacher_loss": 0.19446727633476257 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 1.0768918991088867, "learning_rate": 5.5644309796361525e-06, "loss": 0.2594, "step": 21784, "teacher_loss": 0.1685260683298111 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.38015520572662354, "learning_rate": 5.5626654759303085e-06, "loss": 0.3141, "step": 21785, "teacher_loss": 0.30670660734176636 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.23343563079833984, "learning_rate": 5.560900188594257e-06, "loss": 0.1825, "step": 21786, "teacher_loss": 0.1767873615026474 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.27691322565078735, "learning_rate": 5.559135117668458e-06, "loss": 0.1749, "step": 21787, "teacher_loss": 0.1635380983352661 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.36531662940979004, "learning_rate": 5.557370263193392e-06, "loss": 0.2043, "step": 21788, "teacher_loss": 0.18639256060123444 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.45378464460372925, "learning_rate": 5.555605625209509e-06, "loss": 0.2425, "step": 21789, "teacher_loss": 0.21904532611370087 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3090687394142151, "learning_rate": 5.553841203757273e-06, "loss": 0.1944, "step": 21790, "teacher_loss": 0.18169020116329193 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.2517775893211365, "learning_rate": 5.5520769988771445e-06, "loss": 0.1977, "step": 21791, "teacher_loss": 0.19167311489582062 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.283770889043808, "learning_rate": 5.550313010609557e-06, "loss": 0.2648, "step": 21792, "teacher_loss": 0.2626928389072418 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.30221399664878845, "learning_rate": 5.54854923899496e-06, "loss": 0.2385, "step": 21793, "teacher_loss": 0.23138776421546936 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.6881433725357056, "learning_rate": 5.546785684073798e-06, "loss": 0.2157, "step": 21794, "teacher_loss": 0.1632264256477356 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.2312692105770111, "learning_rate": 5.545022345886493e-06, "loss": 0.3013, "step": 21795, "teacher_loss": 0.30905210971832275 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3266114294528961, "learning_rate": 5.543259224473477e-06, "loss": 0.2714, "step": 21796, "teacher_loss": 0.26523900032043457 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.39791879057884216, "learning_rate": 5.541496319875181e-06, "loss": 0.2085, "step": 21797, "teacher_loss": 0.18747058510780334 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.33680111169815063, "learning_rate": 5.539733632132012e-06, "loss": 0.2262, "step": 21798, "teacher_loss": 0.21385599672794342 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.22774305939674377, "learning_rate": 5.5379711612843854e-06, "loss": 0.1904, "step": 21799, "teacher_loss": 0.18624094128608704 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.29141971468925476, "learning_rate": 5.536208907372717e-06, "loss": 0.1722, "step": 21800, "teacher_loss": 0.1589074581861496 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3927082419395447, "learning_rate": 5.534446870437404e-06, "loss": 0.2846, "step": 21801, "teacher_loss": 0.2725604772567749 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.099654421210289, "learning_rate": 5.532685050518842e-06, "loss": 0.1741, "step": 21802, "teacher_loss": 0.1823582649230957 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3140486180782318, "learning_rate": 5.530923447657425e-06, "loss": 0.2063, "step": 21803, "teacher_loss": 0.1943490207195282 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.537128746509552, "learning_rate": 5.529162061893551e-06, "loss": 0.2741, "step": 21804, "teacher_loss": 0.2448616623878479 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.4055987298488617, "learning_rate": 5.527400893267588e-06, "loss": 0.2032, "step": 21805, "teacher_loss": 0.18075653910636902 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.7366605997085571, "learning_rate": 5.525639941819924e-06, "loss": 0.287, "step": 21806, "teacher_loss": 0.23702384531497955 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.4150068163871765, "learning_rate": 5.5238792075909345e-06, "loss": 0.3218, "step": 21807, "teacher_loss": 0.31139639019966125 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.2262985110282898, "learning_rate": 5.52211869062098e-06, "loss": 0.2492, "step": 21808, "teacher_loss": 0.25169500708580017 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.6309796571731567, "learning_rate": 5.520358390950427e-06, "loss": 0.2852, "step": 21809, "teacher_loss": 0.24677090346813202 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.2678382992744446, "learning_rate": 5.51859830861964e-06, "loss": 0.1759, "step": 21810, "teacher_loss": 0.1657118797302246 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.33942747116088867, "learning_rate": 5.516838443668967e-06, "loss": 0.1828, "step": 21811, "teacher_loss": 0.16539278626441956 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.44235509634017944, "learning_rate": 5.515078796138746e-06, "loss": 0.2174, "step": 21812, "teacher_loss": 0.19235381484031677 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.5736873149871826, "learning_rate": 5.513319366069343e-06, "loss": 0.2513, "step": 21813, "teacher_loss": 0.21543771028518677 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.8301466703414917, "learning_rate": 5.511560153501083e-06, "loss": 0.3284, "step": 21814, "teacher_loss": 0.27264589071273804 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.5817592144012451, "learning_rate": 5.50980115847429e-06, "loss": 0.2379, "step": 21815, "teacher_loss": 0.19968563318252563 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.3100306987762451, "learning_rate": 5.508042381029314e-06, "loss": 0.2265, "step": 21816, "teacher_loss": 0.21723297238349915 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.4455883204936981, "learning_rate": 5.506283821206468e-06, "loss": 0.2349, "step": 21817, "teacher_loss": 0.21149076521396637 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.23605969548225403, "learning_rate": 5.504525479046064e-06, "loss": 0.2005, "step": 21818, "teacher_loss": 0.19651061296463013 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.6319211721420288, "learning_rate": 5.502767354588425e-06, "loss": 0.5765, "step": 21819, "teacher_loss": 0.570328950881958 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.7493718862533569, "learning_rate": 5.501009447873859e-06, "loss": 0.4379, "step": 21820, "teacher_loss": 0.40327948331832886 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.4499378800392151, "learning_rate": 5.499251758942662e-06, "loss": 0.3207, "step": 21821, "teacher_loss": 0.3062889575958252 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.7097110748291016, "learning_rate": 5.497494287835138e-06, "loss": 0.2722, "step": 21822, "teacher_loss": 0.2235608994960785 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.33150917291641235, "learning_rate": 5.495737034591584e-06, "loss": 0.1943, "step": 21823, "teacher_loss": 0.1790352314710617 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.35428741574287415, "learning_rate": 5.4939799992522864e-06, "loss": 0.1636, "step": 21824, "teacher_loss": 0.1423853486776352 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.5675662755966187, "learning_rate": 5.492223181857515e-06, "loss": 0.2165, "step": 21825, "teacher_loss": 0.1774882972240448 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.591162383556366, "learning_rate": 5.490466582447572e-06, "loss": 0.2788, "step": 21826, "teacher_loss": 0.2440471202135086 }, { "compression_loss": 0.0, "epoch": 3.94, "label_loss": 0.38896623253822327, "learning_rate": 5.488710201062717e-06, "loss": 0.2564, "step": 21827, "teacher_loss": 0.24171802401542664 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2594240605831146, "learning_rate": 5.486954037743218e-06, "loss": 0.1649, "step": 21828, "teacher_loss": 0.15434449911117554 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5492939949035645, "learning_rate": 5.48519809252934e-06, "loss": 0.254, "step": 21829, "teacher_loss": 0.2211982011795044 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.40110912919044495, "learning_rate": 5.483442365461349e-06, "loss": 0.2413, "step": 21830, "teacher_loss": 0.22355502843856812 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.8902040123939514, "learning_rate": 5.481686856579487e-06, "loss": 0.2806, "step": 21831, "teacher_loss": 0.2128480076789856 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5316786766052246, "learning_rate": 5.479931565924007e-06, "loss": 0.314, "step": 21832, "teacher_loss": 0.2898046374320984 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.056444279849529266, "learning_rate": 5.478176493535159e-06, "loss": 0.138, "step": 21833, "teacher_loss": 0.14707374572753906 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2887080907821655, "learning_rate": 5.476421639453172e-06, "loss": 0.1842, "step": 21834, "teacher_loss": 0.1726272702217102 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.47937312722206116, "learning_rate": 5.474667003718282e-06, "loss": 0.2876, "step": 21835, "teacher_loss": 0.2663082480430603 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.50431227684021, "learning_rate": 5.472912586370725e-06, "loss": 0.2268, "step": 21836, "teacher_loss": 0.19596019387245178 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.3487039804458618, "learning_rate": 5.471158387450716e-06, "loss": 0.175, "step": 21837, "teacher_loss": 0.15565647184848785 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.49248382449150085, "learning_rate": 5.469404406998478e-06, "loss": 0.3081, "step": 21838, "teacher_loss": 0.2875773310661316 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.473918616771698, "learning_rate": 5.467650645054218e-06, "loss": 0.368, "step": 21839, "teacher_loss": 0.35623180866241455 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.23867295682430267, "learning_rate": 5.4658971016581505e-06, "loss": 0.1834, "step": 21840, "teacher_loss": 0.17722952365875244 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.8058426976203918, "learning_rate": 5.464143776850483e-06, "loss": 0.4407, "step": 21841, "teacher_loss": 0.40007829666137695 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.35105955600738525, "learning_rate": 5.462390670671403e-06, "loss": 0.198, "step": 21842, "teacher_loss": 0.1809605211019516 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.3181931674480438, "learning_rate": 5.4606377831611135e-06, "loss": 0.2697, "step": 21843, "teacher_loss": 0.26432764530181885 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.30017197132110596, "learning_rate": 5.458885114359795e-06, "loss": 0.2011, "step": 21844, "teacher_loss": 0.19004732370376587 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.44406893849372864, "learning_rate": 5.457132664307636e-06, "loss": 0.2961, "step": 21845, "teacher_loss": 0.27965036034584045 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5293300747871399, "learning_rate": 5.455380433044819e-06, "loss": 0.3116, "step": 21846, "teacher_loss": 0.28742873668670654 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2917672395706177, "learning_rate": 5.453628420611507e-06, "loss": 0.1952, "step": 21847, "teacher_loss": 0.18445372581481934 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.45203158259391785, "learning_rate": 5.451876627047873e-06, "loss": 0.2534, "step": 21848, "teacher_loss": 0.23138515651226044 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.3581094741821289, "learning_rate": 5.4501250523940875e-06, "loss": 0.2194, "step": 21849, "teacher_loss": 0.20397770404815674 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2336127758026123, "learning_rate": 5.448373696690297e-06, "loss": 0.1412, "step": 21850, "teacher_loss": 0.13092857599258423 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 1.235878825187683, "learning_rate": 5.446622559976665e-06, "loss": 0.3322, "step": 21851, "teacher_loss": 0.23183083534240723 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.4069324731826782, "learning_rate": 5.44487164229333e-06, "loss": 0.1788, "step": 21852, "teacher_loss": 0.15341225266456604 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.38151299953460693, "learning_rate": 5.443120943680441e-06, "loss": 0.1677, "step": 21853, "teacher_loss": 0.14396381378173828 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.8180519342422485, "learning_rate": 5.441370464178142e-06, "loss": 0.2228, "step": 21854, "teacher_loss": 0.15660548210144043 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.774874210357666, "learning_rate": 5.439620203826553e-06, "loss": 0.3775, "step": 21855, "teacher_loss": 0.3333788812160492 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.6710628867149353, "learning_rate": 5.43787016266581e-06, "loss": 0.2115, "step": 21856, "teacher_loss": 0.16038648784160614 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.40120095014572144, "learning_rate": 5.43612034073604e-06, "loss": 0.185, "step": 21857, "teacher_loss": 0.1610175222158432 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.7244506478309631, "learning_rate": 5.434370738077352e-06, "loss": 0.3165, "step": 21858, "teacher_loss": 0.2711865305900574 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.6187089085578918, "learning_rate": 5.432621354729863e-06, "loss": 0.2043, "step": 21859, "teacher_loss": 0.15826475620269775 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2565605044364929, "learning_rate": 5.430872190733688e-06, "loss": 0.2408, "step": 21860, "teacher_loss": 0.23909947276115417 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.2647855877876282, "learning_rate": 5.4291232461289244e-06, "loss": 0.1918, "step": 21861, "teacher_loss": 0.1836376041173935 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.24888500571250916, "learning_rate": 5.427374520955659e-06, "loss": 0.1442, "step": 21862, "teacher_loss": 0.13257496058940887 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5142412781715393, "learning_rate": 5.425626015254004e-06, "loss": 0.2939, "step": 21863, "teacher_loss": 0.26938581466674805 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.32193291187286377, "learning_rate": 5.4238777290640415e-06, "loss": 0.2117, "step": 21864, "teacher_loss": 0.19943425059318542 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.47531965374946594, "learning_rate": 5.422129662425845e-06, "loss": 0.2375, "step": 21865, "teacher_loss": 0.2110653817653656 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 1.0679867267608643, "learning_rate": 5.420381815379501e-06, "loss": 0.4126, "step": 21866, "teacher_loss": 0.33975234627723694 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.22654172778129578, "learning_rate": 5.4186341879650845e-06, "loss": 0.154, "step": 21867, "teacher_loss": 0.14592206478118896 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.34396350383758545, "learning_rate": 5.416886780222657e-06, "loss": 0.3483, "step": 21868, "teacher_loss": 0.3487998843193054 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.48915645480155945, "learning_rate": 5.41513959219228e-06, "loss": 0.232, "step": 21869, "teacher_loss": 0.20348191261291504 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.409118115901947, "learning_rate": 5.413392623914023e-06, "loss": 0.1913, "step": 21870, "teacher_loss": 0.1671338975429535 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.7449277639389038, "learning_rate": 5.411645875427926e-06, "loss": 0.5844, "step": 21871, "teacher_loss": 0.5665807723999023 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.35170722007751465, "learning_rate": 5.409899346774041e-06, "loss": 0.1855, "step": 21872, "teacher_loss": 0.16700395941734314 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.41726088523864746, "learning_rate": 5.408153037992416e-06, "loss": 0.2622, "step": 21873, "teacher_loss": 0.24496275186538696 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.1638878881931305, "learning_rate": 5.406406949123086e-06, "loss": 0.1534, "step": 21874, "teacher_loss": 0.15228670835494995 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.9312049746513367, "learning_rate": 5.404661080206068e-06, "loss": 0.3758, "step": 21875, "teacher_loss": 0.3140539526939392 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.7727344632148743, "learning_rate": 5.402915431281417e-06, "loss": 0.2677, "step": 21876, "teacher_loss": 0.21159148216247559 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.17131558060646057, "learning_rate": 5.401170002389139e-06, "loss": 0.1889, "step": 21877, "teacher_loss": 0.19080935418605804 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.3958020508289337, "learning_rate": 5.39942479356925e-06, "loss": 0.1737, "step": 21878, "teacher_loss": 0.14901262521743774 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5957311987876892, "learning_rate": 5.397679804861766e-06, "loss": 0.2385, "step": 21879, "teacher_loss": 0.1987738013267517 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.40834569931030273, "learning_rate": 5.3959350363067e-06, "loss": 0.197, "step": 21880, "teacher_loss": 0.17347633838653564 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.5880615711212158, "learning_rate": 5.394190487944044e-06, "loss": 0.271, "step": 21881, "teacher_loss": 0.23574616014957428 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.7812591195106506, "learning_rate": 5.392446159813802e-06, "loss": 0.3244, "step": 21882, "teacher_loss": 0.27360600233078003 }, { "compression_loss": 0.0, "epoch": 3.95, "label_loss": 0.48583924770355225, "learning_rate": 5.390702051955967e-06, "loss": 0.1665, "step": 21883, "teacher_loss": 0.13096457719802856 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.17131707072257996, "learning_rate": 5.38895816441052e-06, "loss": 0.1386, "step": 21884, "teacher_loss": 0.13492825627326965 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5767116546630859, "learning_rate": 5.387214497217447e-06, "loss": 0.2577, "step": 21885, "teacher_loss": 0.22230495512485504 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 1.3411751985549927, "learning_rate": 5.385471050416731e-06, "loss": 0.3623, "step": 21886, "teacher_loss": 0.25353020429611206 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.36906176805496216, "learning_rate": 5.383727824048337e-06, "loss": 0.3784, "step": 21887, "teacher_loss": 0.3794023394584656 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.49249595403671265, "learning_rate": 5.381984818152223e-06, "loss": 0.2447, "step": 21888, "teacher_loss": 0.21719059348106384 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.10936786234378815, "learning_rate": 5.38024203276837e-06, "loss": 0.1409, "step": 21889, "teacher_loss": 0.14434897899627686 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.11709539592266083, "learning_rate": 5.378499467936725e-06, "loss": 0.1705, "step": 21890, "teacher_loss": 0.176442489027977 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5083616375923157, "learning_rate": 5.376757123697238e-06, "loss": 0.2001, "step": 21891, "teacher_loss": 0.16587528586387634 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.22735896706581116, "learning_rate": 5.375015000089856e-06, "loss": 0.1575, "step": 21892, "teacher_loss": 0.14976409077644348 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3107032775878906, "learning_rate": 5.37327309715453e-06, "loss": 0.1738, "step": 21893, "teacher_loss": 0.15860790014266968 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5569702386856079, "learning_rate": 5.371531414931183e-06, "loss": 0.2272, "step": 21894, "teacher_loss": 0.1905101090669632 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.24172909557819366, "learning_rate": 5.369789953459752e-06, "loss": 0.1724, "step": 21895, "teacher_loss": 0.16465112566947937 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.6556931734085083, "learning_rate": 5.368048712780171e-06, "loss": 0.2669, "step": 21896, "teacher_loss": 0.2237006574869156 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.40778079628944397, "learning_rate": 5.3663076929323505e-06, "loss": 0.1996, "step": 21897, "teacher_loss": 0.1764843463897705 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 1.3197052478790283, "learning_rate": 5.36456689395621e-06, "loss": 0.5313, "step": 21898, "teacher_loss": 0.44372835755348206 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.2644239068031311, "learning_rate": 5.362826315891669e-06, "loss": 0.1717, "step": 21899, "teacher_loss": 0.16143682599067688 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.38508686423301697, "learning_rate": 5.361085958778619e-06, "loss": 0.2071, "step": 21900, "teacher_loss": 0.1873604953289032 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.19162605702877045, "learning_rate": 5.359345822656975e-06, "loss": 0.1814, "step": 21901, "teacher_loss": 0.1802879273891449 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.554720938205719, "learning_rate": 5.357605907566623e-06, "loss": 0.427, "step": 21902, "teacher_loss": 0.4128074645996094 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3988930583000183, "learning_rate": 5.3558662135474586e-06, "loss": 0.2117, "step": 21903, "teacher_loss": 0.19088244438171387 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3039918839931488, "learning_rate": 5.35412674063937e-06, "loss": 0.1842, "step": 21904, "teacher_loss": 0.1708521842956543 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.8364557027816772, "learning_rate": 5.352387488882232e-06, "loss": 0.2954, "step": 21905, "teacher_loss": 0.2352619469165802 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5868608951568604, "learning_rate": 5.350648458315929e-06, "loss": 0.2448, "step": 21906, "teacher_loss": 0.2068420797586441 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.40496188402175903, "learning_rate": 5.348909648980321e-06, "loss": 0.1858, "step": 21907, "teacher_loss": 0.16146139800548553 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.4571641683578491, "learning_rate": 5.347171060915278e-06, "loss": 0.2853, "step": 21908, "teacher_loss": 0.2662588953971863 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.42678797245025635, "learning_rate": 5.345432694160669e-06, "loss": 0.2124, "step": 21909, "teacher_loss": 0.18859143555164337 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.22702844440937042, "learning_rate": 5.343694548756338e-06, "loss": 0.1918, "step": 21910, "teacher_loss": 0.18784919381141663 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5872438549995422, "learning_rate": 5.341956624742142e-06, "loss": 0.2541, "step": 21911, "teacher_loss": 0.2170286327600479 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3950216770172119, "learning_rate": 5.340218922157922e-06, "loss": 0.2017, "step": 21912, "teacher_loss": 0.18019825220108032 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.4140271544456482, "learning_rate": 5.33848144104352e-06, "loss": 0.1925, "step": 21913, "teacher_loss": 0.16784211993217468 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.2768034040927887, "learning_rate": 5.336744181438776e-06, "loss": 0.1862, "step": 21914, "teacher_loss": 0.17618080973625183 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.885356068611145, "learning_rate": 5.335007143383512e-06, "loss": 0.43, "step": 21915, "teacher_loss": 0.37940162420272827 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.4397169351577759, "learning_rate": 5.333270326917555e-06, "loss": 0.2712, "step": 21916, "teacher_loss": 0.25245797634124756 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3569839596748352, "learning_rate": 5.331533732080735e-06, "loss": 0.2081, "step": 21917, "teacher_loss": 0.1915380209684372 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.3570977449417114, "learning_rate": 5.329797358912851e-06, "loss": 0.2494, "step": 21918, "teacher_loss": 0.23746784031391144 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.4444868564605713, "learning_rate": 5.328061207453724e-06, "loss": 0.2691, "step": 21919, "teacher_loss": 0.2495938092470169 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.24075017869472504, "learning_rate": 5.326325277743157e-06, "loss": 0.1704, "step": 21920, "teacher_loss": 0.1625375747680664 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5903390645980835, "learning_rate": 5.324589569820945e-06, "loss": 0.3088, "step": 21921, "teacher_loss": 0.27756839990615845 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.24058055877685547, "learning_rate": 5.3228540837268854e-06, "loss": 0.1737, "step": 21922, "teacher_loss": 0.16629287600517273 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.6437263488769531, "learning_rate": 5.321118819500774e-06, "loss": 0.1981, "step": 21923, "teacher_loss": 0.14857585728168488 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5505216717720032, "learning_rate": 5.319383777182389e-06, "loss": 0.3062, "step": 21924, "teacher_loss": 0.2790648937225342 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.7036067247390747, "learning_rate": 5.3176489568115e-06, "loss": 0.2472, "step": 21925, "teacher_loss": 0.19644492864608765 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.4073464870452881, "learning_rate": 5.3159143584279e-06, "loss": 0.2291, "step": 21926, "teacher_loss": 0.20932528376579285 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.24316957592964172, "learning_rate": 5.314179982071351e-06, "loss": 0.1628, "step": 21927, "teacher_loss": 0.15388178825378418 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.24487102031707764, "learning_rate": 5.312445827781609e-06, "loss": 0.2651, "step": 21928, "teacher_loss": 0.2673344910144806 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.16839244961738586, "learning_rate": 5.310711895598439e-06, "loss": 0.2103, "step": 21929, "teacher_loss": 0.2149975597858429 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.2728954553604126, "learning_rate": 5.308978185561603e-06, "loss": 0.239, "step": 21930, "teacher_loss": 0.23524220287799835 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.17911314964294434, "learning_rate": 5.307244697710834e-06, "loss": 0.1567, "step": 21931, "teacher_loss": 0.1542629599571228 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.7517884969711304, "learning_rate": 5.305511432085885e-06, "loss": 0.2734, "step": 21932, "teacher_loss": 0.2202303111553192 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.7069342732429504, "learning_rate": 5.303778388726498e-06, "loss": 0.2402, "step": 21933, "teacher_loss": 0.18835780024528503 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.21009202301502228, "learning_rate": 5.302045567672403e-06, "loss": 0.2139, "step": 21934, "teacher_loss": 0.21434888243675232 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.6228509545326233, "learning_rate": 5.3003129689633165e-06, "loss": 0.3394, "step": 21935, "teacher_loss": 0.30785971879959106 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.5184410214424133, "learning_rate": 5.298580592638982e-06, "loss": 0.2127, "step": 21936, "teacher_loss": 0.17874939739704132 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.37738293409347534, "learning_rate": 5.296848438739108e-06, "loss": 0.2038, "step": 21937, "teacher_loss": 0.18446308374404907 }, { "compression_loss": 0.0, "epoch": 3.96, "label_loss": 0.16322796046733856, "learning_rate": 5.295116507303398e-06, "loss": 0.1947, "step": 21938, "teacher_loss": 0.19823195040225983 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5134272575378418, "learning_rate": 5.29338479837158e-06, "loss": 0.3674, "step": 21939, "teacher_loss": 0.3511603772640228 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.7748701572418213, "learning_rate": 5.291653311983345e-06, "loss": 0.3179, "step": 21940, "teacher_loss": 0.2671516239643097 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2868606448173523, "learning_rate": 5.28992204817839e-06, "loss": 0.1578, "step": 21941, "teacher_loss": 0.14349418878555298 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.38682180643081665, "learning_rate": 5.288191006996408e-06, "loss": 0.23, "step": 21942, "teacher_loss": 0.2125495821237564 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5617120862007141, "learning_rate": 5.286460188477096e-06, "loss": 0.2656, "step": 21943, "teacher_loss": 0.2326863557100296 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.11406774073839188, "learning_rate": 5.284729592660122e-06, "loss": 0.1522, "step": 21944, "teacher_loss": 0.15642790496349335 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.1886179894208908, "learning_rate": 5.282999219585172e-06, "loss": 0.1552, "step": 21945, "teacher_loss": 0.15147164463996887 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5381919145584106, "learning_rate": 5.28126906929192e-06, "loss": 0.2979, "step": 21946, "teacher_loss": 0.2712230682373047 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2647937536239624, "learning_rate": 5.279539141820032e-06, "loss": 0.1907, "step": 21947, "teacher_loss": 0.1824457049369812 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4587059020996094, "learning_rate": 5.277809437209156e-06, "loss": 0.2646, "step": 21948, "teacher_loss": 0.2430022805929184 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.26791971921920776, "learning_rate": 5.276079955498974e-06, "loss": 0.1901, "step": 21949, "teacher_loss": 0.18147799372673035 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.37886765599250793, "learning_rate": 5.274350696729122e-06, "loss": 0.1962, "step": 21950, "teacher_loss": 0.17591789364814758 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.38569360971450806, "learning_rate": 5.2726216609392455e-06, "loss": 0.2594, "step": 21951, "teacher_loss": 0.24531909823417664 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4861651062965393, "learning_rate": 5.270892848168989e-06, "loss": 0.2013, "step": 21952, "teacher_loss": 0.16964855790138245 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.21778753399848938, "learning_rate": 5.269164258457997e-06, "loss": 0.1992, "step": 21953, "teacher_loss": 0.19712093472480774 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4101647734642029, "learning_rate": 5.267435891845887e-06, "loss": 0.2353, "step": 21954, "teacher_loss": 0.21583417057991028 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5885810852050781, "learning_rate": 5.265707748372295e-06, "loss": 0.2264, "step": 21955, "teacher_loss": 0.1862037181854248 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.44949185848236084, "learning_rate": 5.263979828076843e-06, "loss": 0.2402, "step": 21956, "teacher_loss": 0.2169126570224762 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.20756162703037262, "learning_rate": 5.262252130999137e-06, "loss": 0.1484, "step": 21957, "teacher_loss": 0.14178110659122467 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.15677033364772797, "learning_rate": 5.260524657178796e-06, "loss": 0.1709, "step": 21958, "teacher_loss": 0.17251485586166382 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5402560234069824, "learning_rate": 5.258797406655429e-06, "loss": 0.223, "step": 21959, "teacher_loss": 0.18779020011425018 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.1859736144542694, "learning_rate": 5.257070379468627e-06, "loss": 0.1555, "step": 21960, "teacher_loss": 0.15209782123565674 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.31697842478752136, "learning_rate": 5.255343575657995e-06, "loss": 0.1817, "step": 21961, "teacher_loss": 0.16671551764011383 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.48178887367248535, "learning_rate": 5.253616995263114e-06, "loss": 0.2626, "step": 21962, "teacher_loss": 0.23824243247509003 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.18887396156787872, "learning_rate": 5.251890638323573e-06, "loss": 0.1713, "step": 21963, "teacher_loss": 0.1693476140499115 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.36619412899017334, "learning_rate": 5.25016450487896e-06, "loss": 0.2081, "step": 21964, "teacher_loss": 0.19056178629398346 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 1.170992374420166, "learning_rate": 5.248438594968837e-06, "loss": 0.3506, "step": 21965, "teacher_loss": 0.2594633102416992 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2523373067378998, "learning_rate": 5.246712908632781e-06, "loss": 0.2452, "step": 21966, "teacher_loss": 0.2443729043006897 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.7331432700157166, "learning_rate": 5.244987445910361e-06, "loss": 0.2292, "step": 21967, "teacher_loss": 0.1732456088066101 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5944974422454834, "learning_rate": 5.243262206841127e-06, "loss": 0.1933, "step": 21968, "teacher_loss": 0.14867115020751953 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.512334406375885, "learning_rate": 5.241537191464644e-06, "loss": 0.1948, "step": 21969, "teacher_loss": 0.15955850481987 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4872363805770874, "learning_rate": 5.23981239982045e-06, "loss": 0.2486, "step": 21970, "teacher_loss": 0.22211408615112305 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.30077463388442993, "learning_rate": 5.2380878319480944e-06, "loss": 0.2451, "step": 21971, "teacher_loss": 0.2389182448387146 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4363465905189514, "learning_rate": 5.236363487887123e-06, "loss": 0.2195, "step": 21972, "teacher_loss": 0.19537557661533356 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.7902982234954834, "learning_rate": 5.234639367677059e-06, "loss": 0.5048, "step": 21973, "teacher_loss": 0.4730609059333801 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.23141071200370789, "learning_rate": 5.232915471357441e-06, "loss": 0.1557, "step": 21974, "teacher_loss": 0.14732295274734497 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.485482782125473, "learning_rate": 5.2311917989677835e-06, "loss": 0.2487, "step": 21975, "teacher_loss": 0.22240306437015533 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.8187615275382996, "learning_rate": 5.229468350547608e-06, "loss": 0.2183, "step": 21976, "teacher_loss": 0.15154016017913818 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2518799602985382, "learning_rate": 5.227745126136436e-06, "loss": 0.1736, "step": 21977, "teacher_loss": 0.16494855284690857 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.3740791082382202, "learning_rate": 5.2260221257737656e-06, "loss": 0.1892, "step": 21978, "teacher_loss": 0.16861701011657715 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.3134530186653137, "learning_rate": 5.224299349499102e-06, "loss": 0.1815, "step": 21979, "teacher_loss": 0.16689112782478333 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.13086268305778503, "learning_rate": 5.222576797351951e-06, "loss": 0.1597, "step": 21980, "teacher_loss": 0.1629471778869629 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.44103682041168213, "learning_rate": 5.220854469371794e-06, "loss": 0.184, "step": 21981, "teacher_loss": 0.15545111894607544 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.305397629737854, "learning_rate": 5.219132365598126e-06, "loss": 0.1847, "step": 21982, "teacher_loss": 0.17123432457447052 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4067939519882202, "learning_rate": 5.217410486070433e-06, "loss": 0.2329, "step": 21983, "teacher_loss": 0.21361765265464783 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.7942676544189453, "learning_rate": 5.2156888308281875e-06, "loss": 0.3093, "step": 21984, "teacher_loss": 0.25536322593688965 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.4729829430580139, "learning_rate": 5.213967399910852e-06, "loss": 0.3221, "step": 21985, "teacher_loss": 0.30538833141326904 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.13893572986125946, "learning_rate": 5.212246193357914e-06, "loss": 0.2003, "step": 21986, "teacher_loss": 0.20710909366607666 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.9185959100723267, "learning_rate": 5.210525211208826e-06, "loss": 0.2589, "step": 21987, "teacher_loss": 0.18559253215789795 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.505616307258606, "learning_rate": 5.208804453503034e-06, "loss": 0.6072, "step": 21988, "teacher_loss": 0.618511438369751 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.5615938305854797, "learning_rate": 5.207083920280011e-06, "loss": 0.2407, "step": 21989, "teacher_loss": 0.2050146907567978 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.20613881945610046, "learning_rate": 5.205363611579192e-06, "loss": 0.1974, "step": 21990, "teacher_loss": 0.1964205801486969 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2284737229347229, "learning_rate": 5.203643527440016e-06, "loss": 0.3099, "step": 21991, "teacher_loss": 0.31893667578697205 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.2649606466293335, "learning_rate": 5.201923667901921e-06, "loss": 0.2806, "step": 21992, "teacher_loss": 0.282285213470459 }, { "compression_loss": 0.0, "epoch": 3.97, "label_loss": 0.49434322118759155, "learning_rate": 5.200204033004347e-06, "loss": 0.2238, "step": 21993, "teacher_loss": 0.19372862577438354 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5153838396072388, "learning_rate": 5.198484622786708e-06, "loss": 0.2454, "step": 21994, "teacher_loss": 0.2153470516204834 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.28129684925079346, "learning_rate": 5.19676543728843e-06, "loss": 0.1925, "step": 21995, "teacher_loss": 0.18261420726776123 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 1.0850865840911865, "learning_rate": 5.195046476548932e-06, "loss": 0.3091, "step": 21996, "teacher_loss": 0.2228819727897644 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4608195424079895, "learning_rate": 5.193327740607624e-06, "loss": 0.2134, "step": 21997, "teacher_loss": 0.18594929575920105 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.7459148168563843, "learning_rate": 5.191609229503898e-06, "loss": 0.4145, "step": 21998, "teacher_loss": 0.37763434648513794 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.22278110682964325, "learning_rate": 5.189890943277175e-06, "loss": 0.2346, "step": 21999, "teacher_loss": 0.2359198033809662 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.7811424732208252, "learning_rate": 5.188172881966841e-06, "loss": 0.4405, "step": 22000, "teacher_loss": 0.4026644229888916 }, { "epoch": 3.98, "eval_exact_match": 80.3027436140019, "eval_f1": 87.79446748314663, "step": 22000 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4003264904022217, "learning_rate": 5.186455045612279e-06, "loss": 0.1988, "step": 22001, "teacher_loss": 0.17646202445030212 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.45877087116241455, "learning_rate": 5.184737434252882e-06, "loss": 0.2387, "step": 22002, "teacher_loss": 0.2142493575811386 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.3367345333099365, "learning_rate": 5.183020047928034e-06, "loss": 0.273, "step": 22003, "teacher_loss": 0.26594263315200806 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5213367938995361, "learning_rate": 5.181302886677095e-06, "loss": 0.2577, "step": 22004, "teacher_loss": 0.2284557968378067 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.45731914043426514, "learning_rate": 5.179585950539445e-06, "loss": 0.2418, "step": 22005, "teacher_loss": 0.21788471937179565 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4329388439655304, "learning_rate": 5.177869239554452e-06, "loss": 0.2425, "step": 22006, "teacher_loss": 0.22131648659706116 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.34081748127937317, "learning_rate": 5.176152753761461e-06, "loss": 0.2359, "step": 22007, "teacher_loss": 0.2242031693458557 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.7113511562347412, "learning_rate": 5.174436493199836e-06, "loss": 0.2728, "step": 22008, "teacher_loss": 0.22410425543785095 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.44916754961013794, "learning_rate": 5.172720457908929e-06, "loss": 0.2693, "step": 22009, "teacher_loss": 0.24927031993865967 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.25898346304893494, "learning_rate": 5.171004647928078e-06, "loss": 0.1851, "step": 22010, "teacher_loss": 0.17690569162368774 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.340464323759079, "learning_rate": 5.169289063296611e-06, "loss": 0.2026, "step": 22011, "teacher_loss": 0.1872323900461197 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.3818777799606323, "learning_rate": 5.167573704053882e-06, "loss": 0.2761, "step": 22012, "teacher_loss": 0.2643362879753113 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.48050642013549805, "learning_rate": 5.165858570239208e-06, "loss": 0.236, "step": 22013, "teacher_loss": 0.20881447196006775 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.1785348504781723, "learning_rate": 5.164143661891909e-06, "loss": 0.1513, "step": 22014, "teacher_loss": 0.1482510268688202 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.6053401231765747, "learning_rate": 5.162428979051306e-06, "loss": 0.3338, "step": 22015, "teacher_loss": 0.3035826086997986 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.40746402740478516, "learning_rate": 5.160714521756716e-06, "loss": 0.2223, "step": 22016, "teacher_loss": 0.20174473524093628 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.37482452392578125, "learning_rate": 5.159000290047437e-06, "loss": 0.2386, "step": 22017, "teacher_loss": 0.22346365451812744 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.3597516417503357, "learning_rate": 5.157286283962779e-06, "loss": 0.212, "step": 22018, "teacher_loss": 0.19559751451015472 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5408403277397156, "learning_rate": 5.15557250354204e-06, "loss": 0.255, "step": 22019, "teacher_loss": 0.223200261592865 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5866103172302246, "learning_rate": 5.153858948824504e-06, "loss": 0.2436, "step": 22020, "teacher_loss": 0.20552338659763336 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.25638547539711, "learning_rate": 5.152145619849461e-06, "loss": 0.2177, "step": 22021, "teacher_loss": 0.21336421370506287 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4853782057762146, "learning_rate": 5.1504325166561995e-06, "loss": 0.2431, "step": 22022, "teacher_loss": 0.21622025966644287 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.17334792017936707, "learning_rate": 5.148719639283985e-06, "loss": 0.165, "step": 22023, "teacher_loss": 0.16411250829696655 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.41954725980758667, "learning_rate": 5.147006987772097e-06, "loss": 0.2275, "step": 22024, "teacher_loss": 0.20621536672115326 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4138488173484802, "learning_rate": 5.145294562159794e-06, "loss": 0.188, "step": 22025, "teacher_loss": 0.16293151676654816 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.1527281105518341, "learning_rate": 5.143582362486341e-06, "loss": 0.1607, "step": 22026, "teacher_loss": 0.16163088381290436 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4191634953022003, "learning_rate": 5.141870388790998e-06, "loss": 0.2674, "step": 22027, "teacher_loss": 0.25057196617126465 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.21376797556877136, "learning_rate": 5.140158641113005e-06, "loss": 0.2135, "step": 22028, "teacher_loss": 0.21347551047801971 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.6267105340957642, "learning_rate": 5.138447119491613e-06, "loss": 0.2759, "step": 22029, "teacher_loss": 0.23692211508750916 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4355524182319641, "learning_rate": 5.1367358239660685e-06, "loss": 0.3392, "step": 22030, "teacher_loss": 0.32846057415008545 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.32390856742858887, "learning_rate": 5.135024754575595e-06, "loss": 0.2357, "step": 22031, "teacher_loss": 0.22592945396900177 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.39424145221710205, "learning_rate": 5.133313911359431e-06, "loss": 0.2402, "step": 22032, "teacher_loss": 0.2231176793575287 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.35485368967056274, "learning_rate": 5.131603294356793e-06, "loss": 0.2006, "step": 22033, "teacher_loss": 0.1834208220243454 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.7223978042602539, "learning_rate": 5.129892903606909e-06, "loss": 0.2442, "step": 22034, "teacher_loss": 0.191110759973526 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.2598540782928467, "learning_rate": 5.1281827391489826e-06, "loss": 0.2022, "step": 22035, "teacher_loss": 0.19583234190940857 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.3204004168510437, "learning_rate": 5.12647280102223e-06, "loss": 0.2429, "step": 22036, "teacher_loss": 0.23428279161453247 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.7353971004486084, "learning_rate": 5.12476308926586e-06, "loss": 0.2717, "step": 22037, "teacher_loss": 0.2201557457447052 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4952617883682251, "learning_rate": 5.1230536039190575e-06, "loss": 0.1909, "step": 22038, "teacher_loss": 0.1570655107498169 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 1.0159000158309937, "learning_rate": 5.121344345021025e-06, "loss": 0.3128, "step": 22039, "teacher_loss": 0.2347167432308197 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.3604879379272461, "learning_rate": 5.119635312610952e-06, "loss": 0.1822, "step": 22040, "teacher_loss": 0.1623912751674652 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.8334888219833374, "learning_rate": 5.117926506728015e-06, "loss": 0.2754, "step": 22041, "teacher_loss": 0.21336081624031067 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5891445279121399, "learning_rate": 5.116217927411394e-06, "loss": 0.2314, "step": 22042, "teacher_loss": 0.19167275726795197 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.330754816532135, "learning_rate": 5.114509574700269e-06, "loss": 0.1374, "step": 22043, "teacher_loss": 0.11589701473712921 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4094906449317932, "learning_rate": 5.112801448633796e-06, "loss": 0.2296, "step": 22044, "teacher_loss": 0.20965151488780975 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4453967809677124, "learning_rate": 5.111093549251142e-06, "loss": 0.1758, "step": 22045, "teacher_loss": 0.1458282470703125 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4276135265827179, "learning_rate": 5.1093858765914685e-06, "loss": 0.2277, "step": 22046, "teacher_loss": 0.20547285676002502 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.4745435118675232, "learning_rate": 5.107678430693926e-06, "loss": 0.1895, "step": 22047, "teacher_loss": 0.15780854225158691 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.5829769372940063, "learning_rate": 5.105971211597645e-06, "loss": 0.2268, "step": 22048, "teacher_loss": 0.18718942999839783 }, { "compression_loss": 0.0, "epoch": 3.98, "label_loss": 0.714089035987854, "learning_rate": 5.104264219341793e-06, "loss": 0.2145, "step": 22049, "teacher_loss": 0.15894025564193726 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3871484398841858, "learning_rate": 5.102557453965493e-06, "loss": 0.2252, "step": 22050, "teacher_loss": 0.20721790194511414 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.41557231545448303, "learning_rate": 5.10085091550787e-06, "loss": 0.1955, "step": 22051, "teacher_loss": 0.17108920216560364 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.35114336013793945, "learning_rate": 5.0991446040080574e-06, "loss": 0.3063, "step": 22052, "teacher_loss": 0.30128806829452515 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.4830843508243561, "learning_rate": 5.09743851950518e-06, "loss": 0.1875, "step": 22053, "teacher_loss": 0.15468311309814453 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.34340670704841614, "learning_rate": 5.095732662038342e-06, "loss": 0.1934, "step": 22054, "teacher_loss": 0.17676490545272827 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5621516704559326, "learning_rate": 5.0940270316466616e-06, "loss": 0.2327, "step": 22055, "teacher_loss": 0.19607576727867126 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.47905653715133667, "learning_rate": 5.092321628369245e-06, "loss": 0.2119, "step": 22056, "teacher_loss": 0.1822408139705658 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.23314689099788666, "learning_rate": 5.090616452245187e-06, "loss": 0.1554, "step": 22057, "teacher_loss": 0.14675891399383545 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3631364703178406, "learning_rate": 5.088911503313577e-06, "loss": 0.1773, "step": 22058, "teacher_loss": 0.156688392162323 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.2871093153953552, "learning_rate": 5.08720678161352e-06, "loss": 0.1771, "step": 22059, "teacher_loss": 0.16484317183494568 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.705685019493103, "learning_rate": 5.085502287184092e-06, "loss": 0.2145, "step": 22060, "teacher_loss": 0.15992408990859985 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.40642642974853516, "learning_rate": 5.08379802006436e-06, "loss": 0.1829, "step": 22061, "teacher_loss": 0.1580895632505417 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6396265029907227, "learning_rate": 5.0820939802934205e-06, "loss": 0.225, "step": 22062, "teacher_loss": 0.17892920970916748 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5726832747459412, "learning_rate": 5.0803901679103284e-06, "loss": 0.2568, "step": 22063, "teacher_loss": 0.2217087745666504 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6497695446014404, "learning_rate": 5.078686582954147e-06, "loss": 0.2674, "step": 22064, "teacher_loss": 0.22495609521865845 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6412872672080994, "learning_rate": 5.0769832254639355e-06, "loss": 0.2952, "step": 22065, "teacher_loss": 0.25677546858787537 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5934469103813171, "learning_rate": 5.075280095478753e-06, "loss": 0.2043, "step": 22066, "teacher_loss": 0.16103440523147583 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.35248565673828125, "learning_rate": 5.073577193037636e-06, "loss": 0.2044, "step": 22067, "teacher_loss": 0.18800094723701477 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6676783561706543, "learning_rate": 5.071874518179635e-06, "loss": 0.3466, "step": 22068, "teacher_loss": 0.31092602014541626 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.43910035490989685, "learning_rate": 5.0701720709437885e-06, "loss": 0.351, "step": 22069, "teacher_loss": 0.341214656829834 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.10622549057006836, "learning_rate": 5.0684698513691216e-06, "loss": 0.1193, "step": 22070, "teacher_loss": 0.12073312699794769 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6832747459411621, "learning_rate": 5.066767859494663e-06, "loss": 0.225, "step": 22071, "teacher_loss": 0.17412303388118744 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.2978058159351349, "learning_rate": 5.065066095359442e-06, "loss": 0.2099, "step": 22072, "teacher_loss": 0.2001808136701584 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.30778956413269043, "learning_rate": 5.0633645590024695e-06, "loss": 0.1788, "step": 22073, "teacher_loss": 0.16451063752174377 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3444862961769104, "learning_rate": 5.06166325046275e-06, "loss": 0.1978, "step": 22074, "teacher_loss": 0.1815415620803833 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.4022221863269806, "learning_rate": 5.059962169779297e-06, "loss": 0.2447, "step": 22075, "teacher_loss": 0.22723780572414398 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.7403604984283447, "learning_rate": 5.058261316991112e-06, "loss": 0.2321, "step": 22076, "teacher_loss": 0.17568188905715942 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3576715588569641, "learning_rate": 5.056560692137186e-06, "loss": 0.2359, "step": 22077, "teacher_loss": 0.22232437133789062 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.48366469144821167, "learning_rate": 5.05486029525651e-06, "loss": 0.2332, "step": 22078, "teacher_loss": 0.20541802048683167 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.4213287830352783, "learning_rate": 5.0531601263880755e-06, "loss": 0.2065, "step": 22079, "teacher_loss": 0.18267206847667694 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.4027162790298462, "learning_rate": 5.051460185570852e-06, "loss": 0.2436, "step": 22080, "teacher_loss": 0.2258841097354889 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.24941398203372955, "learning_rate": 5.049760472843819e-06, "loss": 0.2356, "step": 22081, "teacher_loss": 0.23404951393604279 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.49006187915802, "learning_rate": 5.048060988245951e-06, "loss": 0.2377, "step": 22082, "teacher_loss": 0.20965850353240967 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6824772357940674, "learning_rate": 5.046361731816203e-06, "loss": 0.6974, "step": 22083, "teacher_loss": 0.6990044713020325 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.23311832547187805, "learning_rate": 5.044662703593542e-06, "loss": 0.1614, "step": 22084, "teacher_loss": 0.1534784734249115 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3675001561641693, "learning_rate": 5.0429639036169134e-06, "loss": 0.2496, "step": 22085, "teacher_loss": 0.23654364049434662 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3297327756881714, "learning_rate": 5.041265331925269e-06, "loss": 0.1691, "step": 22086, "teacher_loss": 0.15125374495983124 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.2938859462738037, "learning_rate": 5.039566988557557e-06, "loss": 0.212, "step": 22087, "teacher_loss": 0.2029523253440857 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.324543297290802, "learning_rate": 5.037868873552708e-06, "loss": 0.2533, "step": 22088, "teacher_loss": 0.24543911218643188 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.47561901807785034, "learning_rate": 5.036170986949656e-06, "loss": 0.3514, "step": 22089, "teacher_loss": 0.3375966548919678 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6222383975982666, "learning_rate": 5.034473328787337e-06, "loss": 0.296, "step": 22090, "teacher_loss": 0.25972336530685425 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5484682321548462, "learning_rate": 5.03277589910466e-06, "loss": 0.3056, "step": 22091, "teacher_loss": 0.2786676287651062 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.1844518780708313, "learning_rate": 5.031078697940548e-06, "loss": 0.1513, "step": 22092, "teacher_loss": 0.14764569699764252 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6276397705078125, "learning_rate": 5.029381725333918e-06, "loss": 0.2538, "step": 22093, "teacher_loss": 0.21228352189064026 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5493172407150269, "learning_rate": 5.027684981323668e-06, "loss": 0.253, "step": 22094, "teacher_loss": 0.22011929750442505 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.30760014057159424, "learning_rate": 5.025988465948707e-06, "loss": 0.1746, "step": 22095, "teacher_loss": 0.1598581224679947 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.39357078075408936, "learning_rate": 5.024292179247921e-06, "loss": 0.2362, "step": 22096, "teacher_loss": 0.2187683880329132 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.49585866928100586, "learning_rate": 5.0225961212602125e-06, "loss": 0.209, "step": 22097, "teacher_loss": 0.1771656572818756 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.6238361597061157, "learning_rate": 5.020900292024456e-06, "loss": 0.2343, "step": 22098, "teacher_loss": 0.19098469614982605 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.414753794670105, "learning_rate": 5.019204691579538e-06, "loss": 0.4912, "step": 22099, "teacher_loss": 0.4996855854988098 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.4031832218170166, "learning_rate": 5.017509319964335e-06, "loss": 0.3217, "step": 22100, "teacher_loss": 0.3126044273376465 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5616793036460876, "learning_rate": 5.0158141772177124e-06, "loss": 0.2774, "step": 22101, "teacher_loss": 0.24584342539310455 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.3253864049911499, "learning_rate": 5.014119263378534e-06, "loss": 0.2901, "step": 22102, "teacher_loss": 0.2861271798610687 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.7458573579788208, "learning_rate": 5.012424578485667e-06, "loss": 0.2807, "step": 22103, "teacher_loss": 0.2290249466896057 }, { "compression_loss": 0.0, "epoch": 3.99, "label_loss": 0.5150573253631592, "learning_rate": 5.010730122577956e-06, "loss": 0.213, "step": 22104, "teacher_loss": 0.17945709824562073 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.48404985666275024, "learning_rate": 5.009035895694254e-06, "loss": 0.2648, "step": 22105, "teacher_loss": 0.24043257534503937 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.40745002031326294, "learning_rate": 5.007341897873409e-06, "loss": 0.1966, "step": 22106, "teacher_loss": 0.17316630482673645 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.39655566215515137, "learning_rate": 5.005648129154254e-06, "loss": 0.1663, "step": 22107, "teacher_loss": 0.14074309170246124 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5433064103126526, "learning_rate": 5.003954589575614e-06, "loss": 0.2166, "step": 22108, "teacher_loss": 0.1803458333015442 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.2639385461807251, "learning_rate": 5.002261279176335e-06, "loss": 0.198, "step": 22109, "teacher_loss": 0.19067248702049255 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.44001516699790955, "learning_rate": 5.0005681979952304e-06, "loss": 0.2026, "step": 22110, "teacher_loss": 0.17626118659973145 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5811282396316528, "learning_rate": 4.9988753460711066e-06, "loss": 0.2311, "step": 22111, "teacher_loss": 0.19220075011253357 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.41958028078079224, "learning_rate": 4.997182723442798e-06, "loss": 0.1649, "step": 22112, "teacher_loss": 0.13656926155090332 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.4574601948261261, "learning_rate": 4.995490330149097e-06, "loss": 0.2939, "step": 22113, "teacher_loss": 0.275709867477417 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.3676756024360657, "learning_rate": 4.993798166228804e-06, "loss": 0.2762, "step": 22114, "teacher_loss": 0.2660888433456421 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.8565565347671509, "learning_rate": 4.992106231720719e-06, "loss": 0.4078, "step": 22115, "teacher_loss": 0.35796058177948 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.21084296703338623, "learning_rate": 4.990414526663636e-06, "loss": 0.1818, "step": 22116, "teacher_loss": 0.1785779595375061 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.30923327803611755, "learning_rate": 4.988723051096335e-06, "loss": 0.2107, "step": 22117, "teacher_loss": 0.19969907402992249 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.29472821950912476, "learning_rate": 4.987031805057598e-06, "loss": 0.2201, "step": 22118, "teacher_loss": 0.21175909042358398 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.2837311029434204, "learning_rate": 4.985340788586207e-06, "loss": 0.1603, "step": 22119, "teacher_loss": 0.1465539038181305 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.8274620175361633, "learning_rate": 4.9836500017209254e-06, "loss": 0.3808, "step": 22120, "teacher_loss": 0.3311272859573364 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.9161056876182556, "learning_rate": 4.981959444500509e-06, "loss": 0.8852, "step": 22121, "teacher_loss": 0.8818002939224243 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.4583396911621094, "learning_rate": 4.980269116963737e-06, "loss": 0.265, "step": 22122, "teacher_loss": 0.2434912621974945 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5375685095787048, "learning_rate": 4.978579019149353e-06, "loss": 0.2259, "step": 22123, "teacher_loss": 0.19121941924095154 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.2717920243740082, "learning_rate": 4.976889151096101e-06, "loss": 0.2017, "step": 22124, "teacher_loss": 0.19388742744922638 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.1128486692905426, "learning_rate": 4.97519951284273e-06, "loss": 0.1835, "step": 22125, "teacher_loss": 0.19139564037322998 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5337270498275757, "learning_rate": 4.973510104427985e-06, "loss": 0.23, "step": 22126, "teacher_loss": 0.19624271988868713 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.656831681728363, "learning_rate": 4.9718209258905865e-06, "loss": 0.2698, "step": 22127, "teacher_loss": 0.22680974006652832 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.33048298954963684, "learning_rate": 4.970131977269267e-06, "loss": 0.1934, "step": 22128, "teacher_loss": 0.1781637966632843 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.13731157779693604, "learning_rate": 4.968443258602757e-06, "loss": 0.1766, "step": 22129, "teacher_loss": 0.18100687861442566 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.734019935131073, "learning_rate": 4.96675476992976e-06, "loss": 0.3203, "step": 22130, "teacher_loss": 0.274295836687088 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.12270964682102203, "learning_rate": 4.965066511288996e-06, "loss": 0.1341, "step": 22131, "teacher_loss": 0.13538146018981934 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.6149711608886719, "learning_rate": 4.963378482719175e-06, "loss": 0.2414, "step": 22132, "teacher_loss": 0.19994506239891052 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.43186551332473755, "learning_rate": 4.96169068425899e-06, "loss": 0.1894, "step": 22133, "teacher_loss": 0.16248568892478943 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.44644060730934143, "learning_rate": 4.960003115947141e-06, "loss": 0.207, "step": 22134, "teacher_loss": 0.18039348721504211 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5457695722579956, "learning_rate": 4.958315777822323e-06, "loss": 0.343, "step": 22135, "teacher_loss": 0.32048556208610535 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.20110633969306946, "learning_rate": 4.956628669923218e-06, "loss": 0.1596, "step": 22136, "teacher_loss": 0.1549452543258667 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.23813612759113312, "learning_rate": 4.954941792288502e-06, "loss": 0.2076, "step": 22137, "teacher_loss": 0.2042200267314911 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.20629869401454926, "learning_rate": 4.953255144956853e-06, "loss": 0.1569, "step": 22138, "teacher_loss": 0.1514492928981781 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.29561883211135864, "learning_rate": 4.951568727966947e-06, "loss": 0.2015, "step": 22139, "teacher_loss": 0.1910032331943512 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.42475250363349915, "learning_rate": 4.9498825413574385e-06, "loss": 0.205, "step": 22140, "teacher_loss": 0.18053413927555084 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.10949330031871796, "learning_rate": 4.948196585166991e-06, "loss": 0.162, "step": 22141, "teacher_loss": 0.16781732439994812 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.16784986853599548, "learning_rate": 4.946510859434265e-06, "loss": 0.1941, "step": 22142, "teacher_loss": 0.1970541775226593 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.5954061150550842, "learning_rate": 4.944825364197897e-06, "loss": 0.2177, "step": 22143, "teacher_loss": 0.1757291555404663 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.6066509485244751, "learning_rate": 4.9431400994965375e-06, "loss": 0.3274, "step": 22144, "teacher_loss": 0.29642537236213684 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.19768646359443665, "learning_rate": 4.941455065368828e-06, "loss": 0.1734, "step": 22145, "teacher_loss": 0.17072495818138123 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.27365177869796753, "learning_rate": 4.939770261853391e-06, "loss": 0.1688, "step": 22146, "teacher_loss": 0.15717965364456177 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.7069647908210754, "learning_rate": 4.938085688988865e-06, "loss": 0.2357, "step": 22147, "teacher_loss": 0.18336597084999084 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.2205541431903839, "learning_rate": 4.936401346813864e-06, "loss": 0.1608, "step": 22148, "teacher_loss": 0.15416598320007324 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.37163227796554565, "learning_rate": 4.934717235367006e-06, "loss": 0.2136, "step": 22149, "teacher_loss": 0.19604641199111938 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.27454090118408203, "learning_rate": 4.933033354686909e-06, "loss": 0.191, "step": 22150, "teacher_loss": 0.18172410130500793 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.30184727907180786, "learning_rate": 4.931349704812171e-06, "loss": 0.1579, "step": 22151, "teacher_loss": 0.14191964268684387 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.322023868560791, "learning_rate": 4.929666285781396e-06, "loss": 0.2211, "step": 22152, "teacher_loss": 0.20988181233406067 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.2929700016975403, "learning_rate": 4.927983097633188e-06, "loss": 0.1634, "step": 22153, "teacher_loss": 0.14895889163017273 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.19951452314853668, "learning_rate": 4.9263001404061225e-06, "loss": 0.1384, "step": 22154, "teacher_loss": 0.13160760700702667 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.3469867706298828, "learning_rate": 4.924617414138792e-06, "loss": 0.1929, "step": 22155, "teacher_loss": 0.17578881978988647 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.37899789214134216, "learning_rate": 4.922934918869784e-06, "loss": 0.2222, "step": 22156, "teacher_loss": 0.2047813981771469 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.8178020715713501, "learning_rate": 4.921252654637664e-06, "loss": 0.4278, "step": 22157, "teacher_loss": 0.38449203968048096 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.418819397687912, "learning_rate": 4.919570621480997e-06, "loss": 0.1908, "step": 22158, "teacher_loss": 0.16549530625343323 }, { "compression_loss": 0.0, "epoch": 4.0, "label_loss": 0.4267660975456238, "learning_rate": 4.917888819438356e-06, "loss": 0.2025, "step": 22159, "teacher_loss": 0.1775885671377182 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.4184418320655823, "learning_rate": 4.916207248548299e-06, "loss": 0.3032, "step": 22160, "teacher_loss": 0.2903625965118408 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.4742000699043274, "learning_rate": 4.914525908849375e-06, "loss": 0.2153, "step": 22161, "teacher_loss": 0.1865180879831314 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.39197659492492676, "learning_rate": 4.912844800380131e-06, "loss": 0.1761, "step": 22162, "teacher_loss": 0.15207619965076447 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.31735193729400635, "learning_rate": 4.91116392317912e-06, "loss": 0.2331, "step": 22163, "teacher_loss": 0.2237127721309662 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.6306535005569458, "learning_rate": 4.909483277284864e-06, "loss": 0.2714, "step": 22164, "teacher_loss": 0.23151788115501404 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.4461095631122589, "learning_rate": 4.907802862735906e-06, "loss": 0.181, "step": 22165, "teacher_loss": 0.15153281390666962 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.7646471261978149, "learning_rate": 4.906122679570777e-06, "loss": 0.3834, "step": 22166, "teacher_loss": 0.3410441279411316 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.19427461922168732, "learning_rate": 4.9044427278279845e-06, "loss": 0.1484, "step": 22167, "teacher_loss": 0.14329418540000916 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.20533442497253418, "learning_rate": 4.902763007546052e-06, "loss": 0.1525, "step": 22168, "teacher_loss": 0.14658068120479584 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.27081024646759033, "learning_rate": 4.901083518763497e-06, "loss": 0.149, "step": 22169, "teacher_loss": 0.13545003533363342 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.29067543148994446, "learning_rate": 4.899404261518819e-06, "loss": 0.1804, "step": 22170, "teacher_loss": 0.16812169551849365 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.13765586912631989, "learning_rate": 4.897725235850506e-06, "loss": 0.1739, "step": 22171, "teacher_loss": 0.17787493765354156 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.15603819489479065, "learning_rate": 4.896046441797077e-06, "loss": 0.1948, "step": 22172, "teacher_loss": 0.19908232986927032 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3383418917655945, "learning_rate": 4.89436787939701e-06, "loss": 0.2161, "step": 22173, "teacher_loss": 0.20252057909965515 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.15133023262023926, "learning_rate": 4.892689548688784e-06, "loss": 0.1487, "step": 22174, "teacher_loss": 0.14836657047271729 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.21806451678276062, "learning_rate": 4.891011449710882e-06, "loss": 0.165, "step": 22175, "teacher_loss": 0.1591041088104248 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 1.5460799932479858, "learning_rate": 4.889333582501786e-06, "loss": 0.3248, "step": 22176, "teacher_loss": 0.18913854658603668 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.24715489149093628, "learning_rate": 4.887655947099952e-06, "loss": 0.2325, "step": 22177, "teacher_loss": 0.23091718554496765 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.6276915073394775, "learning_rate": 4.885978543543848e-06, "loss": 0.235, "step": 22178, "teacher_loss": 0.19139115512371063 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.29744383692741394, "learning_rate": 4.884301371871938e-06, "loss": 0.1855, "step": 22179, "teacher_loss": 0.17307361960411072 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.5255406498908997, "learning_rate": 4.882624432122669e-06, "loss": 0.1783, "step": 22180, "teacher_loss": 0.1397274136543274 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.4809519052505493, "learning_rate": 4.880947724334479e-06, "loss": 0.1826, "step": 22181, "teacher_loss": 0.14943277835845947 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.702721118927002, "learning_rate": 4.879271248545829e-06, "loss": 0.2333, "step": 22182, "teacher_loss": 0.18114346265792847 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.25440531969070435, "learning_rate": 4.877595004795145e-06, "loss": 0.1814, "step": 22183, "teacher_loss": 0.17327424883842468 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3782019019126892, "learning_rate": 4.8759189931208495e-06, "loss": 0.2086, "step": 22184, "teacher_loss": 0.18980836868286133 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.14903101325035095, "learning_rate": 4.874243213561389e-06, "loss": 0.1521, "step": 22185, "teacher_loss": 0.15243589878082275 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.330823689699173, "learning_rate": 4.8725676661551705e-06, "loss": 0.1907, "step": 22186, "teacher_loss": 0.17509052157402039 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3414389491081238, "learning_rate": 4.870892350940609e-06, "loss": 0.1675, "step": 22187, "teacher_loss": 0.1481526494026184 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.2518971562385559, "learning_rate": 4.869217267956116e-06, "loss": 0.1796, "step": 22188, "teacher_loss": 0.17159605026245117 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.30308133363723755, "learning_rate": 4.867542417240102e-06, "loss": 0.3065, "step": 22189, "teacher_loss": 0.3068299889564514 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.5366024971008301, "learning_rate": 4.8658677988309585e-06, "loss": 0.2319, "step": 22190, "teacher_loss": 0.19801317155361176 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.2289092242717743, "learning_rate": 4.864193412767081e-06, "loss": 0.162, "step": 22191, "teacher_loss": 0.15461131930351257 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.28311195969581604, "learning_rate": 4.862519259086866e-06, "loss": 0.1996, "step": 22192, "teacher_loss": 0.1903626173734665 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3824400305747986, "learning_rate": 4.860845337828684e-06, "loss": 0.2287, "step": 22193, "teacher_loss": 0.21161530911922455 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.2799724340438843, "learning_rate": 4.859171649030922e-06, "loss": 0.1846, "step": 22194, "teacher_loss": 0.1739773005247116 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.28905272483825684, "learning_rate": 4.857498192731955e-06, "loss": 0.2345, "step": 22195, "teacher_loss": 0.22843128442764282 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 1.1945858001708984, "learning_rate": 4.855824968970138e-06, "loss": 0.285, "step": 22196, "teacher_loss": 0.18390268087387085 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3967694640159607, "learning_rate": 4.8541519777838475e-06, "loss": 0.2765, "step": 22197, "teacher_loss": 0.2631611227989197 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3093956708908081, "learning_rate": 4.8524792192114286e-06, "loss": 0.2273, "step": 22198, "teacher_loss": 0.2181403785943985 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3573721647262573, "learning_rate": 4.850806693291242e-06, "loss": 0.1798, "step": 22199, "teacher_loss": 0.16004875302314758 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.38605546951293945, "learning_rate": 4.8491344000616235e-06, "loss": 0.1749, "step": 22200, "teacher_loss": 0.15141837298870087 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.5034204125404358, "learning_rate": 4.84746233956092e-06, "loss": 0.2506, "step": 22201, "teacher_loss": 0.22250115871429443 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3418252766132355, "learning_rate": 4.845790511827472e-06, "loss": 0.2086, "step": 22202, "teacher_loss": 0.19384264945983887 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.4632079005241394, "learning_rate": 4.844118916899597e-06, "loss": 0.2187, "step": 22203, "teacher_loss": 0.1915285289287567 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.37468206882476807, "learning_rate": 4.8424475548156274e-06, "loss": 0.2158, "step": 22204, "teacher_loss": 0.19809266924858093 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.5227788686752319, "learning_rate": 4.840776425613887e-06, "loss": 0.2362, "step": 22205, "teacher_loss": 0.20431622862815857 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.8319765329360962, "learning_rate": 4.839105529332677e-06, "loss": 0.27, "step": 22206, "teacher_loss": 0.20752760767936707 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.7293442487716675, "learning_rate": 4.83743486601032e-06, "loss": 0.2774, "step": 22207, "teacher_loss": 0.22717750072479248 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3652310073375702, "learning_rate": 4.8357644356851076e-06, "loss": 0.2014, "step": 22208, "teacher_loss": 0.18323367834091187 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3104920983314514, "learning_rate": 4.834094238395343e-06, "loss": 0.2243, "step": 22209, "teacher_loss": 0.21474528312683105 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.2556689977645874, "learning_rate": 4.832424274179321e-06, "loss": 0.1917, "step": 22210, "teacher_loss": 0.18454432487487793 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.509351909160614, "learning_rate": 4.830754543075324e-06, "loss": 0.2191, "step": 22211, "teacher_loss": 0.18682368099689484 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.732737123966217, "learning_rate": 4.829085045121636e-06, "loss": 0.375, "step": 22212, "teacher_loss": 0.3352881669998169 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.3403262495994568, "learning_rate": 4.827415780356539e-06, "loss": 0.2194, "step": 22213, "teacher_loss": 0.20597070455551147 }, { "compression_loss": 0.0, "epoch": 4.01, "label_loss": 0.32109522819519043, "learning_rate": 4.825746748818293e-06, "loss": 0.4109, "step": 22214, "teacher_loss": 0.4208357334136963 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.43766844272613525, "learning_rate": 4.824077950545171e-06, "loss": 0.2099, "step": 22215, "teacher_loss": 0.18455472588539124 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.3821280002593994, "learning_rate": 4.822409385575436e-06, "loss": 0.2337, "step": 22216, "teacher_loss": 0.21716630458831787 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.6813108325004578, "learning_rate": 4.820741053947337e-06, "loss": 0.2415, "step": 22217, "teacher_loss": 0.1926506608724594 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 1.1136844158172607, "learning_rate": 4.819072955699131e-06, "loss": 0.7076, "step": 22218, "teacher_loss": 0.6625146865844727 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.44034433364868164, "learning_rate": 4.817405090869053e-06, "loss": 0.2625, "step": 22219, "teacher_loss": 0.2427157163619995 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.3190616965293884, "learning_rate": 4.815737459495353e-06, "loss": 0.2022, "step": 22220, "teacher_loss": 0.18924956023693085 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.14926780760288239, "learning_rate": 4.814070061616253e-06, "loss": 0.1387, "step": 22221, "teacher_loss": 0.13756847381591797 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.328657329082489, "learning_rate": 4.812402897269988e-06, "loss": 0.2141, "step": 22222, "teacher_loss": 0.20142537355422974 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.39032572507858276, "learning_rate": 4.810735966494784e-06, "loss": 0.1925, "step": 22223, "teacher_loss": 0.1705411672592163 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.29390084743499756, "learning_rate": 4.809069269328851e-06, "loss": 0.1994, "step": 22224, "teacher_loss": 0.1888597458600998 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.5845149755477905, "learning_rate": 4.807402805810407e-06, "loss": 0.2422, "step": 22225, "teacher_loss": 0.20420344173908234 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.8635436296463013, "learning_rate": 4.805736575977661e-06, "loss": 0.2249, "step": 22226, "teacher_loss": 0.15398788452148438 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.14236120879650116, "learning_rate": 4.804070579868808e-06, "loss": 0.1922, "step": 22227, "teacher_loss": 0.1976958066225052 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.37992843985557556, "learning_rate": 4.802404817522047e-06, "loss": 0.2011, "step": 22228, "teacher_loss": 0.18126875162124634 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.7497575283050537, "learning_rate": 4.800739288975575e-06, "loss": 0.2285, "step": 22229, "teacher_loss": 0.17053581774234772 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.4402177929878235, "learning_rate": 4.799073994267571e-06, "loss": 0.3145, "step": 22230, "teacher_loss": 0.30058297514915466 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.177322119474411, "learning_rate": 4.797408933436207e-06, "loss": 0.1582, "step": 22231, "teacher_loss": 0.15603359043598175 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.22170838713645935, "learning_rate": 4.795744106519678e-06, "loss": 0.1536, "step": 22232, "teacher_loss": 0.1460103988647461 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.7143243551254272, "learning_rate": 4.794079513556141e-06, "loss": 0.3014, "step": 22233, "teacher_loss": 0.2555413842201233 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.5361828804016113, "learning_rate": 4.792415154583753e-06, "loss": 0.2426, "step": 22234, "teacher_loss": 0.20994088053703308 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.2834022045135498, "learning_rate": 4.79075102964069e-06, "loss": 0.144, "step": 22235, "teacher_loss": 0.1284627616405487 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 1.0938578844070435, "learning_rate": 4.789087138765099e-06, "loss": 0.2609, "step": 22236, "teacher_loss": 0.16839095950126648 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.30538052320480347, "learning_rate": 4.78742348199512e-06, "loss": 0.2016, "step": 22237, "teacher_loss": 0.19008949398994446 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.8319786190986633, "learning_rate": 4.785760059368902e-06, "loss": 0.3755, "step": 22238, "teacher_loss": 0.32474032044410706 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.36674243211746216, "learning_rate": 4.784096870924586e-06, "loss": 0.1629, "step": 22239, "teacher_loss": 0.1402583122253418 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.7657642364501953, "learning_rate": 4.7824339167002954e-06, "loss": 0.3325, "step": 22240, "teacher_loss": 0.28433090448379517 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.13501110672950745, "learning_rate": 4.7807711967341626e-06, "loss": 0.1691, "step": 22241, "teacher_loss": 0.17286773025989532 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.32190483808517456, "learning_rate": 4.779108711064311e-06, "loss": 0.2231, "step": 22242, "teacher_loss": 0.21207579970359802 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.3867034316062927, "learning_rate": 4.7774464597288535e-06, "loss": 0.1813, "step": 22243, "teacher_loss": 0.15851646661758423 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.26163506507873535, "learning_rate": 4.77578444276589e-06, "loss": 0.2059, "step": 22244, "teacher_loss": 0.19967812299728394 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 1.0899204015731812, "learning_rate": 4.774122660213543e-06, "loss": 0.3464, "step": 22245, "teacher_loss": 0.2637593746185303 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.2081674039363861, "learning_rate": 4.772461112109908e-06, "loss": 0.1595, "step": 22246, "teacher_loss": 0.1541392207145691 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.42645394802093506, "learning_rate": 4.7707997984930685e-06, "loss": 0.2538, "step": 22247, "teacher_loss": 0.23458853363990784 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.2986205816268921, "learning_rate": 4.769138719401122e-06, "loss": 0.2081, "step": 22248, "teacher_loss": 0.19806770980358124 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.4007059633731842, "learning_rate": 4.7674778748721565e-06, "loss": 0.2158, "step": 22249, "teacher_loss": 0.1952553540468216 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.666324257850647, "learning_rate": 4.765817264944239e-06, "loss": 0.3621, "step": 22250, "teacher_loss": 0.32832300662994385 }, { "epoch": 4.02, "eval_exact_match": 80.6717123935667, "eval_f1": 87.92336774801645, "step": 22250 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.6306180953979492, "learning_rate": 4.764156889655449e-06, "loss": 0.2465, "step": 22251, "teacher_loss": 0.20385517179965973 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.2822083830833435, "learning_rate": 4.762496749043856e-06, "loss": 0.1333, "step": 22252, "teacher_loss": 0.1167861819267273 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.4688491225242615, "learning_rate": 4.760836843147515e-06, "loss": 0.2075, "step": 22253, "teacher_loss": 0.1784694939851761 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.5995774865150452, "learning_rate": 4.759177172004487e-06, "loss": 0.318, "step": 22254, "teacher_loss": 0.28672486543655396 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.24449220299720764, "learning_rate": 4.7575177356528255e-06, "loss": 0.1813, "step": 22255, "teacher_loss": 0.17430609464645386 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.6079674363136292, "learning_rate": 4.75585853413057e-06, "loss": 0.2481, "step": 22256, "teacher_loss": 0.20806127786636353 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.4405941963195801, "learning_rate": 4.754199567475766e-06, "loss": 0.307, "step": 22257, "teacher_loss": 0.2921597957611084 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.28500232100486755, "learning_rate": 4.752540835726452e-06, "loss": 0.2252, "step": 22258, "teacher_loss": 0.21851998567581177 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.3956871032714844, "learning_rate": 4.750882338920648e-06, "loss": 0.1748, "step": 22259, "teacher_loss": 0.1503017544746399 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.24632880091667175, "learning_rate": 4.749224077096388e-06, "loss": 0.2292, "step": 22260, "teacher_loss": 0.22730940580368042 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.9285036325454712, "learning_rate": 4.747566050291683e-06, "loss": 0.2933, "step": 22261, "teacher_loss": 0.22268739342689514 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.532642126083374, "learning_rate": 4.745908258544553e-06, "loss": 0.1827, "step": 22262, "teacher_loss": 0.14382728934288025 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.29160061478614807, "learning_rate": 4.744250701892999e-06, "loss": 0.231, "step": 22263, "teacher_loss": 0.2242424190044403 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.42184627056121826, "learning_rate": 4.742593380375028e-06, "loss": 0.2248, "step": 22264, "teacher_loss": 0.20291900634765625 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.4828150272369385, "learning_rate": 4.740936294028643e-06, "loss": 0.2277, "step": 22265, "teacher_loss": 0.1993836760520935 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.6507429480552673, "learning_rate": 4.739279442891826e-06, "loss": 0.2558, "step": 22266, "teacher_loss": 0.21187695860862732 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.48997190594673157, "learning_rate": 4.737622827002567e-06, "loss": 0.199, "step": 22267, "teacher_loss": 0.16669908165931702 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.6850756406784058, "learning_rate": 4.735966446398854e-06, "loss": 0.2921, "step": 22268, "teacher_loss": 0.24838471412658691 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.2358751744031906, "learning_rate": 4.734310301118652e-06, "loss": 0.1663, "step": 22269, "teacher_loss": 0.15861360728740692 }, { "compression_loss": 0.0, "epoch": 4.02, "label_loss": 0.9026320576667786, "learning_rate": 4.732654391199941e-06, "loss": 0.3017, "step": 22270, "teacher_loss": 0.23489803075790405 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.597624659538269, "learning_rate": 4.7309987166806775e-06, "loss": 0.2318, "step": 22271, "teacher_loss": 0.1911410391330719 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2670944929122925, "learning_rate": 4.729343277598825e-06, "loss": 0.1829, "step": 22272, "teacher_loss": 0.17360037565231323 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5493311882019043, "learning_rate": 4.727688073992344e-06, "loss": 0.2722, "step": 22273, "teacher_loss": 0.2413557469844818 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.23454922437667847, "learning_rate": 4.7260331058991715e-06, "loss": 0.1652, "step": 22274, "teacher_loss": 0.15748630464076996 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.3663978576660156, "learning_rate": 4.724378373357257e-06, "loss": 0.2219, "step": 22275, "teacher_loss": 0.20589905977249146 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.3661792576313019, "learning_rate": 4.722723876404544e-06, "loss": 0.1709, "step": 22276, "teacher_loss": 0.14922773838043213 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.23750965297222137, "learning_rate": 4.7210696150789545e-06, "loss": 0.1558, "step": 22277, "teacher_loss": 0.14669647812843323 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5710964798927307, "learning_rate": 4.7194155894184205e-06, "loss": 0.2208, "step": 22278, "teacher_loss": 0.18187859654426575 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5654985308647156, "learning_rate": 4.71776179946087e-06, "loss": 0.22, "step": 22279, "teacher_loss": 0.181605726480484 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.7033708095550537, "learning_rate": 4.716108245244214e-06, "loss": 0.2089, "step": 22280, "teacher_loss": 0.1539667695760727 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.1005990132689476, "learning_rate": 4.714454926806356e-06, "loss": 0.1647, "step": 22281, "teacher_loss": 0.17187516391277313 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2601490020751953, "learning_rate": 4.712801844185211e-06, "loss": 0.1487, "step": 22282, "teacher_loss": 0.1363557130098343 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2546847462654114, "learning_rate": 4.71114899741868e-06, "loss": 0.1441, "step": 22283, "teacher_loss": 0.13180966675281525 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.7071696519851685, "learning_rate": 4.709496386544653e-06, "loss": 0.2601, "step": 22284, "teacher_loss": 0.2104508876800537 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.24335068464279175, "learning_rate": 4.707844011601019e-06, "loss": 0.2214, "step": 22285, "teacher_loss": 0.21894752979278564 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.3953036069869995, "learning_rate": 4.7061918726256695e-06, "loss": 0.1994, "step": 22286, "teacher_loss": 0.17759236693382263 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.3916807174682617, "learning_rate": 4.704539969656474e-06, "loss": 0.2451, "step": 22287, "teacher_loss": 0.22876980900764465 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.9959553480148315, "learning_rate": 4.702888302731309e-06, "loss": 0.2062, "step": 22288, "teacher_loss": 0.11843357235193253 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.6659566760063171, "learning_rate": 4.7012368718880476e-06, "loss": 0.2842, "step": 22289, "teacher_loss": 0.24179702997207642 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.4169069826602936, "learning_rate": 4.699585677164543e-06, "loss": 0.2456, "step": 22290, "teacher_loss": 0.2265946865081787 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5631296038627625, "learning_rate": 4.697934718598656e-06, "loss": 0.2342, "step": 22291, "teacher_loss": 0.1976226270198822 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2258257269859314, "learning_rate": 4.696283996228243e-06, "loss": 0.1927, "step": 22292, "teacher_loss": 0.18901114165782928 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.27129799127578735, "learning_rate": 4.694633510091149e-06, "loss": 0.2032, "step": 22293, "teacher_loss": 0.19562718272209167 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.34878307580947876, "learning_rate": 4.692983260225199e-06, "loss": 0.173, "step": 22294, "teacher_loss": 0.1535157561302185 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.813422679901123, "learning_rate": 4.6913332466682505e-06, "loss": 0.2603, "step": 22295, "teacher_loss": 0.19889700412750244 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2348555624485016, "learning_rate": 4.689683469458124e-06, "loss": 0.1304, "step": 22296, "teacher_loss": 0.11883604526519775 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.2653898298740387, "learning_rate": 4.688033928632639e-06, "loss": 0.1923, "step": 22297, "teacher_loss": 0.1842237412929535 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.4472760260105133, "learning_rate": 4.6863846242296195e-06, "loss": 0.2418, "step": 22298, "teacher_loss": 0.2189258486032486 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5030593872070312, "learning_rate": 4.684735556286883e-06, "loss": 0.2077, "step": 22299, "teacher_loss": 0.17487642168998718 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5020591020584106, "learning_rate": 4.683086724842228e-06, "loss": 0.2611, "step": 22300, "teacher_loss": 0.23430484533309937 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.23359909653663635, "learning_rate": 4.681438129933464e-06, "loss": 0.1791, "step": 22301, "teacher_loss": 0.17300131916999817 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.1912221610546112, "learning_rate": 4.679789771598392e-06, "loss": 0.1693, "step": 22302, "teacher_loss": 0.16685792803764343 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5604729652404785, "learning_rate": 4.678141649874798e-06, "loss": 0.2874, "step": 22303, "teacher_loss": 0.2570732533931732 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.47085297107696533, "learning_rate": 4.6764937648004595e-06, "loss": 0.3578, "step": 22304, "teacher_loss": 0.34526288509368896 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.39772528409957886, "learning_rate": 4.674846116413178e-06, "loss": 0.1949, "step": 22305, "teacher_loss": 0.17238157987594604 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5819420218467712, "learning_rate": 4.6731987047507185e-06, "loss": 0.3031, "step": 22306, "teacher_loss": 0.2721473276615143 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.9132729768753052, "learning_rate": 4.671551529850841e-06, "loss": 0.2894, "step": 22307, "teacher_loss": 0.22010083496570587 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.6451421976089478, "learning_rate": 4.669904591751332e-06, "loss": 0.2489, "step": 22308, "teacher_loss": 0.20490939915180206 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.43649476766586304, "learning_rate": 4.668257890489937e-06, "loss": 0.2071, "step": 22309, "teacher_loss": 0.18164560198783875 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.6438288688659668, "learning_rate": 4.666611426104409e-06, "loss": 0.2401, "step": 22310, "teacher_loss": 0.19519805908203125 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.299312949180603, "learning_rate": 4.6649651986325e-06, "loss": 0.1781, "step": 22311, "teacher_loss": 0.1645796149969101 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.6446738243103027, "learning_rate": 4.6633192081119575e-06, "loss": 0.3151, "step": 22312, "teacher_loss": 0.2784823775291443 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.27618521451950073, "learning_rate": 4.66167345458051e-06, "loss": 0.2094, "step": 22313, "teacher_loss": 0.20192891359329224 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5535421967506409, "learning_rate": 4.660027938075894e-06, "loss": 0.257, "step": 22314, "teacher_loss": 0.22406136989593506 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5050574541091919, "learning_rate": 4.658382658635841e-06, "loss": 0.2413, "step": 22315, "teacher_loss": 0.2119983732700348 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.5441836714744568, "learning_rate": 4.656737616298065e-06, "loss": 0.2736, "step": 22316, "teacher_loss": 0.24349018931388855 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.35144421458244324, "learning_rate": 4.655092811100282e-06, "loss": 0.149, "step": 22317, "teacher_loss": 0.12652841210365295 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.29485535621643066, "learning_rate": 4.653448243080212e-06, "loss": 0.2767, "step": 22318, "teacher_loss": 0.27469971776008606 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.3731991648674011, "learning_rate": 4.651803912275548e-06, "loss": 0.2161, "step": 22319, "teacher_loss": 0.19862470030784607 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.26870012283325195, "learning_rate": 4.650159818723999e-06, "loss": 0.1644, "step": 22320, "teacher_loss": 0.15275856852531433 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.48765650391578674, "learning_rate": 4.6485159624632505e-06, "loss": 0.1925, "step": 22321, "teacher_loss": 0.15975692868232727 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.4498612582683563, "learning_rate": 4.646872343530999e-06, "loss": 0.2502, "step": 22322, "teacher_loss": 0.22801800072193146 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.4751649498939514, "learning_rate": 4.6452289619649225e-06, "loss": 0.2234, "step": 22323, "teacher_loss": 0.1954212635755539 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.25882992148399353, "learning_rate": 4.643585817802697e-06, "loss": 0.1785, "step": 22324, "teacher_loss": 0.16959968209266663 }, { "compression_loss": 0.0, "epoch": 4.03, "label_loss": 0.33566170930862427, "learning_rate": 4.641942911082007e-06, "loss": 0.2097, "step": 22325, "teacher_loss": 0.1956641972064972 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.2695721387863159, "learning_rate": 4.640300241840505e-06, "loss": 0.1841, "step": 22326, "teacher_loss": 0.17460983991622925 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7575740218162537, "learning_rate": 4.638657810115857e-06, "loss": 0.2833, "step": 22327, "teacher_loss": 0.23062311112880707 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4344967007637024, "learning_rate": 4.637015615945727e-06, "loss": 0.2398, "step": 22328, "teacher_loss": 0.2182224988937378 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.21146127581596375, "learning_rate": 4.635373659367753e-06, "loss": 0.1756, "step": 22329, "teacher_loss": 0.17163875699043274 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.3587035536766052, "learning_rate": 4.633731940419592e-06, "loss": 0.1914, "step": 22330, "teacher_loss": 0.172776460647583 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.3692079782485962, "learning_rate": 4.632090459138872e-06, "loss": 0.211, "step": 22331, "teacher_loss": 0.1934436708688736 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.9100313186645508, "learning_rate": 4.6304492155632355e-06, "loss": 0.3771, "step": 22332, "teacher_loss": 0.3178538382053375 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.47658199071884155, "learning_rate": 4.628808209730311e-06, "loss": 0.1917, "step": 22333, "teacher_loss": 0.16002070903778076 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4166563153266907, "learning_rate": 4.6271674416777164e-06, "loss": 0.2006, "step": 22334, "teacher_loss": 0.17662595212459564 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4648454785346985, "learning_rate": 4.625526911443073e-06, "loss": 0.2122, "step": 22335, "teacher_loss": 0.18418218195438385 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.18360412120819092, "learning_rate": 4.623886619063997e-06, "loss": 0.1488, "step": 22336, "teacher_loss": 0.1448913961648941 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.23698677122592926, "learning_rate": 4.6222465645780885e-06, "loss": 0.1554, "step": 22337, "teacher_loss": 0.1463409960269928 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.6238726377487183, "learning_rate": 4.620606748022952e-06, "loss": 0.3252, "step": 22338, "teacher_loss": 0.2919802665710449 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.05942612513899803, "learning_rate": 4.618967169436187e-06, "loss": 0.1228, "step": 22339, "teacher_loss": 0.12981215119361877 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4003221392631531, "learning_rate": 4.617327828855377e-06, "loss": 0.178, "step": 22340, "teacher_loss": 0.15329578518867493 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.2753027081489563, "learning_rate": 4.615688726318111e-06, "loss": 0.2223, "step": 22341, "teacher_loss": 0.2163984477519989 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7132440805435181, "learning_rate": 4.614049861861974e-06, "loss": 0.3096, "step": 22342, "teacher_loss": 0.2647353410720825 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.34624040126800537, "learning_rate": 4.612411235524533e-06, "loss": 0.1798, "step": 22343, "teacher_loss": 0.1613403856754303 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.41245031356811523, "learning_rate": 4.610772847343358e-06, "loss": 0.2347, "step": 22344, "teacher_loss": 0.21495160460472107 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.1924239695072174, "learning_rate": 4.609134697356009e-06, "loss": 0.1571, "step": 22345, "teacher_loss": 0.15320059657096863 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7938008308410645, "learning_rate": 4.607496785600054e-06, "loss": 0.2877, "step": 22346, "teacher_loss": 0.23149684071540833 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.8822792768478394, "learning_rate": 4.605859112113036e-06, "loss": 0.3379, "step": 22347, "teacher_loss": 0.2774544358253479 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7164634466171265, "learning_rate": 4.604221676932503e-06, "loss": 0.2591, "step": 22348, "teacher_loss": 0.20822957158088684 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4802509546279907, "learning_rate": 4.602584480096005e-06, "loss": 0.1825, "step": 22349, "teacher_loss": 0.14944472908973694 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7466652989387512, "learning_rate": 4.600947521641066e-06, "loss": 0.2406, "step": 22350, "teacher_loss": 0.18442079424858093 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.929283618927002, "learning_rate": 4.5993108016052235e-06, "loss": 0.3235, "step": 22351, "teacher_loss": 0.25622570514678955 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.6867748498916626, "learning_rate": 4.597674320026006e-06, "loss": 0.4083, "step": 22352, "teacher_loss": 0.37731873989105225 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4594021439552307, "learning_rate": 4.5960380769409284e-06, "loss": 0.2243, "step": 22353, "teacher_loss": 0.19815891981124878 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.25068002939224243, "learning_rate": 4.594402072387497e-06, "loss": 0.1697, "step": 22354, "teacher_loss": 0.16070103645324707 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.43247419595718384, "learning_rate": 4.592766306403235e-06, "loss": 0.2135, "step": 22355, "teacher_loss": 0.18922486901283264 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.3742496371269226, "learning_rate": 4.591130779025641e-06, "loss": 0.1763, "step": 22356, "teacher_loss": 0.15431681275367737 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.5696107149124146, "learning_rate": 4.5894954902922e-06, "loss": 0.2285, "step": 22357, "teacher_loss": 0.19062277674674988 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.20192936062812805, "learning_rate": 4.5878604402404254e-06, "loss": 0.2141, "step": 22358, "teacher_loss": 0.21547850966453552 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.6407074332237244, "learning_rate": 4.586225628907794e-06, "loss": 0.2304, "step": 22359, "teacher_loss": 0.18479309976100922 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4709963798522949, "learning_rate": 4.584591056331783e-06, "loss": 0.1995, "step": 22360, "teacher_loss": 0.1693221926689148 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.3023056387901306, "learning_rate": 4.5829567225498696e-06, "loss": 0.1765, "step": 22361, "teacher_loss": 0.16253016889095306 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4718725085258484, "learning_rate": 4.581322627599533e-06, "loss": 0.2145, "step": 22362, "teacher_loss": 0.18591247498989105 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.45685333013534546, "learning_rate": 4.579688771518227e-06, "loss": 0.2357, "step": 22363, "teacher_loss": 0.2111477553844452 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4295060634613037, "learning_rate": 4.578055154343414e-06, "loss": 0.253, "step": 22364, "teacher_loss": 0.2333919107913971 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.34789755940437317, "learning_rate": 4.576421776112556e-06, "loss": 0.223, "step": 22365, "teacher_loss": 0.20916923880577087 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.29522159695625305, "learning_rate": 4.574788636863097e-06, "loss": 0.2359, "step": 22366, "teacher_loss": 0.22930538654327393 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.23254722356796265, "learning_rate": 4.573155736632466e-06, "loss": 0.2029, "step": 22367, "teacher_loss": 0.1996101438999176 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.3475077152252197, "learning_rate": 4.571523075458122e-06, "loss": 0.1533, "step": 22368, "teacher_loss": 0.13172005116939545 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.5157480835914612, "learning_rate": 4.569890653377488e-06, "loss": 0.2094, "step": 22369, "teacher_loss": 0.17536842823028564 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.17539718747138977, "learning_rate": 4.568258470427987e-06, "loss": 0.1548, "step": 22370, "teacher_loss": 0.15246671438217163 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.5434921383857727, "learning_rate": 4.566626526647041e-06, "loss": 0.2155, "step": 22371, "teacher_loss": 0.1790551245212555 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.058663271367549896, "learning_rate": 4.564994822072075e-06, "loss": 0.1603, "step": 22372, "teacher_loss": 0.1715439110994339 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.28227531909942627, "learning_rate": 4.563363356740486e-06, "loss": 0.135, "step": 22373, "teacher_loss": 0.11862976849079132 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.4497913420200348, "learning_rate": 4.561732130689684e-06, "loss": 0.2169, "step": 22374, "teacher_loss": 0.19096790254116058 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.06890691816806793, "learning_rate": 4.5601011439570754e-06, "loss": 0.1632, "step": 22375, "teacher_loss": 0.17366473376750946 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.41623252630233765, "learning_rate": 4.558470396580041e-06, "loss": 0.222, "step": 22376, "teacher_loss": 0.20041929185390472 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.32481563091278076, "learning_rate": 4.556839888595974e-06, "loss": 0.2185, "step": 22377, "teacher_loss": 0.20673654973506927 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.6112340092658997, "learning_rate": 4.555209620042266e-06, "loss": 0.2153, "step": 22378, "teacher_loss": 0.17132142186164856 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.558569073677063, "learning_rate": 4.553579590956279e-06, "loss": 0.2643, "step": 22379, "teacher_loss": 0.23162616789340973 }, { "compression_loss": 0.0, "epoch": 4.04, "label_loss": 0.7636164426803589, "learning_rate": 4.5519498013753916e-06, "loss": 0.2577, "step": 22380, "teacher_loss": 0.20148514211177826 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5127445459365845, "learning_rate": 4.550320251336976e-06, "loss": 0.2081, "step": 22381, "teacher_loss": 0.1742207109928131 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.30378487706184387, "learning_rate": 4.548690940878384e-06, "loss": 0.1539, "step": 22382, "teacher_loss": 0.13729184865951538 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4069824814796448, "learning_rate": 4.5470618700369755e-06, "loss": 0.2329, "step": 22383, "teacher_loss": 0.2135481834411621 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.6297832727432251, "learning_rate": 4.545433038850098e-06, "loss": 0.2829, "step": 22384, "teacher_loss": 0.24433034658432007 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.24492287635803223, "learning_rate": 4.543804447355098e-06, "loss": 0.1612, "step": 22385, "teacher_loss": 0.1518622636795044 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.24344030022621155, "learning_rate": 4.54217609558931e-06, "loss": 0.1868, "step": 22386, "teacher_loss": 0.18047180771827698 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4224638342857361, "learning_rate": 4.5405479835900695e-06, "loss": 0.1933, "step": 22387, "teacher_loss": 0.16779302060604095 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 1.02317476272583, "learning_rate": 4.5389201113947085e-06, "loss": 0.2714, "step": 22388, "teacher_loss": 0.18782779574394226 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5103275775909424, "learning_rate": 4.537292479040542e-06, "loss": 0.1731, "step": 22389, "teacher_loss": 0.1356210708618164 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.40686067938804626, "learning_rate": 4.535665086564888e-06, "loss": 0.1667, "step": 22390, "teacher_loss": 0.13997262716293335 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4288334846496582, "learning_rate": 4.534037934005066e-06, "loss": 0.1891, "step": 22391, "teacher_loss": 0.16248804330825806 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4488275945186615, "learning_rate": 4.532411021398371e-06, "loss": 0.2532, "step": 22392, "teacher_loss": 0.23149588704109192 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.3356611430644989, "learning_rate": 4.530784348782112e-06, "loss": 0.2862, "step": 22393, "teacher_loss": 0.2807316780090332 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.38262325525283813, "learning_rate": 4.529157916193576e-06, "loss": 0.2096, "step": 22394, "teacher_loss": 0.19040557742118835 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.2759501039981842, "learning_rate": 4.527531723670055e-06, "loss": 0.1605, "step": 22395, "teacher_loss": 0.1477038562297821 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.9571385383605957, "learning_rate": 4.525905771248838e-06, "loss": 0.3184, "step": 22396, "teacher_loss": 0.24738846719264984 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5152538418769836, "learning_rate": 4.524280058967193e-06, "loss": 0.1901, "step": 22397, "teacher_loss": 0.15394222736358643 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.18781855702400208, "learning_rate": 4.522654586862401e-06, "loss": 0.2048, "step": 22398, "teacher_loss": 0.2067258656024933 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.3841584324836731, "learning_rate": 4.5210293549717306e-06, "loss": 0.2217, "step": 22399, "teacher_loss": 0.20366591215133667 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.48990312218666077, "learning_rate": 4.5194043633324335e-06, "loss": 0.3433, "step": 22400, "teacher_loss": 0.3269987106323242 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.24159112572669983, "learning_rate": 4.517779611981772e-06, "loss": 0.1774, "step": 22401, "teacher_loss": 0.1702936887741089 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5890333652496338, "learning_rate": 4.516155100957002e-06, "loss": 0.2186, "step": 22402, "teacher_loss": 0.17747971415519714 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.21373350918293, "learning_rate": 4.514530830295365e-06, "loss": 0.1832, "step": 22403, "teacher_loss": 0.17978642880916595 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.44638463854789734, "learning_rate": 4.512906800034086e-06, "loss": 0.2494, "step": 22404, "teacher_loss": 0.22746042907238007 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.6371122598648071, "learning_rate": 4.511283010210423e-06, "loss": 0.2331, "step": 22405, "teacher_loss": 0.18825319409370422 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4463879466056824, "learning_rate": 4.509659460861595e-06, "loss": 0.2554, "step": 22406, "teacher_loss": 0.23412543535232544 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.7087075710296631, "learning_rate": 4.508036152024819e-06, "loss": 0.256, "step": 22407, "teacher_loss": 0.20572713017463684 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5115741491317749, "learning_rate": 4.506413083737317e-06, "loss": 0.2166, "step": 22408, "teacher_loss": 0.1837860345840454 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.38696935772895813, "learning_rate": 4.5047902560363045e-06, "loss": 0.2245, "step": 22409, "teacher_loss": 0.20639248192310333 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.31366080045700073, "learning_rate": 4.503167668958982e-06, "loss": 0.1717, "step": 22410, "teacher_loss": 0.1559767872095108 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.24255573749542236, "learning_rate": 4.501545322542555e-06, "loss": 0.1482, "step": 22411, "teacher_loss": 0.137699156999588 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4071047306060791, "learning_rate": 4.49992321682422e-06, "loss": 0.2131, "step": 22412, "teacher_loss": 0.19159506261348724 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.46025973558425903, "learning_rate": 4.498301351841162e-06, "loss": 0.2592, "step": 22413, "teacher_loss": 0.2368142306804657 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.18871013820171356, "learning_rate": 4.496679727630568e-06, "loss": 0.1944, "step": 22414, "teacher_loss": 0.19505611062049866 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.553680419921875, "learning_rate": 4.4950583442296205e-06, "loss": 0.2328, "step": 22415, "teacher_loss": 0.19713759422302246 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.39101850986480713, "learning_rate": 4.493437201675491e-06, "loss": 0.2475, "step": 22416, "teacher_loss": 0.23158606886863708 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4335443675518036, "learning_rate": 4.491816300005336e-06, "loss": 0.2062, "step": 22417, "teacher_loss": 0.18089798092842102 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5334718823432922, "learning_rate": 4.490195639256339e-06, "loss": 0.1998, "step": 22418, "teacher_loss": 0.1627301722764969 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.3568463623523712, "learning_rate": 4.488575219465645e-06, "loss": 0.1916, "step": 22419, "teacher_loss": 0.17322498559951782 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.8238714933395386, "learning_rate": 4.486955040670401e-06, "loss": 0.2953, "step": 22420, "teacher_loss": 0.23651883006095886 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.18777135014533997, "learning_rate": 4.485335102907758e-06, "loss": 0.1873, "step": 22421, "teacher_loss": 0.18730252981185913 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.20914584398269653, "learning_rate": 4.483715406214863e-06, "loss": 0.2025, "step": 22422, "teacher_loss": 0.20180365443229675 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4354602098464966, "learning_rate": 4.482095950628839e-06, "loss": 0.2431, "step": 22423, "teacher_loss": 0.22169464826583862 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.35351261496543884, "learning_rate": 4.480476736186819e-06, "loss": 0.3197, "step": 22424, "teacher_loss": 0.31592828035354614 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.34121522307395935, "learning_rate": 4.478857762925934e-06, "loss": 0.1656, "step": 22425, "teacher_loss": 0.14605233073234558 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.303475558757782, "learning_rate": 4.4772390308832966e-06, "loss": 0.1665, "step": 22426, "teacher_loss": 0.15126320719718933 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5880388021469116, "learning_rate": 4.47562054009601e-06, "loss": 0.2808, "step": 22427, "teacher_loss": 0.2466752678155899 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.21170049905776978, "learning_rate": 4.474002290601201e-06, "loss": 0.1456, "step": 22428, "teacher_loss": 0.13823555409908295 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.6346510648727417, "learning_rate": 4.4723842824359595e-06, "loss": 0.3503, "step": 22429, "teacher_loss": 0.31865566968917847 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.2442774623632431, "learning_rate": 4.470766515637373e-06, "loss": 0.1453, "step": 22430, "teacher_loss": 0.13434451818466187 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.40366122126579285, "learning_rate": 4.469148990242552e-06, "loss": 0.2205, "step": 22431, "teacher_loss": 0.20015056431293488 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.42039209604263306, "learning_rate": 4.467531706288573e-06, "loss": 0.2402, "step": 22432, "teacher_loss": 0.22019259631633759 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.7823781967163086, "learning_rate": 4.465914663812508e-06, "loss": 0.2699, "step": 22433, "teacher_loss": 0.212961345911026 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.5453091859817505, "learning_rate": 4.4642978628514365e-06, "loss": 0.2198, "step": 22434, "teacher_loss": 0.18360097706317902 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.4284927248954773, "learning_rate": 4.4626813034424325e-06, "loss": 0.2054, "step": 22435, "teacher_loss": 0.18064826726913452 }, { "compression_loss": 0.0, "epoch": 4.05, "label_loss": 0.451980859041214, "learning_rate": 4.46106498562255e-06, "loss": 0.1932, "step": 22436, "teacher_loss": 0.1644885241985321 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.22469471395015717, "learning_rate": 4.459448909428848e-06, "loss": 0.2062, "step": 22437, "teacher_loss": 0.20416519045829773 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.13847118616104126, "learning_rate": 4.457833074898386e-06, "loss": 0.1461, "step": 22438, "teacher_loss": 0.14699944853782654 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3379843235015869, "learning_rate": 4.4562174820682e-06, "loss": 0.1598, "step": 22439, "teacher_loss": 0.1399690955877304 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5982440710067749, "learning_rate": 4.454602130975336e-06, "loss": 0.2262, "step": 22440, "teacher_loss": 0.18484535813331604 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5035433769226074, "learning_rate": 4.452987021656832e-06, "loss": 0.1938, "step": 22441, "teacher_loss": 0.15940451622009277 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.25582355260849, "learning_rate": 4.4513721541497095e-06, "loss": 0.1827, "step": 22442, "teacher_loss": 0.17462529242038727 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.26584821939468384, "learning_rate": 4.4497575284910015e-06, "loss": 0.1533, "step": 22443, "teacher_loss": 0.1407933384180069 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.7291821241378784, "learning_rate": 4.448143144717719e-06, "loss": 0.3652, "step": 22444, "teacher_loss": 0.3247072100639343 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.35927897691726685, "learning_rate": 4.446529002866877e-06, "loss": 0.2322, "step": 22445, "teacher_loss": 0.21805424988269806 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.7203177809715271, "learning_rate": 4.444915102975488e-06, "loss": 0.245, "step": 22446, "teacher_loss": 0.19219619035720825 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5104303359985352, "learning_rate": 4.4433014450805456e-06, "loss": 0.2419, "step": 22447, "teacher_loss": 0.21200843155384064 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3425580859184265, "learning_rate": 4.441688029219055e-06, "loss": 0.1919, "step": 22448, "teacher_loss": 0.1751413643360138 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.8826892971992493, "learning_rate": 4.440074855427998e-06, "loss": 0.3009, "step": 22449, "teacher_loss": 0.23625552654266357 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.20078477263450623, "learning_rate": 4.438461923744364e-06, "loss": 0.1413, "step": 22450, "teacher_loss": 0.13474193215370178 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.34865784645080566, "learning_rate": 4.43684923420514e-06, "loss": 0.1958, "step": 22451, "teacher_loss": 0.17877960205078125 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.33222702145576477, "learning_rate": 4.435236786847284e-06, "loss": 0.2061, "step": 22452, "teacher_loss": 0.19213572144508362 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5211025476455688, "learning_rate": 4.433624581707781e-06, "loss": 0.2224, "step": 22453, "teacher_loss": 0.1892303079366684 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.8634397983551025, "learning_rate": 4.432012618823583e-06, "loss": 0.2921, "step": 22454, "teacher_loss": 0.22860601544380188 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.44478845596313477, "learning_rate": 4.430400898231649e-06, "loss": 0.2615, "step": 22455, "teacher_loss": 0.24108169972896576 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.35409748554229736, "learning_rate": 4.428789419968939e-06, "loss": 0.2423, "step": 22456, "teacher_loss": 0.2298848032951355 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.9793566465377808, "learning_rate": 4.427178184072389e-06, "loss": 0.2756, "step": 22457, "teacher_loss": 0.197415292263031 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3693322539329529, "learning_rate": 4.425567190578943e-06, "loss": 0.2036, "step": 22458, "teacher_loss": 0.18515688180923462 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.7170186042785645, "learning_rate": 4.423956439525544e-06, "loss": 0.327, "step": 22459, "teacher_loss": 0.2836138606071472 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.4539327621459961, "learning_rate": 4.422345930949108e-06, "loss": 0.2453, "step": 22460, "teacher_loss": 0.22208912670612335 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.8809143900871277, "learning_rate": 4.420735664886568e-06, "loss": 0.3518, "step": 22461, "teacher_loss": 0.29303354024887085 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.43342316150665283, "learning_rate": 4.419125641374845e-06, "loss": 0.2345, "step": 22462, "teacher_loss": 0.2124405801296234 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.48221659660339355, "learning_rate": 4.417515860450844e-06, "loss": 0.2727, "step": 22463, "teacher_loss": 0.24941229820251465 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.35635969042778015, "learning_rate": 4.415906322151477e-06, "loss": 0.1794, "step": 22464, "teacher_loss": 0.15978941321372986 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.20930947363376617, "learning_rate": 4.414297026513649e-06, "loss": 0.2111, "step": 22465, "teacher_loss": 0.2113484889268875 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.4871370196342468, "learning_rate": 4.412687973574253e-06, "loss": 0.2214, "step": 22466, "teacher_loss": 0.191874697804451 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.29095304012298584, "learning_rate": 4.411079163370169e-06, "loss": 0.1658, "step": 22467, "teacher_loss": 0.15185675024986267 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.576290488243103, "learning_rate": 4.409470595938303e-06, "loss": 0.6552, "step": 22468, "teacher_loss": 0.6639301776885986 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.26170191168785095, "learning_rate": 4.407862271315524e-06, "loss": 0.1866, "step": 22469, "teacher_loss": 0.17822669446468353 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.41512882709503174, "learning_rate": 4.4062541895387005e-06, "loss": 0.1957, "step": 22470, "teacher_loss": 0.17126977443695068 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.36659109592437744, "learning_rate": 4.404646350644708e-06, "loss": 0.1714, "step": 22471, "teacher_loss": 0.1497318148612976 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3778338134288788, "learning_rate": 4.403038754670413e-06, "loss": 0.2214, "step": 22472, "teacher_loss": 0.20400582253932953 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5372523069381714, "learning_rate": 4.401431401652662e-06, "loss": 0.2176, "step": 22473, "teacher_loss": 0.18211859464645386 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5265488624572754, "learning_rate": 4.3998242916283146e-06, "loss": 0.2109, "step": 22474, "teacher_loss": 0.1757970154285431 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.11866244673728943, "learning_rate": 4.398217424634219e-06, "loss": 0.1437, "step": 22475, "teacher_loss": 0.14645174145698547 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.4486837387084961, "learning_rate": 4.396610800707211e-06, "loss": 0.1912, "step": 22476, "teacher_loss": 0.1626073569059372 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5628445744514465, "learning_rate": 4.395004419884118e-06, "loss": 0.2659, "step": 22477, "teacher_loss": 0.23292362689971924 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.2755957543849945, "learning_rate": 4.393398282201788e-06, "loss": 0.1653, "step": 22478, "teacher_loss": 0.15302352607250214 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.7415618300437927, "learning_rate": 4.391792387697035e-06, "loss": 0.2657, "step": 22479, "teacher_loss": 0.2128206491470337 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.19963206350803375, "learning_rate": 4.390186736406669e-06, "loss": 0.1791, "step": 22480, "teacher_loss": 0.17687368392944336 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3069932460784912, "learning_rate": 4.388581328367519e-06, "loss": 0.2306, "step": 22481, "teacher_loss": 0.22214823961257935 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.8350578546524048, "learning_rate": 4.386976163616385e-06, "loss": 0.2873, "step": 22482, "teacher_loss": 0.2264130711555481 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.15924188494682312, "learning_rate": 4.385371242190064e-06, "loss": 0.1554, "step": 22483, "teacher_loss": 0.1549575924873352 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.40631574392318726, "learning_rate": 4.383766564125355e-06, "loss": 0.2564, "step": 22484, "teacher_loss": 0.23979106545448303 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.40063318610191345, "learning_rate": 4.382162129459055e-06, "loss": 0.2245, "step": 22485, "teacher_loss": 0.20490238070487976 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.4342784285545349, "learning_rate": 4.38055793822794e-06, "loss": 0.2351, "step": 22486, "teacher_loss": 0.21298450231552124 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.1803794503211975, "learning_rate": 4.3789539904687905e-06, "loss": 0.1539, "step": 22487, "teacher_loss": 0.1509999781847 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 1.0708751678466797, "learning_rate": 4.377350286218389e-06, "loss": 0.2389, "step": 22488, "teacher_loss": 0.14647985994815826 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3389892280101776, "learning_rate": 4.375746825513496e-06, "loss": 0.2117, "step": 22489, "teacher_loss": 0.19751468300819397 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.5767058730125427, "learning_rate": 4.374143608390865e-06, "loss": 0.2403, "step": 22490, "teacher_loss": 0.20292508602142334 }, { "compression_loss": 0.0, "epoch": 4.06, "label_loss": 0.3074719309806824, "learning_rate": 4.3725406348872745e-06, "loss": 0.175, "step": 22491, "teacher_loss": 0.16029608249664307 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.37424999475479126, "learning_rate": 4.370937905039463e-06, "loss": 0.2025, "step": 22492, "teacher_loss": 0.18341854214668274 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.28178104758262634, "learning_rate": 4.369335418884173e-06, "loss": 0.1815, "step": 22493, "teacher_loss": 0.17039045691490173 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.7284677624702454, "learning_rate": 4.3677331764581506e-06, "loss": 0.3103, "step": 22494, "teacher_loss": 0.2638307511806488 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.5923912525177002, "learning_rate": 4.366131177798132e-06, "loss": 0.2393, "step": 22495, "teacher_loss": 0.20008601248264313 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.3565843403339386, "learning_rate": 4.3645294229408415e-06, "loss": 0.2369, "step": 22496, "teacher_loss": 0.2236430048942566 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4684481918811798, "learning_rate": 4.362927911923003e-06, "loss": 0.204, "step": 22497, "teacher_loss": 0.17464913427829742 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.36763694882392883, "learning_rate": 4.361326644781341e-06, "loss": 0.1921, "step": 22498, "teacher_loss": 0.17262691259384155 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.1315077543258667, "learning_rate": 4.359725621552557e-06, "loss": 0.1695, "step": 22499, "teacher_loss": 0.17368349432945251 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4267739951610565, "learning_rate": 4.358124842273364e-06, "loss": 0.2088, "step": 22500, "teacher_loss": 0.1845782995223999 }, { "epoch": 4.07, "eval_exact_match": 80.17029328287606, "eval_f1": 87.60082876314915, "step": 22500 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.42162588238716125, "learning_rate": 4.3565243069804665e-06, "loss": 0.1791, "step": 22501, "teacher_loss": 0.15219640731811523 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.7650765180587769, "learning_rate": 4.354924015710553e-06, "loss": 0.3852, "step": 22502, "teacher_loss": 0.3429529070854187 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.6458345651626587, "learning_rate": 4.353323968500314e-06, "loss": 0.2388, "step": 22503, "teacher_loss": 0.19356638193130493 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.31992587447166443, "learning_rate": 4.351724165386442e-06, "loss": 0.1969, "step": 22504, "teacher_loss": 0.18323460221290588 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4559538662433624, "learning_rate": 4.350124606405604e-06, "loss": 0.1766, "step": 22505, "teacher_loss": 0.14556415379047394 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.20623818039894104, "learning_rate": 4.3485252915944845e-06, "loss": 0.2054, "step": 22506, "teacher_loss": 0.20532625913619995 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.19963952898979187, "learning_rate": 4.34692622098974e-06, "loss": 0.1554, "step": 22507, "teacher_loss": 0.15051355957984924 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.24256964027881622, "learning_rate": 4.3453273946280374e-06, "loss": 0.1465, "step": 22508, "teacher_loss": 0.13582547008991241 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.20550374686717987, "learning_rate": 4.3437288125460384e-06, "loss": 0.1488, "step": 22509, "teacher_loss": 0.14255420863628387 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4193999469280243, "learning_rate": 4.342130474780385e-06, "loss": 0.2008, "step": 22510, "teacher_loss": 0.1764683723449707 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.3219105005264282, "learning_rate": 4.340532381367729e-06, "loss": 0.2, "step": 22511, "teacher_loss": 0.18639929592609406 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.3036709427833557, "learning_rate": 4.338934532344701e-06, "loss": 0.2203, "step": 22512, "teacher_loss": 0.2110208123922348 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.33838146924972534, "learning_rate": 4.337336927747943e-06, "loss": 0.209, "step": 22513, "teacher_loss": 0.19458970427513123 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4568299949169159, "learning_rate": 4.335739567614085e-06, "loss": 0.1771, "step": 22514, "teacher_loss": 0.14599284529685974 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.567828893661499, "learning_rate": 4.3341424519797415e-06, "loss": 0.222, "step": 22515, "teacher_loss": 0.18355447053909302 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4044817090034485, "learning_rate": 4.332545580881537e-06, "loss": 0.2063, "step": 22516, "teacher_loss": 0.18430379033088684 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.7734218239784241, "learning_rate": 4.330948954356076e-06, "loss": 0.274, "step": 22517, "teacher_loss": 0.21854552626609802 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.23295968770980835, "learning_rate": 4.329352572439968e-06, "loss": 0.1793, "step": 22518, "teacher_loss": 0.17337566614151 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.29805055260658264, "learning_rate": 4.3277564351698165e-06, "loss": 0.1934, "step": 22519, "teacher_loss": 0.18172572553157806 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.3570689558982849, "learning_rate": 4.32616054258221e-06, "loss": 0.237, "step": 22520, "teacher_loss": 0.22367411851882935 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.29694199562072754, "learning_rate": 4.32456489471374e-06, "loss": 0.2379, "step": 22521, "teacher_loss": 0.2313789576292038 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.6117671728134155, "learning_rate": 4.322969491600993e-06, "loss": 0.2182, "step": 22522, "teacher_loss": 0.17443028092384338 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.2922821640968323, "learning_rate": 4.321374333280541e-06, "loss": 0.2156, "step": 22523, "teacher_loss": 0.20710714161396027 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.11824120581150055, "learning_rate": 4.319779419788961e-06, "loss": 0.1526, "step": 22524, "teacher_loss": 0.15641391277313232 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.24226316809654236, "learning_rate": 4.318184751162821e-06, "loss": 0.1934, "step": 22525, "teacher_loss": 0.187983438372612 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.7134770750999451, "learning_rate": 4.316590327438678e-06, "loss": 0.2504, "step": 22526, "teacher_loss": 0.19889569282531738 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4353092312812805, "learning_rate": 4.3149961486530795e-06, "loss": 0.2275, "step": 22527, "teacher_loss": 0.2044464498758316 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.9352982044219971, "learning_rate": 4.313402214842595e-06, "loss": 0.4688, "step": 22528, "teacher_loss": 0.4169725775718689 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.2527202367782593, "learning_rate": 4.311808526043756e-06, "loss": 0.1811, "step": 22529, "teacher_loss": 0.1731759011745453 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4207102656364441, "learning_rate": 4.310215082293094e-06, "loss": 0.2288, "step": 22530, "teacher_loss": 0.20748502016067505 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.3170747756958008, "learning_rate": 4.30862188362716e-06, "loss": 0.1734, "step": 22531, "teacher_loss": 0.15739217400550842 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.17567959427833557, "learning_rate": 4.30702893008247e-06, "loss": 0.1681, "step": 22532, "teacher_loss": 0.1672121286392212 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.4852805733680725, "learning_rate": 4.305436221695545e-06, "loss": 0.253, "step": 22533, "teacher_loss": 0.2271641194820404 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.2636764347553253, "learning_rate": 4.303843758502902e-06, "loss": 0.218, "step": 22534, "teacher_loss": 0.2128811478614807 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.5491704940795898, "learning_rate": 4.302251540541059e-06, "loss": 0.3003, "step": 22535, "teacher_loss": 0.27269434928894043 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.13952423632144928, "learning_rate": 4.300659567846509e-06, "loss": 0.1315, "step": 22536, "teacher_loss": 0.13058537244796753 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.2675098478794098, "learning_rate": 4.299067840455756e-06, "loss": 0.2284, "step": 22537, "teacher_loss": 0.22410380840301514 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.231197789311409, "learning_rate": 4.297476358405301e-06, "loss": 0.2877, "step": 22538, "teacher_loss": 0.293992817401886 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.8145919442176819, "learning_rate": 4.295885121731623e-06, "loss": 0.2463, "step": 22539, "teacher_loss": 0.1831541210412979 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.40225785970687866, "learning_rate": 4.294294130471199e-06, "loss": 0.1761, "step": 22540, "teacher_loss": 0.1509377658367157 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.2878412902355194, "learning_rate": 4.292703384660522e-06, "loss": 0.1519, "step": 22541, "teacher_loss": 0.1368279755115509 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.6010397672653198, "learning_rate": 4.291112884336054e-06, "loss": 0.1907, "step": 22542, "teacher_loss": 0.14513175189495087 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.35069769620895386, "learning_rate": 4.289522629534257e-06, "loss": 0.1866, "step": 22543, "teacher_loss": 0.1684103012084961 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.19859886169433594, "learning_rate": 4.287932620291593e-06, "loss": 0.2011, "step": 22544, "teacher_loss": 0.20140162110328674 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.29303768277168274, "learning_rate": 4.286342856644523e-06, "loss": 0.1973, "step": 22545, "teacher_loss": 0.18661954998970032 }, { "compression_loss": 0.0, "epoch": 4.07, "label_loss": 0.27912259101867676, "learning_rate": 4.284753338629486e-06, "loss": 0.2412, "step": 22546, "teacher_loss": 0.2370205670595169 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5424875617027283, "learning_rate": 4.2831640662829274e-06, "loss": 0.2808, "step": 22547, "teacher_loss": 0.2516745626926422 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.6425031423568726, "learning_rate": 4.2815750396412924e-06, "loss": 0.2759, "step": 22548, "teacher_loss": 0.2351227104663849 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.30593228340148926, "learning_rate": 4.279986258741001e-06, "loss": 0.2149, "step": 22549, "teacher_loss": 0.20480579137802124 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.2967720329761505, "learning_rate": 4.278397723618485e-06, "loss": 0.1858, "step": 22550, "teacher_loss": 0.17342662811279297 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.398216187953949, "learning_rate": 4.276809434310167e-06, "loss": 0.2753, "step": 22551, "teacher_loss": 0.2615974545478821 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.7743715047836304, "learning_rate": 4.27522139085246e-06, "loss": 0.3086, "step": 22552, "teacher_loss": 0.2568100690841675 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.30458182096481323, "learning_rate": 4.273633593281762e-06, "loss": 0.1726, "step": 22553, "teacher_loss": 0.15796412527561188 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.4019111394882202, "learning_rate": 4.2720460416344975e-06, "loss": 0.226, "step": 22554, "teacher_loss": 0.206502765417099 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.41173383593559265, "learning_rate": 4.270458735947051e-06, "loss": 0.2387, "step": 22555, "teacher_loss": 0.21952712535858154 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.6742020845413208, "learning_rate": 4.268871676255814e-06, "loss": 0.222, "step": 22556, "teacher_loss": 0.1717754304409027 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.49618640542030334, "learning_rate": 4.267284862597174e-06, "loss": 0.1875, "step": 22557, "teacher_loss": 0.15319719910621643 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.33164310455322266, "learning_rate": 4.265698295007521e-06, "loss": 0.2145, "step": 22558, "teacher_loss": 0.20148909091949463 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.6611484885215759, "learning_rate": 4.264111973523215e-06, "loss": 0.222, "step": 22559, "teacher_loss": 0.17321157455444336 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.36705875396728516, "learning_rate": 4.262525898180636e-06, "loss": 0.1663, "step": 22560, "teacher_loss": 0.1439988613128662 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.343810111284256, "learning_rate": 4.26094006901615e-06, "loss": 0.1949, "step": 22561, "teacher_loss": 0.17830270528793335 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.6300286650657654, "learning_rate": 4.259354486066105e-06, "loss": 0.2043, "step": 22562, "teacher_loss": 0.15699800848960876 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.3817034959793091, "learning_rate": 4.257769149366862e-06, "loss": 0.3076, "step": 22563, "teacher_loss": 0.299325168132782 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5715736150741577, "learning_rate": 4.256184058954767e-06, "loss": 0.2514, "step": 22564, "teacher_loss": 0.2158547192811966 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.547944188117981, "learning_rate": 4.254599214866157e-06, "loss": 0.2818, "step": 22565, "teacher_loss": 0.2522379755973816 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.30402642488479614, "learning_rate": 4.253014617137375e-06, "loss": 0.1873, "step": 22566, "teacher_loss": 0.17429357767105103 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.2781296372413635, "learning_rate": 4.251430265804742e-06, "loss": 0.2112, "step": 22567, "teacher_loss": 0.20381200313568115 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.3918992877006531, "learning_rate": 4.249846160904587e-06, "loss": 0.1953, "step": 22568, "teacher_loss": 0.17350533604621887 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.27357858419418335, "learning_rate": 4.248262302473233e-06, "loss": 0.1626, "step": 22569, "teacher_loss": 0.15030162036418915 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.45551547408103943, "learning_rate": 4.246678690546986e-06, "loss": 0.2243, "step": 22570, "teacher_loss": 0.19865107536315918 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.2869514226913452, "learning_rate": 4.245095325162154e-06, "loss": 0.1556, "step": 22571, "teacher_loss": 0.14104031026363373 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.4500284790992737, "learning_rate": 4.243512206355049e-06, "loss": 0.2183, "step": 22572, "teacher_loss": 0.1925460398197174 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.382717490196228, "learning_rate": 4.2419293341619534e-06, "loss": 0.2563, "step": 22573, "teacher_loss": 0.24221587181091309 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5417371988296509, "learning_rate": 4.240346708619167e-06, "loss": 0.21, "step": 22574, "teacher_loss": 0.17318981885910034 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.162979394197464, "learning_rate": 4.238764329762969e-06, "loss": 0.1776, "step": 22575, "teacher_loss": 0.17923462390899658 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 1.5670448541641235, "learning_rate": 4.237182197629645e-06, "loss": 0.3629, "step": 22576, "teacher_loss": 0.22915859520435333 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5816590785980225, "learning_rate": 4.23560031225546e-06, "loss": 0.2635, "step": 22577, "teacher_loss": 0.22810280323028564 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.6411575078964233, "learning_rate": 4.234018673676687e-06, "loss": 0.2548, "step": 22578, "teacher_loss": 0.2118661105632782 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.38639289140701294, "learning_rate": 4.232437281929591e-06, "loss": 0.3001, "step": 22579, "teacher_loss": 0.29051390290260315 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.4705466032028198, "learning_rate": 4.2308561370504205e-06, "loss": 0.1845, "step": 22580, "teacher_loss": 0.1527404934167862 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.25138893723487854, "learning_rate": 4.229275239075433e-06, "loss": 0.184, "step": 22581, "teacher_loss": 0.17651250958442688 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.8316800594329834, "learning_rate": 4.227694588040875e-06, "loss": 0.256, "step": 22582, "teacher_loss": 0.19201889634132385 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.24994195997714996, "learning_rate": 4.22611418398298e-06, "loss": 0.1854, "step": 22583, "teacher_loss": 0.1782335638999939 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.1789945363998413, "learning_rate": 4.2245340269379835e-06, "loss": 0.1409, "step": 22584, "teacher_loss": 0.1366989016532898 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.4202876389026642, "learning_rate": 4.22295411694212e-06, "loss": 0.1627, "step": 22585, "teacher_loss": 0.13408920168876648 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5731959342956543, "learning_rate": 4.2213744540316035e-06, "loss": 0.2806, "step": 22586, "teacher_loss": 0.24806751310825348 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.20278418064117432, "learning_rate": 4.219795038242655e-06, "loss": 0.2284, "step": 22587, "teacher_loss": 0.2312992811203003 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.33175382018089294, "learning_rate": 4.21821586961149e-06, "loss": 0.1865, "step": 22588, "teacher_loss": 0.17041270434856415 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.19992820918560028, "learning_rate": 4.216636948174308e-06, "loss": 0.1539, "step": 22589, "teacher_loss": 0.14876753091812134 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.28158602118492126, "learning_rate": 4.215058273967303e-06, "loss": 0.2021, "step": 22590, "teacher_loss": 0.1932845264673233 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.7990930080413818, "learning_rate": 4.213479847026686e-06, "loss": 0.2769, "step": 22591, "teacher_loss": 0.21882659196853638 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.2985904812812805, "learning_rate": 4.211901667388635e-06, "loss": 0.2105, "step": 22592, "teacher_loss": 0.20071358978748322 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.41006115078926086, "learning_rate": 4.210323735089332e-06, "loss": 0.169, "step": 22593, "teacher_loss": 0.1421830952167511 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.26100900769233704, "learning_rate": 4.2087460501649554e-06, "loss": 0.194, "step": 22594, "teacher_loss": 0.18660786747932434 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5923420190811157, "learning_rate": 4.207168612651682e-06, "loss": 0.2242, "step": 22595, "teacher_loss": 0.18331030011177063 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.5142922401428223, "learning_rate": 4.20559142258567e-06, "loss": 0.2751, "step": 22596, "teacher_loss": 0.24852584302425385 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.4100603759288788, "learning_rate": 4.204014480003083e-06, "loss": 0.2553, "step": 22597, "teacher_loss": 0.23806911706924438 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.1011427789926529, "learning_rate": 4.202437784940082e-06, "loss": 0.1133, "step": 22598, "teacher_loss": 0.11459873616695404 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.338405966758728, "learning_rate": 4.200861337432809e-06, "loss": 0.1848, "step": 22599, "teacher_loss": 0.16774345934391022 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.3041019141674042, "learning_rate": 4.199285137517398e-06, "loss": 0.1996, "step": 22600, "teacher_loss": 0.18803206086158752 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 1.1846622228622437, "learning_rate": 4.197709185230005e-06, "loss": 0.3567, "step": 22601, "teacher_loss": 0.2646849751472473 }, { "compression_loss": 0.0, "epoch": 4.08, "label_loss": 0.24639245867729187, "learning_rate": 4.196133480606755e-06, "loss": 0.2016, "step": 22602, "teacher_loss": 0.19657376408576965 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5760072469711304, "learning_rate": 4.194558023683762e-06, "loss": 0.2322, "step": 22603, "teacher_loss": 0.1939689815044403 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5377370715141296, "learning_rate": 4.1929828144971686e-06, "loss": 0.1956, "step": 22604, "teacher_loss": 0.1575387716293335 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2735484838485718, "learning_rate": 4.191407853083076e-06, "loss": 0.1553, "step": 22605, "teacher_loss": 0.1421223282814026 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.8876392841339111, "learning_rate": 4.189833139477592e-06, "loss": 0.2334, "step": 22606, "teacher_loss": 0.1607379913330078 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4514651894569397, "learning_rate": 4.188258673716823e-06, "loss": 0.1799, "step": 22607, "teacher_loss": 0.14973172545433044 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.480631560087204, "learning_rate": 4.186684455836873e-06, "loss": 0.1757, "step": 22608, "teacher_loss": 0.14177730679512024 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.354866623878479, "learning_rate": 4.1851104858738235e-06, "loss": 0.2485, "step": 22609, "teacher_loss": 0.23664726316928864 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.3198975920677185, "learning_rate": 4.1835367638637656e-06, "loss": 0.1738, "step": 22610, "teacher_loss": 0.15756721794605255 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.3408806025981903, "learning_rate": 4.1819632898427835e-06, "loss": 0.1628, "step": 22611, "teacher_loss": 0.14302849769592285 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.7259741425514221, "learning_rate": 4.1803900638469464e-06, "loss": 0.304, "step": 22612, "teacher_loss": 0.2570857107639313 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.504450798034668, "learning_rate": 4.1788170859123245e-06, "loss": 0.2361, "step": 22613, "teacher_loss": 0.20633485913276672 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5568752288818359, "learning_rate": 4.177244356074989e-06, "loss": 0.2157, "step": 22614, "teacher_loss": 0.1778205931186676 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.28598469495773315, "learning_rate": 4.175671874370992e-06, "loss": 0.1778, "step": 22615, "teacher_loss": 0.1658347249031067 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6041390299797058, "learning_rate": 4.1740996408363815e-06, "loss": 0.2944, "step": 22616, "teacher_loss": 0.26003211736679077 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2922152876853943, "learning_rate": 4.172527655507208e-06, "loss": 0.1728, "step": 22617, "teacher_loss": 0.15958666801452637 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.28521639108657837, "learning_rate": 4.1709559184195176e-06, "loss": 0.2226, "step": 22618, "teacher_loss": 0.21562987565994263 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4649069309234619, "learning_rate": 4.1693844296093364e-06, "loss": 0.3082, "step": 22619, "teacher_loss": 0.2907332181930542 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.19252392649650574, "learning_rate": 4.167813189112698e-06, "loss": 0.1686, "step": 22620, "teacher_loss": 0.16591094434261322 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.27004939317703247, "learning_rate": 4.166242196965632e-06, "loss": 0.1527, "step": 22621, "teacher_loss": 0.13971218466758728 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2916765511035919, "learning_rate": 4.164671453204147e-06, "loss": 0.1896, "step": 22622, "teacher_loss": 0.17824479937553406 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2132805585861206, "learning_rate": 4.163100957864258e-06, "loss": 0.1523, "step": 22623, "teacher_loss": 0.14553149044513702 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4529210925102234, "learning_rate": 4.1615307109819785e-06, "loss": 0.2653, "step": 22624, "teacher_loss": 0.24442198872566223 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.28582000732421875, "learning_rate": 4.159960712593301e-06, "loss": 0.2099, "step": 22625, "teacher_loss": 0.20150399208068848 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5984143018722534, "learning_rate": 4.1583909627342225e-06, "loss": 0.1656, "step": 22626, "teacher_loss": 0.11748944222927094 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6292798519134521, "learning_rate": 4.156821461440739e-06, "loss": 0.2242, "step": 22627, "teacher_loss": 0.1791575700044632 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.17908817529678345, "learning_rate": 4.1552522087488255e-06, "loss": 0.1565, "step": 22628, "teacher_loss": 0.15403644740581512 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2232891470193863, "learning_rate": 4.153683204694469e-06, "loss": 0.1695, "step": 22629, "teacher_loss": 0.1635599434375763 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6556723117828369, "learning_rate": 4.152114449313634e-06, "loss": 0.2409, "step": 22630, "teacher_loss": 0.19480371475219727 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.43753910064697266, "learning_rate": 4.150545942642292e-06, "loss": 0.1924, "step": 22631, "teacher_loss": 0.16517898440361023 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.12581047415733337, "learning_rate": 4.148977684716405e-06, "loss": 0.2022, "step": 22632, "teacher_loss": 0.21070173382759094 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.44460129737854004, "learning_rate": 4.147409675571924e-06, "loss": 0.2528, "step": 22633, "teacher_loss": 0.23150289058685303 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.3765431344509125, "learning_rate": 4.1458419152448e-06, "loss": 0.2948, "step": 22634, "teacher_loss": 0.28576457500457764 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5270299911499023, "learning_rate": 4.144274403770984e-06, "loss": 0.2532, "step": 22635, "teacher_loss": 0.222722589969635 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.18115884065628052, "learning_rate": 4.142707141186404e-06, "loss": 0.2009, "step": 22636, "teacher_loss": 0.20312249660491943 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.27549487352371216, "learning_rate": 4.141140127527002e-06, "loss": 0.2176, "step": 22637, "teacher_loss": 0.21118830144405365 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6282958984375, "learning_rate": 4.139573362828695e-06, "loss": 0.2048, "step": 22638, "teacher_loss": 0.15772756934165955 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5627350807189941, "learning_rate": 4.138006847127416e-06, "loss": 0.2619, "step": 22639, "teacher_loss": 0.22848185896873474 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6914810538291931, "learning_rate": 4.136440580459068e-06, "loss": 0.2715, "step": 22640, "teacher_loss": 0.224819153547287 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.47946155071258545, "learning_rate": 4.134874562859568e-06, "loss": 0.2389, "step": 22641, "teacher_loss": 0.2121821641921997 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.47780200839042664, "learning_rate": 4.133308794364823e-06, "loss": 0.2489, "step": 22642, "teacher_loss": 0.22352181375026703 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.24092620611190796, "learning_rate": 4.131743275010721e-06, "loss": 0.1475, "step": 22643, "teacher_loss": 0.13716727495193481 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4969255030155182, "learning_rate": 4.130178004833164e-06, "loss": 0.2795, "step": 22644, "teacher_loss": 0.25529471039772034 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.38713592290878296, "learning_rate": 4.1286129838680384e-06, "loss": 0.1976, "step": 22645, "teacher_loss": 0.17654253542423248 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4934028387069702, "learning_rate": 4.127048212151218e-06, "loss": 0.2459, "step": 22646, "teacher_loss": 0.21844777464866638 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.6083205342292786, "learning_rate": 4.125483689718585e-06, "loss": 0.2262, "step": 22647, "teacher_loss": 0.18374021351337433 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.26910436153411865, "learning_rate": 4.123919416606011e-06, "loss": 0.2132, "step": 22648, "teacher_loss": 0.20700596272945404 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.11657220125198364, "learning_rate": 4.122355392849357e-06, "loss": 0.1625, "step": 22649, "teacher_loss": 0.16759991645812988 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.41936200857162476, "learning_rate": 4.120791618484471e-06, "loss": 0.2536, "step": 22650, "teacher_loss": 0.23514056205749512 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.13410958647727966, "learning_rate": 4.119228093547226e-06, "loss": 0.1599, "step": 22651, "teacher_loss": 0.1627708077430725 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5489732623100281, "learning_rate": 4.1176648180734586e-06, "loss": 0.2895, "step": 22652, "teacher_loss": 0.2606958746910095 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4245118796825409, "learning_rate": 4.1161017920989995e-06, "loss": 0.2054, "step": 22653, "teacher_loss": 0.1810726821422577 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.39075595140457153, "learning_rate": 4.114539015659705e-06, "loss": 0.2362, "step": 22654, "teacher_loss": 0.21901929378509521 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.4327138066291809, "learning_rate": 4.112976488791395e-06, "loss": 0.2049, "step": 22655, "teacher_loss": 0.1796102523803711 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.2354084700345993, "learning_rate": 4.111414211529888e-06, "loss": 0.4395, "step": 22656, "teacher_loss": 0.46217358112335205 }, { "compression_loss": 0.0, "epoch": 4.09, "label_loss": 0.5540645718574524, "learning_rate": 4.109852183911006e-06, "loss": 0.1891, "step": 22657, "teacher_loss": 0.1485649049282074 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6115909218788147, "learning_rate": 4.10829040597057e-06, "loss": 0.2504, "step": 22658, "teacher_loss": 0.210235595703125 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6111552119255066, "learning_rate": 4.106728877744375e-06, "loss": 0.2149, "step": 22659, "teacher_loss": 0.17083293199539185 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.33633461594581604, "learning_rate": 4.105167599268225e-06, "loss": 0.2319, "step": 22660, "teacher_loss": 0.22028425335884094 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4442348778247833, "learning_rate": 4.103606570577924e-06, "loss": 0.2056, "step": 22661, "teacher_loss": 0.1791044920682907 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.1828872561454773, "learning_rate": 4.102045791709254e-06, "loss": 0.1835, "step": 22662, "teacher_loss": 0.18357203900814056 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.7625223994255066, "learning_rate": 4.1004852626979925e-06, "loss": 0.2631, "step": 22663, "teacher_loss": 0.20761752128601074 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.48668938875198364, "learning_rate": 4.098924983579934e-06, "loss": 0.1966, "step": 22664, "teacher_loss": 0.16431349515914917 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.478726327419281, "learning_rate": 4.097364954390842e-06, "loss": 0.1593, "step": 22665, "teacher_loss": 0.12385260313749313 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.2811603844165802, "learning_rate": 4.09580517516648e-06, "loss": 0.1598, "step": 22666, "teacher_loss": 0.14635232090950012 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.26371073722839355, "learning_rate": 4.094245645942612e-06, "loss": 0.2452, "step": 22667, "teacher_loss": 0.2431599497795105 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.5744708776473999, "learning_rate": 4.092686366754999e-06, "loss": 0.2374, "step": 22668, "teacher_loss": 0.19991913437843323 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6082953214645386, "learning_rate": 4.091127337639382e-06, "loss": 0.1821, "step": 22669, "teacher_loss": 0.13474726676940918 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4010860025882721, "learning_rate": 4.089568558631508e-06, "loss": 0.2208, "step": 22670, "teacher_loss": 0.20079299807548523 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.753595232963562, "learning_rate": 4.088010029767122e-06, "loss": 0.2459, "step": 22671, "teacher_loss": 0.18946939706802368 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.5105704665184021, "learning_rate": 4.086451751081943e-06, "loss": 0.1962, "step": 22672, "teacher_loss": 0.161228209733963 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.31743037700653076, "learning_rate": 4.084893722611706e-06, "loss": 0.1785, "step": 22673, "teacher_loss": 0.1631021350622177 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.40683621168136597, "learning_rate": 4.083335944392135e-06, "loss": 0.2543, "step": 22674, "teacher_loss": 0.2372998595237732 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.35393381118774414, "learning_rate": 4.081778416458938e-06, "loss": 0.2332, "step": 22675, "teacher_loss": 0.2197376787662506 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.3269878625869751, "learning_rate": 4.0802211388478255e-06, "loss": 0.2136, "step": 22676, "teacher_loss": 0.20097634196281433 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4012044668197632, "learning_rate": 4.078664111594507e-06, "loss": 0.177, "step": 22677, "teacher_loss": 0.1520952433347702 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4496087431907654, "learning_rate": 4.077107334734679e-06, "loss": 0.2525, "step": 22678, "teacher_loss": 0.23055025935173035 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4206318259239197, "learning_rate": 4.0755508083040245e-06, "loss": 0.2715, "step": 22679, "teacher_loss": 0.2548922896385193 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6155049800872803, "learning_rate": 4.0739945323382376e-06, "loss": 0.2044, "step": 22680, "teacher_loss": 0.15873342752456665 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.09657083451747894, "learning_rate": 4.072438506873004e-06, "loss": 0.1242, "step": 22681, "teacher_loss": 0.12722717225551605 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.20979011058807373, "learning_rate": 4.070882731943987e-06, "loss": 0.223, "step": 22682, "teacher_loss": 0.2244889736175537 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.30993878841400146, "learning_rate": 4.069327207586862e-06, "loss": 0.2749, "step": 22683, "teacher_loss": 0.270952045917511 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.220865860581398, "learning_rate": 4.067771933837296e-06, "loss": 0.1962, "step": 22684, "teacher_loss": 0.19348931312561035 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.23129618167877197, "learning_rate": 4.06621691073094e-06, "loss": 0.1934, "step": 22685, "teacher_loss": 0.1891534924507141 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4420129656791687, "learning_rate": 4.064662138303449e-06, "loss": 0.1834, "step": 22686, "teacher_loss": 0.15463536977767944 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.22357898950576782, "learning_rate": 4.063107616590473e-06, "loss": 0.187, "step": 22687, "teacher_loss": 0.18298810720443726 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.5648127794265747, "learning_rate": 4.0615533456276445e-06, "loss": 0.2803, "step": 22688, "teacher_loss": 0.24868208169937134 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6106789112091064, "learning_rate": 4.059999325450608e-06, "loss": 0.2372, "step": 22689, "teacher_loss": 0.19574488699436188 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4297271966934204, "learning_rate": 4.058445556094982e-06, "loss": 0.2001, "step": 22690, "teacher_loss": 0.17457827925682068 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4158221185207367, "learning_rate": 4.056892037596394e-06, "loss": 0.2336, "step": 22691, "teacher_loss": 0.21335706114768982 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.381158709526062, "learning_rate": 4.055338769990468e-06, "loss": 0.2417, "step": 22692, "teacher_loss": 0.22625300288200378 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.28542739152908325, "learning_rate": 4.053785753312805e-06, "loss": 0.1894, "step": 22693, "teacher_loss": 0.17867901921272278 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.38618004322052, "learning_rate": 4.052232987599017e-06, "loss": 0.186, "step": 22694, "teacher_loss": 0.16373300552368164 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4946167469024658, "learning_rate": 4.050680472884707e-06, "loss": 0.2173, "step": 22695, "teacher_loss": 0.1865355521440506 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6046112775802612, "learning_rate": 4.049128209205462e-06, "loss": 0.2102, "step": 22696, "teacher_loss": 0.1664019227027893 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.44900938868522644, "learning_rate": 4.047576196596879e-06, "loss": 0.3147, "step": 22697, "teacher_loss": 0.2998010516166687 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.3624172806739807, "learning_rate": 4.046024435094534e-06, "loss": 0.2158, "step": 22698, "teacher_loss": 0.19955873489379883 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.1988614946603775, "learning_rate": 4.04447292473401e-06, "loss": 0.1947, "step": 22699, "teacher_loss": 0.19424524903297424 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.11881519109010696, "learning_rate": 4.04292166555087e-06, "loss": 0.1309, "step": 22700, "teacher_loss": 0.13229748606681824 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.4090690612792969, "learning_rate": 4.0413706575806865e-06, "loss": 0.2289, "step": 22701, "teacher_loss": 0.2088746875524521 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.44665592908859253, "learning_rate": 4.039819900859022e-06, "loss": 0.1961, "step": 22702, "teacher_loss": 0.1682625263929367 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.6094774007797241, "learning_rate": 4.038269395421424e-06, "loss": 0.2056, "step": 22703, "teacher_loss": 0.16069424152374268 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.7952027320861816, "learning_rate": 4.0367191413034425e-06, "loss": 0.2461, "step": 22704, "teacher_loss": 0.18507659435272217 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.3751080334186554, "learning_rate": 4.035169138540625e-06, "loss": 0.1986, "step": 22705, "teacher_loss": 0.1789853870868683 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.32459545135498047, "learning_rate": 4.033619387168502e-06, "loss": 0.2188, "step": 22706, "teacher_loss": 0.2070883810520172 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.36693960428237915, "learning_rate": 4.032069887222607e-06, "loss": 0.2494, "step": 22707, "teacher_loss": 0.2363707572221756 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 1.1638574600219727, "learning_rate": 4.030520638738471e-06, "loss": 0.3754, "step": 22708, "teacher_loss": 0.2877826690673828 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.17444932460784912, "learning_rate": 4.0289716417516035e-06, "loss": 0.1735, "step": 22709, "teacher_loss": 0.1733928620815277 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.3411153554916382, "learning_rate": 4.0274228962975235e-06, "loss": 0.1777, "step": 22710, "teacher_loss": 0.1595703512430191 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.36173027753829956, "learning_rate": 4.0258744024117435e-06, "loss": 0.1902, "step": 22711, "teacher_loss": 0.1711013913154602 }, { "compression_loss": 0.0, "epoch": 4.1, "label_loss": 0.2393421232700348, "learning_rate": 4.024326160129761e-06, "loss": 0.2, "step": 22712, "teacher_loss": 0.19560891389846802 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4350154995918274, "learning_rate": 4.022778169487063e-06, "loss": 0.2314, "step": 22713, "teacher_loss": 0.20881511270999908 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2271440625190735, "learning_rate": 4.021230430519161e-06, "loss": 0.1387, "step": 22714, "teacher_loss": 0.12888216972351074 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.60682612657547, "learning_rate": 4.0196829432615285e-06, "loss": 0.2075, "step": 22715, "teacher_loss": 0.1631086766719818 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.15723320841789246, "learning_rate": 4.01813570774964e-06, "loss": 0.1409, "step": 22716, "teacher_loss": 0.1391250193119049 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.44588929414749146, "learning_rate": 4.016588724018976e-06, "loss": 0.228, "step": 22717, "teacher_loss": 0.20374518632888794 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4236931800842285, "learning_rate": 4.015041992105005e-06, "loss": 0.2333, "step": 22718, "teacher_loss": 0.2121088206768036 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4152297377586365, "learning_rate": 4.013495512043183e-06, "loss": 0.1781, "step": 22719, "teacher_loss": 0.15177898108959198 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.33221930265426636, "learning_rate": 4.01194928386897e-06, "loss": 0.1533, "step": 22720, "teacher_loss": 0.13343505561351776 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.5556588172912598, "learning_rate": 4.010403307617821e-06, "loss": 0.2573, "step": 22721, "teacher_loss": 0.22420406341552734 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.42407265305519104, "learning_rate": 4.008857583325175e-06, "loss": 0.1803, "step": 22722, "teacher_loss": 0.15325365960597992 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.47183385491371155, "learning_rate": 4.007312111026462e-06, "loss": 0.2451, "step": 22723, "teacher_loss": 0.21991555392742157 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.1616220325231552, "learning_rate": 4.005766890757135e-06, "loss": 0.1932, "step": 22724, "teacher_loss": 0.19666370749473572 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.48040449619293213, "learning_rate": 4.004221922552608e-06, "loss": 0.1986, "step": 22725, "teacher_loss": 0.1672540009021759 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.8530826568603516, "learning_rate": 4.002677206448299e-06, "loss": 0.2626, "step": 22726, "teacher_loss": 0.19703079760074615 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2912660241127014, "learning_rate": 4.001132742479639e-06, "loss": 0.1731, "step": 22727, "teacher_loss": 0.15999960899353027 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.14258864521980286, "learning_rate": 3.999588530682028e-06, "loss": 0.1456, "step": 22728, "teacher_loss": 0.1459406316280365 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2268160879611969, "learning_rate": 3.998044571090866e-06, "loss": 0.2319, "step": 22729, "teacher_loss": 0.23244377970695496 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.5110557079315186, "learning_rate": 3.996500863741556e-06, "loss": 0.2084, "step": 22730, "teacher_loss": 0.17476025223731995 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4039493203163147, "learning_rate": 3.994957408669497e-06, "loss": 0.2916, "step": 22731, "teacher_loss": 0.2791168987751007 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2917608618736267, "learning_rate": 3.993414205910064e-06, "loss": 0.1977, "step": 22732, "teacher_loss": 0.18725427985191345 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.38904404640197754, "learning_rate": 3.991871255498645e-06, "loss": 0.2176, "step": 22733, "teacher_loss": 0.19860002398490906 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.33132603764533997, "learning_rate": 3.990328557470619e-06, "loss": 0.1764, "step": 22734, "teacher_loss": 0.15921461582183838 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.48762670159339905, "learning_rate": 3.9887861118613435e-06, "loss": 0.1866, "step": 22735, "teacher_loss": 0.15316076576709747 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.30529311299324036, "learning_rate": 3.987243918706191e-06, "loss": 0.1561, "step": 22736, "teacher_loss": 0.1394929140806198 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.13172733783721924, "learning_rate": 3.985701978040522e-06, "loss": 0.1796, "step": 22737, "teacher_loss": 0.18486396968364716 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.6441196203231812, "learning_rate": 3.98416028989968e-06, "loss": 0.2156, "step": 22738, "teacher_loss": 0.1680314689874649 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2546910047531128, "learning_rate": 3.982618854319018e-06, "loss": 0.2944, "step": 22739, "teacher_loss": 0.2988295257091522 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 1.2795207500457764, "learning_rate": 3.981077671333871e-06, "loss": 0.3757, "step": 22740, "teacher_loss": 0.2752285599708557 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4286991357803345, "learning_rate": 3.9795367409795805e-06, "loss": 0.1569, "step": 22741, "teacher_loss": 0.12666790187358856 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.31686174869537354, "learning_rate": 3.977996063291465e-06, "loss": 0.2039, "step": 22742, "teacher_loss": 0.19130294024944305 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.24156928062438965, "learning_rate": 3.9764556383048575e-06, "loss": 0.1563, "step": 22743, "teacher_loss": 0.1468181014060974 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.6340640783309937, "learning_rate": 3.974915466055075e-06, "loss": 0.2176, "step": 22744, "teacher_loss": 0.17128895223140717 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2559596002101898, "learning_rate": 3.9733755465774215e-06, "loss": 0.2066, "step": 22745, "teacher_loss": 0.20114782452583313 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4445616602897644, "learning_rate": 3.971835879907206e-06, "loss": 0.1895, "step": 22746, "teacher_loss": 0.1611877679824829 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2750485837459564, "learning_rate": 3.970296466079735e-06, "loss": 0.1833, "step": 22747, "teacher_loss": 0.17309531569480896 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4285891056060791, "learning_rate": 3.968757305130294e-06, "loss": 0.2206, "step": 22748, "teacher_loss": 0.19751664996147156 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.37817591428756714, "learning_rate": 3.967218397094172e-06, "loss": 0.1622, "step": 22749, "teacher_loss": 0.1381799280643463 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.7025238275527954, "learning_rate": 3.9656797420066615e-06, "loss": 0.2427, "step": 22750, "teacher_loss": 0.19163858890533447 }, { "epoch": 4.11, "eval_exact_match": 80.2554399243141, "eval_f1": 87.68513528843437, "step": 22750 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.23433294892311096, "learning_rate": 3.964141339903026e-06, "loss": 0.1949, "step": 22751, "teacher_loss": 0.19053223729133606 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.5709946155548096, "learning_rate": 3.962603190818547e-06, "loss": 0.2487, "step": 22752, "teacher_loss": 0.2128649204969406 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.29584360122680664, "learning_rate": 3.96106529478848e-06, "loss": 0.1939, "step": 22753, "teacher_loss": 0.18259459733963013 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.28943800926208496, "learning_rate": 3.95952765184809e-06, "loss": 0.2008, "step": 22754, "teacher_loss": 0.19097205996513367 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.37591415643692017, "learning_rate": 3.957990262032633e-06, "loss": 0.2498, "step": 22755, "teacher_loss": 0.235824316740036 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.24264061450958252, "learning_rate": 3.956453125377351e-06, "loss": 0.2097, "step": 22756, "teacher_loss": 0.20604225993156433 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2767988443374634, "learning_rate": 3.9549162419174874e-06, "loss": 0.3605, "step": 22757, "teacher_loss": 0.3698080778121948 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.11844907701015472, "learning_rate": 3.9533796116882845e-06, "loss": 0.1535, "step": 22758, "teacher_loss": 0.15740500390529633 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.7088150978088379, "learning_rate": 3.9518432347249636e-06, "loss": 0.2692, "step": 22759, "teacher_loss": 0.22033682465553284 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4092136323451996, "learning_rate": 3.950307111062757e-06, "loss": 0.1963, "step": 22760, "teacher_loss": 0.17268610000610352 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.20961187779903412, "learning_rate": 3.9487712407368755e-06, "loss": 0.1876, "step": 22761, "teacher_loss": 0.18516525626182556 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4993930757045746, "learning_rate": 3.94723562378254e-06, "loss": 0.219, "step": 22762, "teacher_loss": 0.18780581653118134 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.19580087065696716, "learning_rate": 3.94570026023495e-06, "loss": 0.2031, "step": 22763, "teacher_loss": 0.20387354493141174 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4121062755584717, "learning_rate": 3.944165150129311e-06, "loss": 0.2086, "step": 22764, "teacher_loss": 0.18594354391098022 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4467495083808899, "learning_rate": 3.942630293500821e-06, "loss": 0.2086, "step": 22765, "teacher_loss": 0.1821470707654953 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.7759133577346802, "learning_rate": 3.941095690384664e-06, "loss": 0.2585, "step": 22766, "teacher_loss": 0.2009628713130951 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.4497264623641968, "learning_rate": 3.939561340816024e-06, "loss": 0.2379, "step": 22767, "teacher_loss": 0.21431520581245422 }, { "compression_loss": 0.0, "epoch": 4.11, "label_loss": 0.2579313814640045, "learning_rate": 3.9380272448300884e-06, "loss": 0.1993, "step": 22768, "teacher_loss": 0.192830890417099 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.5114196538925171, "learning_rate": 3.9364934024620166e-06, "loss": 0.2299, "step": 22769, "teacher_loss": 0.19863177835941315 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.32903820276260376, "learning_rate": 3.934959813746981e-06, "loss": 0.192, "step": 22770, "teacher_loss": 0.17679116129875183 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.8595058917999268, "learning_rate": 3.9334264787201474e-06, "loss": 0.3886, "step": 22771, "teacher_loss": 0.33622950315475464 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.24169015884399414, "learning_rate": 3.931893397416666e-06, "loss": 0.1358, "step": 22772, "teacher_loss": 0.12403303384780884 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.32074272632598877, "learning_rate": 3.930360569871676e-06, "loss": 0.1868, "step": 22773, "teacher_loss": 0.1718747317790985 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.47327497601509094, "learning_rate": 3.928827996120336e-06, "loss": 0.2836, "step": 22774, "teacher_loss": 0.2625022530555725 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.38969460129737854, "learning_rate": 3.927295676197779e-06, "loss": 0.2707, "step": 22775, "teacher_loss": 0.25752925872802734 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.26623645424842834, "learning_rate": 3.9257636101391265e-06, "loss": 0.1638, "step": 22776, "teacher_loss": 0.15239296853542328 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3344690203666687, "learning_rate": 3.924231797979519e-06, "loss": 0.2531, "step": 22777, "teacher_loss": 0.2441120445728302 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.404674768447876, "learning_rate": 3.9227002397540705e-06, "loss": 0.3003, "step": 22778, "teacher_loss": 0.28875136375427246 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.463502436876297, "learning_rate": 3.921168935497889e-06, "loss": 0.1806, "step": 22779, "teacher_loss": 0.14912721514701843 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.24268566071987152, "learning_rate": 3.919637885246089e-06, "loss": 0.123, "step": 22780, "teacher_loss": 0.10965050011873245 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.20628032088279724, "learning_rate": 3.918107089033776e-06, "loss": 0.2183, "step": 22781, "teacher_loss": 0.21966543793678284 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.28634029626846313, "learning_rate": 3.916576546896035e-06, "loss": 0.1698, "step": 22782, "teacher_loss": 0.15685084462165833 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.6188716888427734, "learning_rate": 3.915046258867966e-06, "loss": 0.2637, "step": 22783, "teacher_loss": 0.22423511743545532 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.49091142416000366, "learning_rate": 3.913516224984658e-06, "loss": 0.2025, "step": 22784, "teacher_loss": 0.17049023509025574 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.5912739038467407, "learning_rate": 3.911986445281182e-06, "loss": 0.2041, "step": 22785, "teacher_loss": 0.16108059883117676 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.5654439926147461, "learning_rate": 3.9104569197926045e-06, "loss": 0.2746, "step": 22786, "teacher_loss": 0.24225589632987976 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.4637376666069031, "learning_rate": 3.90892764855401e-06, "loss": 0.1675, "step": 22787, "teacher_loss": 0.13463382422924042 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.5043166279792786, "learning_rate": 3.907398631600451e-06, "loss": 0.2049, "step": 22788, "teacher_loss": 0.1715872436761856 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.6082807779312134, "learning_rate": 3.905869868966982e-06, "loss": 0.2742, "step": 22789, "teacher_loss": 0.23704344034194946 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.78838050365448, "learning_rate": 3.904341360688654e-06, "loss": 0.2434, "step": 22790, "teacher_loss": 0.18289171159267426 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.4148459732532501, "learning_rate": 3.9028131068005165e-06, "loss": 0.237, "step": 22791, "teacher_loss": 0.21728824079036713 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.22851252555847168, "learning_rate": 3.901285107337599e-06, "loss": 0.1513, "step": 22792, "teacher_loss": 0.1427510380744934 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.7189049124717712, "learning_rate": 3.8997573623349385e-06, "loss": 0.3398, "step": 22793, "teacher_loss": 0.29772549867630005 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.25955629348754883, "learning_rate": 3.898229871827565e-06, "loss": 0.2074, "step": 22794, "teacher_loss": 0.20163947343826294 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.6709094643592834, "learning_rate": 3.896702635850493e-06, "loss": 0.3215, "step": 22795, "teacher_loss": 0.28268611431121826 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.7042547464370728, "learning_rate": 3.895175654438738e-06, "loss": 0.2652, "step": 22796, "teacher_loss": 0.21640561521053314 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.2703486680984497, "learning_rate": 3.893648927627318e-06, "loss": 0.1632, "step": 22797, "teacher_loss": 0.15126243233680725 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.45809632539749146, "learning_rate": 3.892122455451224e-06, "loss": 0.2443, "step": 22798, "teacher_loss": 0.22059085965156555 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.48233088850975037, "learning_rate": 3.890596237945458e-06, "loss": 0.221, "step": 22799, "teacher_loss": 0.19200804829597473 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.22528111934661865, "learning_rate": 3.889070275145018e-06, "loss": 0.2123, "step": 22800, "teacher_loss": 0.21080918610095978 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.25084811449050903, "learning_rate": 3.887544567084884e-06, "loss": 0.1747, "step": 22801, "teacher_loss": 0.16629287600517273 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.2577691078186035, "learning_rate": 3.886019113800031e-06, "loss": 0.1868, "step": 22802, "teacher_loss": 0.17886883020401 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3990160822868347, "learning_rate": 3.884493915325439e-06, "loss": 0.3796, "step": 22803, "teacher_loss": 0.37743836641311646 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.1225518211722374, "learning_rate": 3.882968971696081e-06, "loss": 0.1391, "step": 22804, "teacher_loss": 0.14093998074531555 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.35597896575927734, "learning_rate": 3.881444282946908e-06, "loss": 0.1923, "step": 22805, "teacher_loss": 0.17415188252925873 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.47772926092147827, "learning_rate": 3.8799198491128835e-06, "loss": 0.3342, "step": 22806, "teacher_loss": 0.318303644657135 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.36855238676071167, "learning_rate": 3.87839567022896e-06, "loss": 0.234, "step": 22807, "teacher_loss": 0.21901264786720276 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3471972942352295, "learning_rate": 3.876871746330077e-06, "loss": 0.1795, "step": 22808, "teacher_loss": 0.16090184450149536 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.5534819960594177, "learning_rate": 3.875348077451174e-06, "loss": 0.2557, "step": 22809, "teacher_loss": 0.22263109683990479 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.7976883053779602, "learning_rate": 3.873824663627193e-06, "loss": 0.2215, "step": 22810, "teacher_loss": 0.1574346125125885 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.42875123023986816, "learning_rate": 3.872301504893047e-06, "loss": 0.1958, "step": 22811, "teacher_loss": 0.1699080765247345 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.46442925930023193, "learning_rate": 3.870778601283671e-06, "loss": 0.2063, "step": 22812, "teacher_loss": 0.1776617467403412 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.13106337189674377, "learning_rate": 3.869255952833971e-06, "loss": 0.1399, "step": 22813, "teacher_loss": 0.14090678095817566 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.7380800843238831, "learning_rate": 3.8677335595788595e-06, "loss": 0.2317, "step": 22814, "teacher_loss": 0.17545756697654724 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.4530131220817566, "learning_rate": 3.866211421553245e-06, "loss": 0.2108, "step": 22815, "teacher_loss": 0.18388238549232483 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3775239586830139, "learning_rate": 3.864689538792017e-06, "loss": 0.1762, "step": 22816, "teacher_loss": 0.15387769043445587 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.2615325450897217, "learning_rate": 3.863167911330074e-06, "loss": 0.2206, "step": 22817, "teacher_loss": 0.2160816192626953 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.39904022216796875, "learning_rate": 3.861646539202304e-06, "loss": 0.2071, "step": 22818, "teacher_loss": 0.18580499291419983 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3116586208343506, "learning_rate": 3.860125422443581e-06, "loss": 0.1726, "step": 22819, "teacher_loss": 0.15717604756355286 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.12355434894561768, "learning_rate": 3.858604561088782e-06, "loss": 0.182, "step": 22820, "teacher_loss": 0.18849727511405945 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.24400505423545837, "learning_rate": 3.857083955172782e-06, "loss": 0.178, "step": 22821, "teacher_loss": 0.17069856822490692 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.42347875237464905, "learning_rate": 3.855563604730439e-06, "loss": 0.1899, "step": 22822, "teacher_loss": 0.1639215052127838 }, { "compression_loss": 0.0, "epoch": 4.12, "label_loss": 0.3387252986431122, "learning_rate": 3.854043509796604e-06, "loss": 0.2224, "step": 22823, "teacher_loss": 0.20945142209529877 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3266870975494385, "learning_rate": 3.852523670406136e-06, "loss": 0.1944, "step": 22824, "teacher_loss": 0.1796647608280182 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4827834367752075, "learning_rate": 3.851004086593881e-06, "loss": 0.199, "step": 22825, "teacher_loss": 0.16752278804779053 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.23960530757904053, "learning_rate": 3.849484758394671e-06, "loss": 0.1918, "step": 22826, "teacher_loss": 0.18652094900608063 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.7126004695892334, "learning_rate": 3.847965685843347e-06, "loss": 0.312, "step": 22827, "teacher_loss": 0.2674831748008728 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.9634597897529602, "learning_rate": 3.846446868974737e-06, "loss": 0.3058, "step": 22828, "teacher_loss": 0.23272985219955444 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.36815857887268066, "learning_rate": 3.844928307823655e-06, "loss": 0.1865, "step": 22829, "teacher_loss": 0.1662788689136505 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.09020170569419861, "learning_rate": 3.843410002424924e-06, "loss": 0.1371, "step": 22830, "teacher_loss": 0.14226830005645752 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.724023699760437, "learning_rate": 3.841891952813356e-06, "loss": 0.2743, "step": 22831, "teacher_loss": 0.2242925763130188 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3707645833492279, "learning_rate": 3.840374159023747e-06, "loss": 0.2122, "step": 22832, "teacher_loss": 0.19460159540176392 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.20390814542770386, "learning_rate": 3.838856621090902e-06, "loss": 0.2017, "step": 22833, "teacher_loss": 0.20148205757141113 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.25877898931503296, "learning_rate": 3.837339339049615e-06, "loss": 0.2446, "step": 22834, "teacher_loss": 0.2429705262184143 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.30381494760513306, "learning_rate": 3.835822312934669e-06, "loss": 0.2126, "step": 22835, "teacher_loss": 0.20250967144966125 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3398551046848297, "learning_rate": 3.834305542780837e-06, "loss": 0.1522, "step": 22836, "teacher_loss": 0.13137857615947723 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.45063066482543945, "learning_rate": 3.832789028622911e-06, "loss": 0.187, "step": 22837, "teacher_loss": 0.1576966643333435 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.40712884068489075, "learning_rate": 3.831272770495653e-06, "loss": 0.2062, "step": 22838, "teacher_loss": 0.18389661610126495 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.2704983353614807, "learning_rate": 3.82975676843382e-06, "loss": 0.1996, "step": 22839, "teacher_loss": 0.1917600929737091 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3123886287212372, "learning_rate": 3.828241022472172e-06, "loss": 0.1514, "step": 22840, "teacher_loss": 0.13354891538619995 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.530829668045044, "learning_rate": 3.82672553264547e-06, "loss": 0.2528, "step": 22841, "teacher_loss": 0.2219436764717102 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.5389578342437744, "learning_rate": 3.825210298988445e-06, "loss": 0.3024, "step": 22842, "teacher_loss": 0.2761152982711792 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.33514469861984253, "learning_rate": 3.823695321535847e-06, "loss": 0.2534, "step": 22843, "teacher_loss": 0.2442653328180313 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.1748175024986267, "learning_rate": 3.822180600322409e-06, "loss": 0.1917, "step": 22844, "teacher_loss": 0.19353771209716797 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.44219738245010376, "learning_rate": 3.820666135382858e-06, "loss": 0.3572, "step": 22845, "teacher_loss": 0.3477065861225128 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.2006331980228424, "learning_rate": 3.819151926751906e-06, "loss": 0.1618, "step": 22846, "teacher_loss": 0.1574380099773407 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.22027191519737244, "learning_rate": 3.817637974464288e-06, "loss": 0.1944, "step": 22847, "teacher_loss": 0.19151535630226135 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.463347464799881, "learning_rate": 3.816124278554705e-06, "loss": 0.433, "step": 22848, "teacher_loss": 0.42962202429771423 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.8238054513931274, "learning_rate": 3.8146108390578514e-06, "loss": 0.2932, "step": 22849, "teacher_loss": 0.23424723744392395 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.344815731048584, "learning_rate": 3.8130976560084444e-06, "loss": 0.1693, "step": 22850, "teacher_loss": 0.1497804820537567 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.5286739468574524, "learning_rate": 3.81158472944117e-06, "loss": 0.1961, "step": 22851, "teacher_loss": 0.15917927026748657 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.31880074739456177, "learning_rate": 3.8100720593907064e-06, "loss": 0.2061, "step": 22852, "teacher_loss": 0.1935739815235138 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.6812395453453064, "learning_rate": 3.8085596458917426e-06, "loss": 0.2918, "step": 22853, "teacher_loss": 0.24853505194187164 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4880143404006958, "learning_rate": 3.8070474889789562e-06, "loss": 0.1902, "step": 22854, "teacher_loss": 0.1571383774280548 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.5628313422203064, "learning_rate": 3.8055355886870095e-06, "loss": 0.1974, "step": 22855, "teacher_loss": 0.15676581859588623 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.38051837682724, "learning_rate": 3.804023945050568e-06, "loss": 0.1904, "step": 22856, "teacher_loss": 0.16924715042114258 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.20348265767097473, "learning_rate": 3.802512558104294e-06, "loss": 0.1524, "step": 22857, "teacher_loss": 0.14675509929656982 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.5313616991043091, "learning_rate": 3.8010014278828314e-06, "loss": 0.3359, "step": 22858, "teacher_loss": 0.3142049312591553 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4730963110923767, "learning_rate": 3.799490554420831e-06, "loss": 0.2449, "step": 22859, "teacher_loss": 0.2194899618625641 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.2631877660751343, "learning_rate": 3.797979937752933e-06, "loss": 0.178, "step": 22860, "teacher_loss": 0.16854281723499298 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.561118483543396, "learning_rate": 3.796469577913768e-06, "loss": 0.2917, "step": 22861, "teacher_loss": 0.26181483268737793 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.5119645595550537, "learning_rate": 3.794959474937969e-06, "loss": 0.2723, "step": 22862, "teacher_loss": 0.24570375680923462 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.20612607896327972, "learning_rate": 3.7934496288601493e-06, "loss": 0.1487, "step": 22863, "teacher_loss": 0.14233949780464172 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3395610749721527, "learning_rate": 3.7919400397149366e-06, "loss": 0.204, "step": 22864, "teacher_loss": 0.18889927864074707 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4405629634857178, "learning_rate": 3.79043070753693e-06, "loss": 0.2613, "step": 22865, "teacher_loss": 0.24136215448379517 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.8527287244796753, "learning_rate": 3.7889216323607394e-06, "loss": 0.3814, "step": 22866, "teacher_loss": 0.3290061354637146 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.39464664459228516, "learning_rate": 3.787412814220968e-06, "loss": 0.2057, "step": 22867, "teacher_loss": 0.18473097681999207 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3150639533996582, "learning_rate": 3.7859042531521975e-06, "loss": 0.2089, "step": 22868, "teacher_loss": 0.19715292751789093 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.34170639514923096, "learning_rate": 3.784395949189024e-06, "loss": 0.3892, "step": 22869, "teacher_loss": 0.3944254517555237 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.430134117603302, "learning_rate": 3.7828879023660277e-06, "loss": 0.1757, "step": 22870, "teacher_loss": 0.14740820229053497 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.48510220646858215, "learning_rate": 3.7813801127177768e-06, "loss": 0.1808, "step": 22871, "teacher_loss": 0.14698204398155212 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.40084099769592285, "learning_rate": 3.779872580278844e-06, "loss": 0.1797, "step": 22872, "teacher_loss": 0.1551511287689209 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.3540598750114441, "learning_rate": 3.7783653050837994e-06, "loss": 0.174, "step": 22873, "teacher_loss": 0.1540299355983734 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.2955169975757599, "learning_rate": 3.7768582871671884e-06, "loss": 0.1625, "step": 22874, "teacher_loss": 0.14776170253753662 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4249776601791382, "learning_rate": 3.7753515265635734e-06, "loss": 0.23, "step": 22875, "teacher_loss": 0.20830760896205902 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.40203937888145447, "learning_rate": 3.77384502330749e-06, "loss": 0.1452, "step": 22876, "teacher_loss": 0.11671268939971924 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.740081250667572, "learning_rate": 3.772338777433482e-06, "loss": 0.2306, "step": 22877, "teacher_loss": 0.1739429086446762 }, { "compression_loss": 0.0, "epoch": 4.13, "label_loss": 0.4419202506542206, "learning_rate": 3.770832788976089e-06, "loss": 0.1772, "step": 22878, "teacher_loss": 0.14783495664596558 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5161177515983582, "learning_rate": 3.769327057969828e-06, "loss": 0.2114, "step": 22879, "teacher_loss": 0.17749135196208954 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2567833960056305, "learning_rate": 3.7678215844492283e-06, "loss": 0.2184, "step": 22880, "teacher_loss": 0.21409842371940613 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.6549907922744751, "learning_rate": 3.766316368448805e-06, "loss": 0.2624, "step": 22881, "teacher_loss": 0.218819722533226 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.16947561502456665, "learning_rate": 3.7648114100030646e-06, "loss": 0.1236, "step": 22882, "teacher_loss": 0.11849543452262878 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5413275957107544, "learning_rate": 3.763306709146514e-06, "loss": 0.2351, "step": 22883, "teacher_loss": 0.20107589662075043 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5842666625976562, "learning_rate": 3.761802265913654e-06, "loss": 0.2478, "step": 22884, "teacher_loss": 0.21043440699577332 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4456090033054352, "learning_rate": 3.760298080338976e-06, "loss": 0.2377, "step": 22885, "teacher_loss": 0.2146110236644745 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.6545077562332153, "learning_rate": 3.7587941524569597e-06, "loss": 0.381, "step": 22886, "teacher_loss": 0.35060250759124756 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5028231739997864, "learning_rate": 3.7572904823020896e-06, "loss": 0.2214, "step": 22887, "teacher_loss": 0.19009234011173248 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.36871349811553955, "learning_rate": 3.755787069908847e-06, "loss": 0.1863, "step": 22888, "teacher_loss": 0.16598603129386902 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5215179920196533, "learning_rate": 3.7542839153116903e-06, "loss": 0.2131, "step": 22889, "teacher_loss": 0.17879268527030945 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.7782065868377686, "learning_rate": 3.7527810185450855e-06, "loss": 0.312, "step": 22890, "teacher_loss": 0.26017823815345764 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5057734847068787, "learning_rate": 3.751278379643497e-06, "loss": 0.1958, "step": 22891, "teacher_loss": 0.16130372881889343 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.34362465143203735, "learning_rate": 3.749775998641365e-06, "loss": 0.2704, "step": 22892, "teacher_loss": 0.2622174024581909 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.26865124702453613, "learning_rate": 3.7482738755731406e-06, "loss": 0.2012, "step": 22893, "teacher_loss": 0.19371531903743744 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5001659989356995, "learning_rate": 3.7467720104732646e-06, "loss": 0.187, "step": 22894, "teacher_loss": 0.15225905179977417 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.45260554552078247, "learning_rate": 3.7452704033761676e-06, "loss": 0.212, "step": 22895, "teacher_loss": 0.1852482259273529 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.21389439702033997, "learning_rate": 3.743769054316269e-06, "loss": 0.223, "step": 22896, "teacher_loss": 0.22405506670475006 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4227154850959778, "learning_rate": 3.742267963328006e-06, "loss": 0.1887, "step": 22897, "teacher_loss": 0.1626890003681183 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.1892571747303009, "learning_rate": 3.7407671304457865e-06, "loss": 0.1734, "step": 22898, "teacher_loss": 0.1715894639492035 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 1.0329095125198364, "learning_rate": 3.739266555704011e-06, "loss": 0.2939, "step": 22899, "teacher_loss": 0.21182504296302795 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.413347065448761, "learning_rate": 3.7377662391371e-06, "loss": 0.2234, "step": 22900, "teacher_loss": 0.20233330130577087 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2679764926433563, "learning_rate": 3.7362661807794446e-06, "loss": 0.1821, "step": 22901, "teacher_loss": 0.17258279025554657 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.6176490187644958, "learning_rate": 3.7347663806654293e-06, "loss": 0.2498, "step": 22902, "teacher_loss": 0.2089250683784485 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.38428351283073425, "learning_rate": 3.733266838829448e-06, "loss": 0.1631, "step": 22903, "teacher_loss": 0.13850151002407074 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4436950087547302, "learning_rate": 3.731767555305881e-06, "loss": 0.1846, "step": 22904, "teacher_loss": 0.15581044554710388 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.7048033475875854, "learning_rate": 3.730268530129097e-06, "loss": 0.2498, "step": 22905, "teacher_loss": 0.19924427568912506 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4116630256175995, "learning_rate": 3.728769763333467e-06, "loss": 0.1368, "step": 22906, "teacher_loss": 0.10622194409370422 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2822698652744293, "learning_rate": 3.7272712549533568e-06, "loss": 0.179, "step": 22907, "teacher_loss": 0.1675695925951004 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.32546940445899963, "learning_rate": 3.725773005023121e-06, "loss": 0.2118, "step": 22908, "teacher_loss": 0.19911660254001617 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5028215050697327, "learning_rate": 3.7242750135770975e-06, "loss": 0.2337, "step": 22909, "teacher_loss": 0.20378778874874115 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2757541239261627, "learning_rate": 3.7227772806496526e-06, "loss": 0.2101, "step": 22910, "teacher_loss": 0.20283447206020355 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5202866196632385, "learning_rate": 3.7212798062751113e-06, "loss": 0.2349, "step": 22911, "teacher_loss": 0.20316708087921143 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.5867432951927185, "learning_rate": 3.719782590487807e-06, "loss": 0.2634, "step": 22912, "teacher_loss": 0.22745990753173828 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.09419476240873337, "learning_rate": 3.7182856333220664e-06, "loss": 0.1783, "step": 22913, "teacher_loss": 0.1876555234193802 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.13743966817855835, "learning_rate": 3.7167889348122165e-06, "loss": 0.2056, "step": 22914, "teacher_loss": 0.21314075589179993 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2192869931459427, "learning_rate": 3.715292494992562e-06, "loss": 0.1874, "step": 22915, "teacher_loss": 0.18389210104942322 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.34884750843048096, "learning_rate": 3.7137963138974195e-06, "loss": 0.2103, "step": 22916, "teacher_loss": 0.19491443037986755 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.714715301990509, "learning_rate": 3.712300391561093e-06, "loss": 0.2627, "step": 22917, "teacher_loss": 0.21250218152999878 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.19638285040855408, "learning_rate": 3.710804728017872e-06, "loss": 0.2273, "step": 22918, "teacher_loss": 0.23075248301029205 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.412413626909256, "learning_rate": 3.7093093233020518e-06, "loss": 0.2304, "step": 22919, "teacher_loss": 0.21021172404289246 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.31634876132011414, "learning_rate": 3.7078141774479224e-06, "loss": 0.1683, "step": 22920, "teacher_loss": 0.15188324451446533 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.45942509174346924, "learning_rate": 3.7063192904897526e-06, "loss": 0.1824, "step": 22921, "teacher_loss": 0.15157867968082428 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.23647035658359528, "learning_rate": 3.7048246624618214e-06, "loss": 0.1158, "step": 22922, "teacher_loss": 0.10233816504478455 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4304838180541992, "learning_rate": 3.703330293398402e-06, "loss": 0.2657, "step": 22923, "teacher_loss": 0.24741026759147644 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.26016974449157715, "learning_rate": 3.701836183333743e-06, "loss": 0.2027, "step": 22924, "teacher_loss": 0.19629809260368347 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.13636326789855957, "learning_rate": 3.700342332302113e-06, "loss": 0.1889, "step": 22925, "teacher_loss": 0.1947277933359146 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.4319711923599243, "learning_rate": 3.6988487403377504e-06, "loss": 0.2679, "step": 22926, "teacher_loss": 0.24966493248939514 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.31379184126853943, "learning_rate": 3.6973554074749077e-06, "loss": 0.1636, "step": 22927, "teacher_loss": 0.14686912298202515 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.16165509819984436, "learning_rate": 3.695862333747815e-06, "loss": 0.1801, "step": 22928, "teacher_loss": 0.18217426538467407 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2274320125579834, "learning_rate": 3.6943695191907055e-06, "loss": 0.1805, "step": 22929, "teacher_loss": 0.17528891563415527 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.270415723323822, "learning_rate": 3.692876963837813e-06, "loss": 0.2843, "step": 22930, "teacher_loss": 0.28579825162887573 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.330597460269928, "learning_rate": 3.691384667723346e-06, "loss": 0.1358, "step": 22931, "teacher_loss": 0.11410612612962723 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.2944859266281128, "learning_rate": 3.689892630881524e-06, "loss": 0.2132, "step": 22932, "teacher_loss": 0.20418840646743774 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.47896048426628113, "learning_rate": 3.688400853346558e-06, "loss": 0.2285, "step": 22933, "teacher_loss": 0.20068971812725067 }, { "compression_loss": 0.0, "epoch": 4.14, "label_loss": 0.27607735991477966, "learning_rate": 3.6869093351526424e-06, "loss": 0.1737, "step": 22934, "teacher_loss": 0.1623132824897766 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5528738498687744, "learning_rate": 3.685418076333983e-06, "loss": 0.2263, "step": 22935, "teacher_loss": 0.1899741142988205 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.36537253856658936, "learning_rate": 3.683927076924759e-06, "loss": 0.146, "step": 22936, "teacher_loss": 0.12161806970834732 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4706469774246216, "learning_rate": 3.6824363369591583e-06, "loss": 0.2387, "step": 22937, "teacher_loss": 0.2129439413547516 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.43230509757995605, "learning_rate": 3.680945856471367e-06, "loss": 0.2205, "step": 22938, "teacher_loss": 0.19695062935352325 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.652461588382721, "learning_rate": 3.6794556354955462e-06, "loss": 0.2241, "step": 22939, "teacher_loss": 0.1764555275440216 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.34214848279953003, "learning_rate": 3.677965674065866e-06, "loss": 0.1961, "step": 22940, "teacher_loss": 0.17989175021648407 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.2174980789422989, "learning_rate": 3.6764759722164935e-06, "loss": 0.2382, "step": 22941, "teacher_loss": 0.24050256609916687 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.375851035118103, "learning_rate": 3.674986529981571e-06, "loss": 0.2101, "step": 22942, "teacher_loss": 0.19165174663066864 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 1.013223648071289, "learning_rate": 3.6734973473952542e-06, "loss": 0.2951, "step": 22943, "teacher_loss": 0.21527984738349915 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.27277395129203796, "learning_rate": 3.6720084244916897e-06, "loss": 0.2143, "step": 22944, "teacher_loss": 0.20775094628334045 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.3420126438140869, "learning_rate": 3.670519761305008e-06, "loss": 0.2025, "step": 22945, "teacher_loss": 0.18698303401470184 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.31995251774787903, "learning_rate": 3.669031357869332e-06, "loss": 0.2208, "step": 22946, "teacher_loss": 0.20978282392024994 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.1461927890777588, "learning_rate": 3.667543214218804e-06, "loss": 0.1272, "step": 22947, "teacher_loss": 0.1250927448272705 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5924326181411743, "learning_rate": 3.6660553303875344e-06, "loss": 0.2495, "step": 22948, "teacher_loss": 0.21143761277198792 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4884374737739563, "learning_rate": 3.66456770640963e-06, "loss": 0.2544, "step": 22949, "teacher_loss": 0.22844627499580383 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.39449718594551086, "learning_rate": 3.663080342319202e-06, "loss": 0.1787, "step": 22950, "teacher_loss": 0.15475699305534363 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.555443525314331, "learning_rate": 3.661593238150357e-06, "loss": 0.2134, "step": 22951, "teacher_loss": 0.17535611987113953 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5994598865509033, "learning_rate": 3.6601063939371802e-06, "loss": 0.1891, "step": 22952, "teacher_loss": 0.14352145791053772 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4192536473274231, "learning_rate": 3.658619809713765e-06, "loss": 0.2242, "step": 22953, "teacher_loss": 0.20252643525600433 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5504093170166016, "learning_rate": 3.6571334855141993e-06, "loss": 0.2432, "step": 22954, "teacher_loss": 0.20905114710330963 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.36973509192466736, "learning_rate": 3.65564742137255e-06, "loss": 0.2217, "step": 22955, "teacher_loss": 0.20520079135894775 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.6982301473617554, "learning_rate": 3.6541616173228937e-06, "loss": 0.2205, "step": 22956, "teacher_loss": 0.16740819811820984 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4887813627719879, "learning_rate": 3.6526760733992982e-06, "loss": 0.2915, "step": 22957, "teacher_loss": 0.26959437131881714 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.959095299243927, "learning_rate": 3.651190789635821e-06, "loss": 0.2965, "step": 22958, "teacher_loss": 0.22287209331989288 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4964185357093811, "learning_rate": 3.6497057660665034e-06, "loss": 0.2361, "step": 22959, "teacher_loss": 0.2072097361087799 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.23728318512439728, "learning_rate": 3.6482210027254115e-06, "loss": 0.2387, "step": 22960, "teacher_loss": 0.2388104498386383 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.21218936145305634, "learning_rate": 3.646736499646578e-06, "loss": 0.1071, "step": 22961, "teacher_loss": 0.09545591473579407 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.16541728377342224, "learning_rate": 3.645252256864034e-06, "loss": 0.1442, "step": 22962, "teacher_loss": 0.14179465174674988 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.20861364901065826, "learning_rate": 3.6437682744118105e-06, "loss": 0.1679, "step": 22963, "teacher_loss": 0.16339004039764404 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.4187069833278656, "learning_rate": 3.642284552323939e-06, "loss": 0.1948, "step": 22964, "teacher_loss": 0.16992425918579102 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.49240052700042725, "learning_rate": 3.6408010906344254e-06, "loss": 0.1982, "step": 22965, "teacher_loss": 0.16550907492637634 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.7561980485916138, "learning_rate": 3.639317889377287e-06, "loss": 0.2299, "step": 22966, "teacher_loss": 0.1714029610157013 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.7846541404724121, "learning_rate": 3.6378349485865308e-06, "loss": 0.3005, "step": 22967, "teacher_loss": 0.24674327671527863 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.9935768246650696, "learning_rate": 3.636352268296156e-06, "loss": 0.3081, "step": 22968, "teacher_loss": 0.23196813464164734 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.7232280969619751, "learning_rate": 3.634869848540143e-06, "loss": 0.2625, "step": 22969, "teacher_loss": 0.2113342583179474 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.42201149463653564, "learning_rate": 3.633387689352499e-06, "loss": 0.2245, "step": 22970, "teacher_loss": 0.20256885886192322 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.19945359230041504, "learning_rate": 3.6319057907671966e-06, "loss": 0.1371, "step": 22971, "teacher_loss": 0.13019129633903503 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.49432605504989624, "learning_rate": 3.630424152818203e-06, "loss": 0.2798, "step": 22972, "teacher_loss": 0.25594794750213623 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.39221715927124023, "learning_rate": 3.628942775539505e-06, "loss": 0.1539, "step": 22973, "teacher_loss": 0.1274665743112564 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.6071451902389526, "learning_rate": 3.627461658965055e-06, "loss": 0.2304, "step": 22974, "teacher_loss": 0.18856005370616913 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.7591613531112671, "learning_rate": 3.6259808031288096e-06, "loss": 0.2146, "step": 22975, "teacher_loss": 0.1540401726961136 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.550673246383667, "learning_rate": 3.6245002080647227e-06, "loss": 0.2902, "step": 22976, "teacher_loss": 0.2613057494163513 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.6936201453208923, "learning_rate": 3.6230198738067462e-06, "loss": 0.374, "step": 22977, "teacher_loss": 0.3384852409362793 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.19716353714466095, "learning_rate": 3.621539800388809e-06, "loss": 0.2243, "step": 22978, "teacher_loss": 0.22733023762702942 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.9593712091445923, "learning_rate": 3.6200599878448494e-06, "loss": 0.2647, "step": 22979, "teacher_loss": 0.18754179775714874 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.7853481769561768, "learning_rate": 3.6185804362087997e-06, "loss": 0.2766, "step": 22980, "teacher_loss": 0.2201196551322937 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.39945822954177856, "learning_rate": 3.617101145514572e-06, "loss": 0.2096, "step": 22981, "teacher_loss": 0.18853068351745605 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.3145533800125122, "learning_rate": 3.615622115796088e-06, "loss": 0.2072, "step": 22982, "teacher_loss": 0.19526326656341553 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.14815108478069305, "learning_rate": 3.614143347087262e-06, "loss": 0.1279, "step": 22983, "teacher_loss": 0.12562045454978943 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.38142094016075134, "learning_rate": 3.612664839421989e-06, "loss": 0.2015, "step": 22984, "teacher_loss": 0.18151132762432098 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.29872769117355347, "learning_rate": 3.6111865928341723e-06, "loss": 0.1843, "step": 22985, "teacher_loss": 0.17154821753501892 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.9854205846786499, "learning_rate": 3.6097086073576985e-06, "loss": 0.2835, "step": 22986, "teacher_loss": 0.20552051067352295 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5255351662635803, "learning_rate": 3.6082308830264566e-06, "loss": 0.2555, "step": 22987, "teacher_loss": 0.2255120575428009 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5284769535064697, "learning_rate": 3.6067534198743295e-06, "loss": 0.2031, "step": 22988, "teacher_loss": 0.16697238385677338 }, { "compression_loss": 0.0, "epoch": 4.15, "label_loss": 0.5412842035293579, "learning_rate": 3.6052762179351837e-06, "loss": 0.2328, "step": 22989, "teacher_loss": 0.19850695133209229 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5556703805923462, "learning_rate": 3.6037992772428955e-06, "loss": 0.3705, "step": 22990, "teacher_loss": 0.3499550223350525 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.19103839993476868, "learning_rate": 3.6023225978313196e-06, "loss": 0.1859, "step": 22991, "teacher_loss": 0.18531179428100586 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.1765522062778473, "learning_rate": 3.6008461797343134e-06, "loss": 0.1803, "step": 22992, "teacher_loss": 0.1807209551334381 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.2183285355567932, "learning_rate": 3.5993700229857317e-06, "loss": 0.1657, "step": 22993, "teacher_loss": 0.1598687767982483 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.1906394511461258, "learning_rate": 3.5978941276194092e-06, "loss": 0.152, "step": 22994, "teacher_loss": 0.14773677289485931 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.1109514907002449, "learning_rate": 3.596418493669191e-06, "loss": 0.1249, "step": 22995, "teacher_loss": 0.12639667093753815 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.27491211891174316, "learning_rate": 3.5949431211689104e-06, "loss": 0.1636, "step": 22996, "teacher_loss": 0.15128490328788757 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.9285383224487305, "learning_rate": 3.5934680101523865e-06, "loss": 0.2512, "step": 22997, "teacher_loss": 0.17594145238399506 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.476473331451416, "learning_rate": 3.591993160653447e-06, "loss": 0.216, "step": 22998, "teacher_loss": 0.18701039254665375 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.20172373950481415, "learning_rate": 3.590518572705896e-06, "loss": 0.1433, "step": 22999, "teacher_loss": 0.13675940036773682 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.615238606929779, "learning_rate": 3.589044246343547e-06, "loss": 0.2452, "step": 23000, "teacher_loss": 0.20402935147285461 }, { "epoch": 4.16, "eval_exact_match": 80.52034058656575, "eval_f1": 87.74671203053948, "step": 23000 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.2686135172843933, "learning_rate": 3.587570181600206e-06, "loss": 0.1752, "step": 23001, "teacher_loss": 0.16479063034057617 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6351625323295593, "learning_rate": 3.5860963785096594e-06, "loss": 0.2881, "step": 23002, "teacher_loss": 0.24951478838920593 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5944644808769226, "learning_rate": 3.584622837105702e-06, "loss": 0.1979, "step": 23003, "teacher_loss": 0.1538289338350296 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6565265655517578, "learning_rate": 3.5831495574221237e-06, "loss": 0.2787, "step": 23004, "teacher_loss": 0.23667272925376892 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3300960063934326, "learning_rate": 3.5816765394926916e-06, "loss": 0.2137, "step": 23005, "teacher_loss": 0.20072150230407715 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.41751164197921753, "learning_rate": 3.5802037833511823e-06, "loss": 0.3082, "step": 23006, "teacher_loss": 0.2960692048072815 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3950008153915405, "learning_rate": 3.5787312890313678e-06, "loss": 0.1687, "step": 23007, "teacher_loss": 0.14360690116882324 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5454259514808655, "learning_rate": 3.5772590565670006e-06, "loss": 0.2142, "step": 23008, "teacher_loss": 0.17738831043243408 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.640907883644104, "learning_rate": 3.5757870859918266e-06, "loss": 0.2397, "step": 23009, "teacher_loss": 0.19507327675819397 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5833874940872192, "learning_rate": 3.574315377339613e-06, "loss": 0.2279, "step": 23010, "teacher_loss": 0.1884416937828064 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.4555339217185974, "learning_rate": 3.5728439306440926e-06, "loss": 0.2772, "step": 23011, "teacher_loss": 0.25733864307403564 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3392251431941986, "learning_rate": 3.5713727459389944e-06, "loss": 0.2683, "step": 23012, "teacher_loss": 0.26043885946273804 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3541724681854248, "learning_rate": 3.5699018232580556e-06, "loss": 0.1978, "step": 23013, "teacher_loss": 0.18038895726203918 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.13383889198303223, "learning_rate": 3.5684311626350017e-06, "loss": 0.1369, "step": 23014, "teacher_loss": 0.1372849941253662 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5277363657951355, "learning_rate": 3.566960764103545e-06, "loss": 0.3301, "step": 23015, "teacher_loss": 0.30815237760543823 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.44945448637008667, "learning_rate": 3.5654906276974e-06, "loss": 0.1915, "step": 23016, "teacher_loss": 0.1628149449825287 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.1567695587873459, "learning_rate": 3.5640207534502754e-06, "loss": 0.1412, "step": 23017, "teacher_loss": 0.13943041861057281 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.4491906762123108, "learning_rate": 3.562551141395869e-06, "loss": 0.2017, "step": 23018, "teacher_loss": 0.17420132458209991 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.17944678664207458, "learning_rate": 3.5610817915678647e-06, "loss": 0.1957, "step": 23019, "teacher_loss": 0.19749949872493744 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.2116556167602539, "learning_rate": 3.559612703999967e-06, "loss": 0.1822, "step": 23020, "teacher_loss": 0.178895965218544 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3557807505130768, "learning_rate": 3.5581438787258513e-06, "loss": 0.1876, "step": 23021, "teacher_loss": 0.16890501976013184 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.7308270931243896, "learning_rate": 3.556675315779182e-06, "loss": 0.2261, "step": 23022, "teacher_loss": 0.16999118030071259 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.34969595074653625, "learning_rate": 3.5552070151936487e-06, "loss": 0.2091, "step": 23023, "teacher_loss": 0.19350463151931763 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3997851014137268, "learning_rate": 3.553738977002905e-06, "loss": 0.2067, "step": 23024, "teacher_loss": 0.18525460362434387 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.34718644618988037, "learning_rate": 3.552271201240605e-06, "loss": 0.1876, "step": 23025, "teacher_loss": 0.16988863050937653 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.19922097027301788, "learning_rate": 3.550803687940404e-06, "loss": 0.1322, "step": 23026, "teacher_loss": 0.1247720718383789 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.22589808702468872, "learning_rate": 3.5493364371359526e-06, "loss": 0.1569, "step": 23027, "teacher_loss": 0.1492590606212616 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.26041728258132935, "learning_rate": 3.54786944886088e-06, "loss": 0.1933, "step": 23028, "teacher_loss": 0.1857980191707611 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.24987030029296875, "learning_rate": 3.5464027231488265e-06, "loss": 0.1806, "step": 23029, "teacher_loss": 0.1728602796792984 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6884642839431763, "learning_rate": 3.5449362600334247e-06, "loss": 0.2773, "step": 23030, "teacher_loss": 0.23156386613845825 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.33983445167541504, "learning_rate": 3.543470059548288e-06, "loss": 0.166, "step": 23031, "teacher_loss": 0.14672136306762695 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.2822462022304535, "learning_rate": 3.5420041217270276e-06, "loss": 0.1874, "step": 23032, "teacher_loss": 0.17689284682273865 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6106287240982056, "learning_rate": 3.5405384466032676e-06, "loss": 0.3592, "step": 23033, "teacher_loss": 0.3312756419181824 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6505919694900513, "learning_rate": 3.5390730342106024e-06, "loss": 0.2125, "step": 23034, "teacher_loss": 0.16382786631584167 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.41064906120300293, "learning_rate": 3.537607884582629e-06, "loss": 0.1987, "step": 23035, "teacher_loss": 0.17516517639160156 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.33447691798210144, "learning_rate": 3.536142997752939e-06, "loss": 0.2958, "step": 23036, "teacher_loss": 0.29151907563209534 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.6125196218490601, "learning_rate": 3.5346783737551252e-06, "loss": 0.241, "step": 23037, "teacher_loss": 0.1997355818748474 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5986076593399048, "learning_rate": 3.5332140126227576e-06, "loss": 0.2309, "step": 23038, "teacher_loss": 0.1900341659784317 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.33399444818496704, "learning_rate": 3.5317499143894124e-06, "loss": 0.1846, "step": 23039, "teacher_loss": 0.167948380112648 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.5682659149169922, "learning_rate": 3.5302860790886627e-06, "loss": 0.227, "step": 23040, "teacher_loss": 0.18908637762069702 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.475034236907959, "learning_rate": 3.52882250675406e-06, "loss": 0.2104, "step": 23041, "teacher_loss": 0.18101096153259277 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.3575937747955322, "learning_rate": 3.5273591974191665e-06, "loss": 0.3321, "step": 23042, "teacher_loss": 0.32922661304473877 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.17881804704666138, "learning_rate": 3.525896151117533e-06, "loss": 0.1648, "step": 23043, "teacher_loss": 0.16326507925987244 }, { "compression_loss": 0.0, "epoch": 4.16, "label_loss": 0.2862161099910736, "learning_rate": 3.5244333678826944e-06, "loss": 0.1842, "step": 23044, "teacher_loss": 0.17287206649780273 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3862951993942261, "learning_rate": 3.522970847748196e-06, "loss": 0.2788, "step": 23045, "teacher_loss": 0.26684844493865967 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.14299936592578888, "learning_rate": 3.5215085907475685e-06, "loss": 0.1527, "step": 23046, "teacher_loss": 0.153724804520607 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.6448768377304077, "learning_rate": 3.5200465969143308e-06, "loss": 0.2262, "step": 23047, "teacher_loss": 0.17962735891342163 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.21429994702339172, "learning_rate": 3.518584866282009e-06, "loss": 0.1781, "step": 23048, "teacher_loss": 0.174087792634964 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.6824623942375183, "learning_rate": 3.5171233988841103e-06, "loss": 0.2198, "step": 23049, "teacher_loss": 0.16844826936721802 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2713364362716675, "learning_rate": 3.515662194754145e-06, "loss": 0.1622, "step": 23050, "teacher_loss": 0.1501184105873108 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.29406094551086426, "learning_rate": 3.5142012539256172e-06, "loss": 0.2143, "step": 23051, "teacher_loss": 0.20543086528778076 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 1.391892433166504, "learning_rate": 3.5127405764320136e-06, "loss": 0.4066, "step": 23052, "teacher_loss": 0.2971389591693878 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.21659645438194275, "learning_rate": 3.5112801623068334e-06, "loss": 0.1823, "step": 23053, "teacher_loss": 0.17845438420772552 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.45537999272346497, "learning_rate": 3.5098200115835486e-06, "loss": 0.2846, "step": 23054, "teacher_loss": 0.26559895277023315 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.49798262119293213, "learning_rate": 3.5083601242956424e-06, "loss": 0.2561, "step": 23055, "teacher_loss": 0.229237362742424 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2531549334526062, "learning_rate": 3.50690050047659e-06, "loss": 0.174, "step": 23056, "teacher_loss": 0.16520947217941284 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.06606639921665192, "learning_rate": 3.5054411401598454e-06, "loss": 0.1605, "step": 23057, "teacher_loss": 0.17101889848709106 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.28669625520706177, "learning_rate": 3.503982043378877e-06, "loss": 0.1367, "step": 23058, "teacher_loss": 0.12002532184123993 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5697584748268127, "learning_rate": 3.5025232101671307e-06, "loss": 0.2316, "step": 23059, "teacher_loss": 0.19402465224266052 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3674541711807251, "learning_rate": 3.5010646405580553e-06, "loss": 0.2789, "step": 23060, "teacher_loss": 0.2690128684043884 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.37795090675354004, "learning_rate": 3.4996063345850963e-06, "loss": 0.2091, "step": 23061, "teacher_loss": 0.19036495685577393 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.6862949728965759, "learning_rate": 3.498148292281679e-06, "loss": 0.3393, "step": 23062, "teacher_loss": 0.3007887005805969 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.4280327260494232, "learning_rate": 3.4966905136812386e-06, "loss": 0.212, "step": 23063, "teacher_loss": 0.18798241019248962 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.38071465492248535, "learning_rate": 3.4952329988172e-06, "loss": 0.191, "step": 23064, "teacher_loss": 0.16994601488113403 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5661048889160156, "learning_rate": 3.4937757477229703e-06, "loss": 0.1905, "step": 23065, "teacher_loss": 0.1487686038017273 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.8560075759887695, "learning_rate": 3.492318760431965e-06, "loss": 0.3051, "step": 23066, "teacher_loss": 0.24387824535369873 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2786880135536194, "learning_rate": 3.490862036977594e-06, "loss": 0.1809, "step": 23067, "teacher_loss": 0.16999632120132446 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.36139294505119324, "learning_rate": 3.4894055773932505e-06, "loss": 0.1684, "step": 23068, "teacher_loss": 0.146906316280365 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.19137410819530487, "learning_rate": 3.4879493817123174e-06, "loss": 0.1659, "step": 23069, "teacher_loss": 0.16308638453483582 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.378268837928772, "learning_rate": 3.4864934499681987e-06, "loss": 0.2321, "step": 23070, "teacher_loss": 0.21585942804813385 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.4817790985107422, "learning_rate": 3.485037782194266e-06, "loss": 0.1819, "step": 23071, "teacher_loss": 0.14856937527656555 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.6761884093284607, "learning_rate": 3.4835823784238885e-06, "loss": 0.2285, "step": 23072, "teacher_loss": 0.17873480916023254 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.6765734553337097, "learning_rate": 3.4821272386904403e-06, "loss": 0.2071, "step": 23073, "teacher_loss": 0.15498651564121246 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.4827902913093567, "learning_rate": 3.4806723630272857e-06, "loss": 0.2411, "step": 23074, "teacher_loss": 0.21424569189548492 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.4492906928062439, "learning_rate": 3.479217751467772e-06, "loss": 0.2292, "step": 23075, "teacher_loss": 0.20478612184524536 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5636456608772278, "learning_rate": 3.4777634040452555e-06, "loss": 0.2305, "step": 23076, "teacher_loss": 0.19347113370895386 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2304190993309021, "learning_rate": 3.476309320793083e-06, "loss": 0.1731, "step": 23077, "teacher_loss": 0.16676658391952515 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2625107169151306, "learning_rate": 3.474855501744583e-06, "loss": 0.162, "step": 23078, "teacher_loss": 0.15083765983581543 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.7221174836158752, "learning_rate": 3.473401946933091e-06, "loss": 0.3914, "step": 23079, "teacher_loss": 0.35461288690567017 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3117017447948456, "learning_rate": 3.4719486563919404e-06, "loss": 0.1907, "step": 23080, "teacher_loss": 0.17723682522773743 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.7119735479354858, "learning_rate": 3.470495630154444e-06, "loss": 0.2388, "step": 23081, "teacher_loss": 0.18624058365821838 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3469555675983429, "learning_rate": 3.469042868253906e-06, "loss": 0.159, "step": 23082, "teacher_loss": 0.13811668753623962 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.30276572704315186, "learning_rate": 3.467590370723652e-06, "loss": 0.1792, "step": 23083, "teacher_loss": 0.165448397397995 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5348036289215088, "learning_rate": 3.4661381375969743e-06, "loss": 0.2451, "step": 23084, "teacher_loss": 0.2129574567079544 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.36367231607437134, "learning_rate": 3.4646861689071654e-06, "loss": 0.1796, "step": 23085, "teacher_loss": 0.15916860103607178 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3639659881591797, "learning_rate": 3.4632344646875187e-06, "loss": 0.1554, "step": 23086, "teacher_loss": 0.13227801024913788 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.3779885768890381, "learning_rate": 3.4617830249713197e-06, "loss": 0.2076, "step": 23087, "teacher_loss": 0.18865551054477692 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.177667498588562, "learning_rate": 3.460331849791838e-06, "loss": 0.1902, "step": 23088, "teacher_loss": 0.19158610701560974 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2031850814819336, "learning_rate": 3.4588809391823506e-06, "loss": 0.2337, "step": 23089, "teacher_loss": 0.23713871836662292 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5875095725059509, "learning_rate": 3.457430293176124e-06, "loss": 0.2912, "step": 23090, "teacher_loss": 0.25826495885849 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.881023645401001, "learning_rate": 3.4559799118064123e-06, "loss": 0.304, "step": 23091, "teacher_loss": 0.23992681503295898 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.36596783995628357, "learning_rate": 3.454529795106468e-06, "loss": 0.204, "step": 23092, "teacher_loss": 0.18600502610206604 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5663193464279175, "learning_rate": 3.4530799431095466e-06, "loss": 0.2273, "step": 23093, "teacher_loss": 0.1896408349275589 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5457674264907837, "learning_rate": 3.4516303558488814e-06, "loss": 0.201, "step": 23094, "teacher_loss": 0.16270089149475098 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2736748158931732, "learning_rate": 3.4501810333576996e-06, "loss": 0.1995, "step": 23095, "teacher_loss": 0.19127756357192993 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2578657865524292, "learning_rate": 3.4487319756692485e-06, "loss": 0.1627, "step": 23096, "teacher_loss": 0.15217159688472748 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.29563191533088684, "learning_rate": 3.4472831828167394e-06, "loss": 0.1679, "step": 23097, "teacher_loss": 0.15369385480880737 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.2139589488506317, "learning_rate": 3.445834654833387e-06, "loss": 0.1381, "step": 23098, "teacher_loss": 0.1296936273574829 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.5471444129943848, "learning_rate": 3.4443863917524027e-06, "loss": 0.2578, "step": 23099, "teacher_loss": 0.22570329904556274 }, { "compression_loss": 0.0, "epoch": 4.17, "label_loss": 0.47578898072242737, "learning_rate": 3.4429383936069986e-06, "loss": 0.3296, "step": 23100, "teacher_loss": 0.31338417530059814 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4150957465171814, "learning_rate": 3.441490660430363e-06, "loss": 0.2109, "step": 23101, "teacher_loss": 0.18826086819171906 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.7113062143325806, "learning_rate": 3.440043192255693e-06, "loss": 0.2363, "step": 23102, "teacher_loss": 0.18353629112243652 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.20287221670150757, "learning_rate": 3.4385959891161767e-06, "loss": 0.1397, "step": 23103, "teacher_loss": 0.13266320526599884 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.3885650336742401, "learning_rate": 3.437149051044989e-06, "loss": 0.1565, "step": 23104, "teacher_loss": 0.1306624412536621 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.2524203062057495, "learning_rate": 3.4357023780753062e-06, "loss": 0.2213, "step": 23105, "teacher_loss": 0.21788470447063446 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.5326588749885559, "learning_rate": 3.4342559702402986e-06, "loss": 0.2417, "step": 23106, "teacher_loss": 0.2094140350818634 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.38849279284477234, "learning_rate": 3.432809827573122e-06, "loss": 0.2148, "step": 23107, "teacher_loss": 0.19549046456813812 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.8679608106613159, "learning_rate": 3.4313639501069423e-06, "loss": 0.3087, "step": 23108, "teacher_loss": 0.24660570919513702 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.6019550561904907, "learning_rate": 3.4299183378748964e-06, "loss": 0.3181, "step": 23109, "teacher_loss": 0.2865433692932129 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.482722669839859, "learning_rate": 3.4284729909101343e-06, "loss": 0.2053, "step": 23110, "teacher_loss": 0.17447781562805176 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.16539128124713898, "learning_rate": 3.4270279092457983e-06, "loss": 0.1799, "step": 23111, "teacher_loss": 0.18150529265403748 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4296663999557495, "learning_rate": 3.4255830929150094e-06, "loss": 0.2324, "step": 23112, "teacher_loss": 0.21045240759849548 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.2847111225128174, "learning_rate": 3.424138541950899e-06, "loss": 0.199, "step": 23113, "teacher_loss": 0.18947505950927734 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.425641804933548, "learning_rate": 3.4226942563865894e-06, "loss": 0.2387, "step": 23114, "teacher_loss": 0.21792790293693542 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.20858192443847656, "learning_rate": 3.4212502362551865e-06, "loss": 0.1683, "step": 23115, "teacher_loss": 0.16383719444274902 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.41641461849212646, "learning_rate": 3.419806481589805e-06, "loss": 0.1497, "step": 23116, "teacher_loss": 0.12008485943078995 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.3464733958244324, "learning_rate": 3.418362992423536e-06, "loss": 0.1677, "step": 23117, "teacher_loss": 0.1478014588356018 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.26356565952301025, "learning_rate": 3.4169197687894817e-06, "loss": 0.1459, "step": 23118, "teacher_loss": 0.13278725743293762 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4236745834350586, "learning_rate": 3.415476810720732e-06, "loss": 0.1926, "step": 23119, "teacher_loss": 0.16697412729263306 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.49042966961860657, "learning_rate": 3.4140341182503636e-06, "loss": 0.2033, "step": 23120, "teacher_loss": 0.17136478424072266 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.793599009513855, "learning_rate": 3.412591691411461e-06, "loss": 0.2855, "step": 23121, "teacher_loss": 0.22906804084777832 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.48320770263671875, "learning_rate": 3.4111495302370847e-06, "loss": 0.2039, "step": 23122, "teacher_loss": 0.17289745807647705 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4167281985282898, "learning_rate": 3.4097076347603045e-06, "loss": 0.2219, "step": 23123, "teacher_loss": 0.20024336874485016 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.39261752367019653, "learning_rate": 3.4082660050141834e-06, "loss": 0.3335, "step": 23124, "teacher_loss": 0.3268764019012451 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.5964981317520142, "learning_rate": 3.406824641031765e-06, "loss": 0.2728, "step": 23125, "teacher_loss": 0.2368173599243164 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.7356168031692505, "learning_rate": 3.4053835428460994e-06, "loss": 0.2088, "step": 23126, "teacher_loss": 0.1502305269241333 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.2757972478866577, "learning_rate": 3.4039427104902303e-06, "loss": 0.1441, "step": 23127, "teacher_loss": 0.12949460744857788 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.2848433554172516, "learning_rate": 3.4025021439971847e-06, "loss": 0.2147, "step": 23128, "teacher_loss": 0.20693716406822205 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.09261728078126907, "learning_rate": 3.4010618433999928e-06, "loss": 0.1254, "step": 23129, "teacher_loss": 0.12908291816711426 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4172527492046356, "learning_rate": 3.399621808731683e-06, "loss": 0.1994, "step": 23130, "teacher_loss": 0.17517180740833282 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.3694298565387726, "learning_rate": 3.3981820400252644e-06, "loss": 0.1904, "step": 23131, "teacher_loss": 0.17045944929122925 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.6036772727966309, "learning_rate": 3.3967425373137402e-06, "loss": 0.198, "step": 23132, "teacher_loss": 0.1529236137866974 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.3890681266784668, "learning_rate": 3.3953033006301286e-06, "loss": 0.2426, "step": 23133, "teacher_loss": 0.226350799202919 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.6746411323547363, "learning_rate": 3.393864330007419e-06, "loss": 0.2963, "step": 23134, "teacher_loss": 0.2542327344417572 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.5366601347923279, "learning_rate": 3.392425625478601e-06, "loss": 0.251, "step": 23135, "teacher_loss": 0.21928879618644714 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.7229174375534058, "learning_rate": 3.3909871870766607e-06, "loss": 0.2026, "step": 23136, "teacher_loss": 0.14477306604385376 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.16683848202228546, "learning_rate": 3.3895490148345824e-06, "loss": 0.2173, "step": 23137, "teacher_loss": 0.2228713035583496 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.5871052742004395, "learning_rate": 3.3881111087853324e-06, "loss": 0.2727, "step": 23138, "teacher_loss": 0.2377534806728363 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.42284300923347473, "learning_rate": 3.38667346896188e-06, "loss": 0.2072, "step": 23139, "teacher_loss": 0.18329398334026337 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4151493310928345, "learning_rate": 3.385236095397191e-06, "loss": 0.1943, "step": 23140, "teacher_loss": 0.16978409886360168 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.41488131880760193, "learning_rate": 3.3837989881242142e-06, "loss": 0.1417, "step": 23141, "teacher_loss": 0.11133973300457001 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.565300703048706, "learning_rate": 3.3823621471758915e-06, "loss": 0.2247, "step": 23142, "teacher_loss": 0.18681365251541138 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.9429868459701538, "learning_rate": 3.380925572585183e-06, "loss": 0.2981, "step": 23143, "teacher_loss": 0.22640720009803772 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.25328555703163147, "learning_rate": 3.3794892643850138e-06, "loss": 0.1962, "step": 23144, "teacher_loss": 0.1898488700389862 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.269192099571228, "learning_rate": 3.3780532226083084e-06, "loss": 0.2468, "step": 23145, "teacher_loss": 0.24431046843528748 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.40381455421447754, "learning_rate": 3.3766174472880046e-06, "loss": 0.2304, "step": 23146, "teacher_loss": 0.2111397087574005 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4522622525691986, "learning_rate": 3.3751819384570164e-06, "loss": 0.1818, "step": 23147, "teacher_loss": 0.1517300307750702 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.3893733620643616, "learning_rate": 3.3737466961482477e-06, "loss": 0.3172, "step": 23148, "teacher_loss": 0.30912572145462036 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.18304932117462158, "learning_rate": 3.3723117203946102e-06, "loss": 0.188, "step": 23149, "teacher_loss": 0.18857598304748535 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.10586945712566376, "learning_rate": 3.3708770112290076e-06, "loss": 0.173, "step": 23150, "teacher_loss": 0.18046292662620544 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.16860730946063995, "learning_rate": 3.3694425686843257e-06, "loss": 0.192, "step": 23151, "teacher_loss": 0.19455364346504211 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.33729445934295654, "learning_rate": 3.368008392793454e-06, "loss": 0.2079, "step": 23152, "teacher_loss": 0.19349642097949982 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.6588553190231323, "learning_rate": 3.3665744835892804e-06, "loss": 0.2214, "step": 23153, "teacher_loss": 0.17283034324645996 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.23850327730178833, "learning_rate": 3.3651408411046713e-06, "loss": 0.1432, "step": 23154, "teacher_loss": 0.13265465199947357 }, { "compression_loss": 0.0, "epoch": 4.18, "label_loss": 0.4854457378387451, "learning_rate": 3.363707465372499e-06, "loss": 0.2116, "step": 23155, "teacher_loss": 0.18115684390068054 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.2723848223686218, "learning_rate": 3.362274356425631e-06, "loss": 0.1509, "step": 23156, "teacher_loss": 0.13745540380477905 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 1.0670541524887085, "learning_rate": 3.3608415142969216e-06, "loss": 0.2708, "step": 23157, "teacher_loss": 0.1823194921016693 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.19311510026454926, "learning_rate": 3.359408939019214e-06, "loss": 0.1402, "step": 23158, "teacher_loss": 0.13434293866157532 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3140757083892822, "learning_rate": 3.357976630625361e-06, "loss": 0.1709, "step": 23159, "teacher_loss": 0.15504290163516998 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.30423688888549805, "learning_rate": 3.3565445891482022e-06, "loss": 0.2463, "step": 23160, "teacher_loss": 0.23991422355175018 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.47882339358329773, "learning_rate": 3.355112814620564e-06, "loss": 0.2395, "step": 23161, "teacher_loss": 0.21287930011749268 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3179911673069, "learning_rate": 3.353681307075275e-06, "loss": 0.1748, "step": 23162, "teacher_loss": 0.15884165465831757 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3309684097766876, "learning_rate": 3.352250066545162e-06, "loss": 0.2097, "step": 23163, "teacher_loss": 0.1961754560470581 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.2728070020675659, "learning_rate": 3.3508190930630267e-06, "loss": 0.1953, "step": 23164, "teacher_loss": 0.18664434552192688 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.2471676468849182, "learning_rate": 3.3493883866616844e-06, "loss": 0.2007, "step": 23165, "teacher_loss": 0.19557629525661469 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3898439407348633, "learning_rate": 3.3479579473739396e-06, "loss": 0.2091, "step": 23166, "teacher_loss": 0.18903875350952148 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.526746392250061, "learning_rate": 3.346527775232582e-06, "loss": 0.2977, "step": 23167, "teacher_loss": 0.2722380757331848 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.30383870005607605, "learning_rate": 3.3450978702704026e-06, "loss": 0.1954, "step": 23168, "teacher_loss": 0.18331778049468994 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.7254639863967896, "learning_rate": 3.3436682325201894e-06, "loss": 0.2012, "step": 23169, "teacher_loss": 0.14290496706962585 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.28834739327430725, "learning_rate": 3.3422388620147126e-06, "loss": 0.206, "step": 23170, "teacher_loss": 0.1968344897031784 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3278650641441345, "learning_rate": 3.3408097587867508e-06, "loss": 0.1576, "step": 23171, "teacher_loss": 0.13863252103328705 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.5140213966369629, "learning_rate": 3.3393809228690615e-06, "loss": 0.2292, "step": 23172, "teacher_loss": 0.19749847054481506 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3850061297416687, "learning_rate": 3.3379523542944064e-06, "loss": 0.2267, "step": 23173, "teacher_loss": 0.20907780528068542 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4654046595096588, "learning_rate": 3.336524053095544e-06, "loss": 0.1822, "step": 23174, "teacher_loss": 0.15067876875400543 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.6908611059188843, "learning_rate": 3.3350960193052128e-06, "loss": 0.2103, "step": 23175, "teacher_loss": 0.15692821145057678 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.36671182513237, "learning_rate": 3.33366825295616e-06, "loss": 0.1861, "step": 23176, "teacher_loss": 0.16601984202861786 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.12948335707187653, "learning_rate": 3.332240754081112e-06, "loss": 0.1298, "step": 23177, "teacher_loss": 0.12986937165260315 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.47745347023010254, "learning_rate": 3.330813522712803e-06, "loss": 0.1947, "step": 23178, "teacher_loss": 0.1632394790649414 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4009149372577667, "learning_rate": 3.3293865588839572e-06, "loss": 0.1994, "step": 23179, "teacher_loss": 0.17702603340148926 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4606701135635376, "learning_rate": 3.327959862627283e-06, "loss": 0.2311, "step": 23180, "teacher_loss": 0.2056129425764084 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.39936357736587524, "learning_rate": 3.326533433975498e-06, "loss": 0.214, "step": 23181, "teacher_loss": 0.19342336058616638 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4957432150840759, "learning_rate": 3.325107272961301e-06, "loss": 0.285, "step": 23182, "teacher_loss": 0.2616129517555237 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3253798186779022, "learning_rate": 3.3236813796173874e-06, "loss": 0.1679, "step": 23183, "teacher_loss": 0.1504509598016739 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.41809338331222534, "learning_rate": 3.322255753976459e-06, "loss": 0.2774, "step": 23184, "teacher_loss": 0.26179519295692444 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.2753937244415283, "learning_rate": 3.3208303960711895e-06, "loss": 0.2219, "step": 23185, "teacher_loss": 0.21598400175571442 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4757876396179199, "learning_rate": 3.319405305934264e-06, "loss": 0.22, "step": 23186, "teacher_loss": 0.19157151877880096 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.28362560272216797, "learning_rate": 3.317980483598358e-06, "loss": 0.1972, "step": 23187, "teacher_loss": 0.18758606910705566 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4339344799518585, "learning_rate": 3.3165559290961305e-06, "loss": 0.2347, "step": 23188, "teacher_loss": 0.21256569027900696 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.22173359990119934, "learning_rate": 3.3151316424602473e-06, "loss": 0.3242, "step": 23189, "teacher_loss": 0.33562013506889343 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3903476595878601, "learning_rate": 3.313707623723367e-06, "loss": 0.1981, "step": 23190, "teacher_loss": 0.176687091588974 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3558241128921509, "learning_rate": 3.3122838729181337e-06, "loss": 0.2486, "step": 23191, "teacher_loss": 0.23665130138397217 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.17406710982322693, "learning_rate": 3.3108603900771796e-06, "loss": 0.1564, "step": 23192, "teacher_loss": 0.15439894795417786 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.23174987733364105, "learning_rate": 3.3094371752331614e-06, "loss": 0.1345, "step": 23193, "teacher_loss": 0.123715341091156 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.2956083416938782, "learning_rate": 3.308014228418697e-06, "loss": 0.2285, "step": 23194, "teacher_loss": 0.22104580700397491 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.5295372605323792, "learning_rate": 3.3065915496664036e-06, "loss": 0.2556, "step": 23195, "teacher_loss": 0.22513464093208313 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.3629109263420105, "learning_rate": 3.305169139008916e-06, "loss": 0.1687, "step": 23196, "teacher_loss": 0.1470872014760971 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.5343495011329651, "learning_rate": 3.3037469964788375e-06, "loss": 0.2891, "step": 23197, "teacher_loss": 0.26180732250213623 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.5343927145004272, "learning_rate": 3.3023251221087696e-06, "loss": 0.1842, "step": 23198, "teacher_loss": 0.14534254372119904 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.13145811855793, "learning_rate": 3.3009035159313133e-06, "loss": 0.1611, "step": 23199, "teacher_loss": 0.16436973214149475 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.45168089866638184, "learning_rate": 3.2994821779790692e-06, "loss": 0.2444, "step": 23200, "teacher_loss": 0.22133547067642212 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.24440333247184753, "learning_rate": 3.2980611082846155e-06, "loss": 0.2401, "step": 23201, "teacher_loss": 0.23964785039424896 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.34215888381004333, "learning_rate": 3.2966403068805354e-06, "loss": 0.1438, "step": 23202, "teacher_loss": 0.12181514501571655 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.7256677746772766, "learning_rate": 3.295219773799408e-06, "loss": 0.2474, "step": 23203, "teacher_loss": 0.19421373307704926 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4638415277004242, "learning_rate": 3.2937995090737994e-06, "loss": 0.1843, "step": 23204, "teacher_loss": 0.1532231718301773 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4606209695339203, "learning_rate": 3.292379512736263e-06, "loss": 0.2317, "step": 23205, "teacher_loss": 0.20623856782913208 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.7658290863037109, "learning_rate": 3.2909597848193703e-06, "loss": 0.2832, "step": 23206, "teacher_loss": 0.2295701801776886 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.43750715255737305, "learning_rate": 3.2895403253556637e-06, "loss": 0.2398, "step": 23207, "teacher_loss": 0.21785637736320496 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.30773842334747314, "learning_rate": 3.2881211343776845e-06, "loss": 0.2379, "step": 23208, "teacher_loss": 0.23010963201522827 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.4041637182235718, "learning_rate": 3.286702211917975e-06, "loss": 0.2011, "step": 23209, "teacher_loss": 0.17848192155361176 }, { "compression_loss": 0.0, "epoch": 4.19, "label_loss": 0.36244457960128784, "learning_rate": 3.285283558009068e-06, "loss": 0.2011, "step": 23210, "teacher_loss": 0.18321293592453003 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.46524864435195923, "learning_rate": 3.2838651726834818e-06, "loss": 0.3107, "step": 23211, "teacher_loss": 0.29352104663848877 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.41521868109703064, "learning_rate": 3.2824470559737417e-06, "loss": 0.1819, "step": 23212, "teacher_loss": 0.15600429475307465 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.48029810190200806, "learning_rate": 3.281029207912364e-06, "loss": 0.2252, "step": 23213, "teacher_loss": 0.19683459401130676 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.41815704107284546, "learning_rate": 3.279611628531845e-06, "loss": 0.1873, "step": 23214, "teacher_loss": 0.16170324385166168 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.22239267826080322, "learning_rate": 3.278194317864693e-06, "loss": 0.1346, "step": 23215, "teacher_loss": 0.12484588474035263 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.3809296190738678, "learning_rate": 3.276777275943406e-06, "loss": 0.2397, "step": 23216, "teacher_loss": 0.22396604716777802 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.42656710743904114, "learning_rate": 3.275360502800464e-06, "loss": 0.2285, "step": 23217, "teacher_loss": 0.20649024844169617 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.34348562359809875, "learning_rate": 3.273943998468354e-06, "loss": 0.2613, "step": 23218, "teacher_loss": 0.25216394662857056 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.1662067323923111, "learning_rate": 3.272527762979553e-06, "loss": 0.1574, "step": 23219, "teacher_loss": 0.15641099214553833 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4828524589538574, "learning_rate": 3.2711117963665322e-06, "loss": 0.1973, "step": 23220, "teacher_loss": 0.16558346152305603 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4261665940284729, "learning_rate": 3.2696960986617486e-06, "loss": 0.181, "step": 23221, "teacher_loss": 0.15380455553531647 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.653442919254303, "learning_rate": 3.2682806698976633e-06, "loss": 0.2423, "step": 23222, "teacher_loss": 0.19660450518131256 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.36236006021499634, "learning_rate": 3.266865510106733e-06, "loss": 0.168, "step": 23223, "teacher_loss": 0.14637230336666107 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.9944034218788147, "learning_rate": 3.265450619321394e-06, "loss": 0.298, "step": 23224, "teacher_loss": 0.22064675390720367 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.2331797480583191, "learning_rate": 3.264035997574092e-06, "loss": 0.2021, "step": 23225, "teacher_loss": 0.19866512715816498 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.16841256618499756, "learning_rate": 3.262621644897261e-06, "loss": 0.1946, "step": 23226, "teacher_loss": 0.19747957587242126 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.8005828857421875, "learning_rate": 3.261207561323321e-06, "loss": 0.3342, "step": 23227, "teacher_loss": 0.28238290548324585 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.27268239855766296, "learning_rate": 3.259793746884697e-06, "loss": 0.193, "step": 23228, "teacher_loss": 0.1841348111629486 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.27460533380508423, "learning_rate": 3.258380201613808e-06, "loss": 0.1354, "step": 23229, "teacher_loss": 0.11998498439788818 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4222524166107178, "learning_rate": 3.2569669255430545e-06, "loss": 0.24, "step": 23230, "teacher_loss": 0.21979591250419617 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.23753851652145386, "learning_rate": 3.2555539187048455e-06, "loss": 0.1285, "step": 23231, "teacher_loss": 0.11642719060182571 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.5014715790748596, "learning_rate": 3.2541411811315685e-06, "loss": 0.248, "step": 23232, "teacher_loss": 0.21984541416168213 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.3061332106590271, "learning_rate": 3.2527287128556195e-06, "loss": 0.1705, "step": 23233, "teacher_loss": 0.15537869930267334 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.5836219787597656, "learning_rate": 3.2513165139093837e-06, "loss": 0.2233, "step": 23234, "teacher_loss": 0.1832505315542221 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4008907973766327, "learning_rate": 3.2499045843252324e-06, "loss": 0.1946, "step": 23235, "teacher_loss": 0.1716565489768982 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.2025545835494995, "learning_rate": 3.248492924135541e-06, "loss": 0.1781, "step": 23236, "teacher_loss": 0.17538204789161682 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.07936612516641617, "learning_rate": 3.247081533372677e-06, "loss": 0.1837, "step": 23237, "teacher_loss": 0.1952945590019226 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4083752930164337, "learning_rate": 3.2456704120689933e-06, "loss": 0.2179, "step": 23238, "teacher_loss": 0.1967182457447052 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.23880809545516968, "learning_rate": 3.2442595602568486e-06, "loss": 0.2351, "step": 23239, "teacher_loss": 0.23464131355285645 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.1807381957769394, "learning_rate": 3.2428489779685837e-06, "loss": 0.141, "step": 23240, "teacher_loss": 0.1366264373064041 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.6207588315010071, "learning_rate": 3.241438665236541e-06, "loss": 0.5384, "step": 23241, "teacher_loss": 0.5292384624481201 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.539265513420105, "learning_rate": 3.240028622093062e-06, "loss": 0.2652, "step": 23242, "teacher_loss": 0.234774649143219 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.22803576290607452, "learning_rate": 3.2386188485704614e-06, "loss": 0.1169, "step": 23243, "teacher_loss": 0.10460447520017624 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 1.2326433658599854, "learning_rate": 3.237209344701074e-06, "loss": 0.3174, "step": 23244, "teacher_loss": 0.21570897102355957 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.47658029198646545, "learning_rate": 3.2358001105172057e-06, "loss": 0.2152, "step": 23245, "teacher_loss": 0.1861284077167511 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.3026275038719177, "learning_rate": 3.234391146051169e-06, "loss": 0.1742, "step": 23246, "teacher_loss": 0.1599264144897461 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4877398610115051, "learning_rate": 3.2329824513352723e-06, "loss": 0.277, "step": 23247, "teacher_loss": 0.25361716747283936 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.5098623633384705, "learning_rate": 3.2315740264018054e-06, "loss": 0.2111, "step": 23248, "teacher_loss": 0.17791599035263062 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.17468449473381042, "learning_rate": 3.2301658712830627e-06, "loss": 0.1834, "step": 23249, "teacher_loss": 0.1843147575855255 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.16486650705337524, "learning_rate": 3.2287579860113314e-06, "loss": 0.1183, "step": 23250, "teacher_loss": 0.113125279545784 }, { "epoch": 4.2, "eval_exact_match": 80.32166508987702, "eval_f1": 87.6927754004118, "step": 23250 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.462316632270813, "learning_rate": 3.2273503706188854e-06, "loss": 0.2153, "step": 23251, "teacher_loss": 0.1878376007080078 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4400089979171753, "learning_rate": 3.2259430251379978e-06, "loss": 0.2136, "step": 23252, "teacher_loss": 0.1884135603904724 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.16478344798088074, "learning_rate": 3.2245359496009407e-06, "loss": 0.1776, "step": 23253, "teacher_loss": 0.17907708883285522 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.22347962856292725, "learning_rate": 3.2231291440399685e-06, "loss": 0.1682, "step": 23254, "teacher_loss": 0.16203506290912628 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 1.0127646923065186, "learning_rate": 3.2217226084873284e-06, "loss": 0.3603, "step": 23255, "teacher_loss": 0.28783291578292847 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.29524099826812744, "learning_rate": 3.2203163429752853e-06, "loss": 0.2296, "step": 23256, "teacher_loss": 0.2222689986228943 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.23543590307235718, "learning_rate": 3.218910347536069e-06, "loss": 0.1237, "step": 23257, "teacher_loss": 0.11124849319458008 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.1931852400302887, "learning_rate": 3.217504622201915e-06, "loss": 0.143, "step": 23258, "teacher_loss": 0.13747794926166534 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4095076024532318, "learning_rate": 3.2160991670050535e-06, "loss": 0.1933, "step": 23259, "teacher_loss": 0.1692245602607727 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.482041597366333, "learning_rate": 3.2146939819777115e-06, "loss": 0.2066, "step": 23260, "teacher_loss": 0.17600588500499725 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.7865533828735352, "learning_rate": 3.2132890671520992e-06, "loss": 0.2448, "step": 23261, "teacher_loss": 0.1845979243516922 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.6000989675521851, "learning_rate": 3.211884422560429e-06, "loss": 0.2525, "step": 23262, "teacher_loss": 0.21391861140727997 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 1.0954487323760986, "learning_rate": 3.210480048234911e-06, "loss": 0.2698, "step": 23263, "teacher_loss": 0.17809641361236572 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.3608379065990448, "learning_rate": 3.2090759442077406e-06, "loss": 0.2233, "step": 23264, "teacher_loss": 0.2079797387123108 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.4619797170162201, "learning_rate": 3.2076721105110964e-06, "loss": 0.2726, "step": 23265, "teacher_loss": 0.2515532970428467 }, { "compression_loss": 0.0, "epoch": 4.2, "label_loss": 0.7716729640960693, "learning_rate": 3.206268547177186e-06, "loss": 0.2792, "step": 23266, "teacher_loss": 0.22444219887256622 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.7559354305267334, "learning_rate": 3.2048652542381767e-06, "loss": 0.2383, "step": 23267, "teacher_loss": 0.18077275156974792 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.4962038993835449, "learning_rate": 3.203462231726237e-06, "loss": 0.2798, "step": 23268, "teacher_loss": 0.25574052333831787 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.40254688262939453, "learning_rate": 3.202059479673547e-06, "loss": 0.1964, "step": 23269, "teacher_loss": 0.17348355054855347 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.30453866720199585, "learning_rate": 3.200656998112263e-06, "loss": 0.2514, "step": 23270, "teacher_loss": 0.24550817906856537 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.3186757564544678, "learning_rate": 3.1992547870745326e-06, "loss": 0.2608, "step": 23271, "teacher_loss": 0.25441813468933105 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 1.0288543701171875, "learning_rate": 3.197852846592508e-06, "loss": 0.2775, "step": 23272, "teacher_loss": 0.1940314769744873 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.5421605706214905, "learning_rate": 3.1964511766983394e-06, "loss": 0.232, "step": 23273, "teacher_loss": 0.19748297333717346 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 1.2474877834320068, "learning_rate": 3.1950497774241504e-06, "loss": 0.4592, "step": 23274, "teacher_loss": 0.3715963661670685 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 1.3529140949249268, "learning_rate": 3.193648648802079e-06, "loss": 0.2799, "step": 23275, "teacher_loss": 0.16063448786735535 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.6370127201080322, "learning_rate": 3.192247790864249e-06, "loss": 0.2675, "step": 23276, "teacher_loss": 0.22643055021762848 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.2137598842382431, "learning_rate": 3.1908472036427733e-06, "loss": 0.2129, "step": 23277, "teacher_loss": 0.21281951665878296 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.360918253660202, "learning_rate": 3.1894468871697647e-06, "loss": 0.1479, "step": 23278, "teacher_loss": 0.12421050667762756 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.46242764592170715, "learning_rate": 3.188046841477332e-06, "loss": 0.2461, "step": 23279, "teacher_loss": 0.22201907634735107 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.6964929103851318, "learning_rate": 3.186647066597569e-06, "loss": 0.2589, "step": 23280, "teacher_loss": 0.21029508113861084 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.38443073630332947, "learning_rate": 3.185247562562574e-06, "loss": 0.3303, "step": 23281, "teacher_loss": 0.3242555260658264 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.28113818168640137, "learning_rate": 3.1838483294044246e-06, "loss": 0.248, "step": 23282, "teacher_loss": 0.24429091811180115 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.21890994906425476, "learning_rate": 3.1824493671552123e-06, "loss": 0.1295, "step": 23283, "teacher_loss": 0.11961519718170166 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.13107125461101532, "learning_rate": 3.1810506758469997e-06, "loss": 0.1761, "step": 23284, "teacher_loss": 0.18107828497886658 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.21407577395439148, "learning_rate": 3.179652255511861e-06, "loss": 0.1282, "step": 23285, "teacher_loss": 0.11870527267456055 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.43053627014160156, "learning_rate": 3.178254106181859e-06, "loss": 0.1841, "step": 23286, "teacher_loss": 0.15671546757221222 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.36164426803588867, "learning_rate": 3.176856227889045e-06, "loss": 0.1909, "step": 23287, "teacher_loss": 0.17188404500484467 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.6132554411888123, "learning_rate": 3.1754586206654695e-06, "loss": 0.2177, "step": 23288, "teacher_loss": 0.17375633120536804 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.41749054193496704, "learning_rate": 3.1740612845431806e-06, "loss": 0.2477, "step": 23289, "teacher_loss": 0.2288322150707245 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.21687300503253937, "learning_rate": 3.1726642195542057e-06, "loss": 0.2279, "step": 23290, "teacher_loss": 0.2291523665189743 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.2819981575012207, "learning_rate": 3.171267425730579e-06, "loss": 0.1648, "step": 23291, "teacher_loss": 0.15180012583732605 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.3829386532306671, "learning_rate": 3.169870903104332e-06, "loss": 0.2372, "step": 23292, "teacher_loss": 0.22096776962280273 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.29942187666893005, "learning_rate": 3.168474651707471e-06, "loss": 0.2127, "step": 23293, "teacher_loss": 0.2031051218509674 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.44084835052490234, "learning_rate": 3.167078671572018e-06, "loss": 0.2754, "step": 23294, "teacher_loss": 0.2570471167564392 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.27950647473335266, "learning_rate": 3.165682962729971e-06, "loss": 0.1566, "step": 23295, "teacher_loss": 0.14290569722652435 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.6405006647109985, "learning_rate": 3.1642875252133315e-06, "loss": 0.2043, "step": 23296, "teacher_loss": 0.1558743715286255 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.8259952664375305, "learning_rate": 3.162892359054098e-06, "loss": 0.3018, "step": 23297, "teacher_loss": 0.24350564181804657 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.24093598127365112, "learning_rate": 3.1614974642842493e-06, "loss": 0.132, "step": 23298, "teacher_loss": 0.11984948068857193 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.28399449586868286, "learning_rate": 3.160102840935769e-06, "loss": 0.1869, "step": 23299, "teacher_loss": 0.17612066864967346 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.2547873258590698, "learning_rate": 3.1587084890406386e-06, "loss": 0.1779, "step": 23300, "teacher_loss": 0.1693544238805771 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.5891237258911133, "learning_rate": 3.157314408630816e-06, "loss": 0.2638, "step": 23301, "teacher_loss": 0.22761720418930054 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.3508014678955078, "learning_rate": 3.15592059973827e-06, "loss": 0.2122, "step": 23302, "teacher_loss": 0.19679348170757294 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.3223114311695099, "learning_rate": 3.1545270623949526e-06, "loss": 0.2913, "step": 23303, "teacher_loss": 0.2878662347793579 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.44774654507637024, "learning_rate": 3.153133796632819e-06, "loss": 0.1954, "step": 23304, "teacher_loss": 0.16733747720718384 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.16391853988170624, "learning_rate": 3.151740802483802e-06, "loss": 0.1794, "step": 23305, "teacher_loss": 0.18116486072540283 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.312136173248291, "learning_rate": 3.1503480799798475e-06, "loss": 0.193, "step": 23306, "teacher_loss": 0.17973214387893677 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.45220401883125305, "learning_rate": 3.1489556291528883e-06, "loss": 0.1993, "step": 23307, "teacher_loss": 0.17115429043769836 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.2904306650161743, "learning_rate": 3.147563450034841e-06, "loss": 0.1493, "step": 23308, "teacher_loss": 0.13363364338874817 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.8664333820343018, "learning_rate": 3.1461715426576287e-06, "loss": 0.2861, "step": 23309, "teacher_loss": 0.2216053456068039 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.1970183253288269, "learning_rate": 3.1447799070531674e-06, "loss": 0.1642, "step": 23310, "teacher_loss": 0.16052848100662231 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.4456264078617096, "learning_rate": 3.143388543253355e-06, "loss": 0.22, "step": 23311, "teacher_loss": 0.19491145014762878 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.5036458373069763, "learning_rate": 3.1419974512900956e-06, "loss": 0.1892, "step": 23312, "teacher_loss": 0.1542593240737915 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.20315611362457275, "learning_rate": 3.1406066311952856e-06, "loss": 0.1628, "step": 23313, "teacher_loss": 0.15834292769432068 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.48135173320770264, "learning_rate": 3.1392160830008097e-06, "loss": 0.169, "step": 23314, "teacher_loss": 0.13425378501415253 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.412717342376709, "learning_rate": 3.137825806738541e-06, "loss": 0.2068, "step": 23315, "teacher_loss": 0.18387356400489807 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.2765898108482361, "learning_rate": 3.1364358024403693e-06, "loss": 0.2047, "step": 23316, "teacher_loss": 0.19671399891376495 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.49432748556137085, "learning_rate": 3.1350460701381554e-06, "loss": 0.2564, "step": 23317, "teacher_loss": 0.2300100326538086 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.27979224920272827, "learning_rate": 3.1336566098637553e-06, "loss": 0.2189, "step": 23318, "teacher_loss": 0.21210408210754395 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.28960633277893066, "learning_rate": 3.132267421649038e-06, "loss": 0.2297, "step": 23319, "teacher_loss": 0.22302685678005219 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.3693777322769165, "learning_rate": 3.1308785055258506e-06, "loss": 0.1768, "step": 23320, "teacher_loss": 0.15540450811386108 }, { "compression_loss": 0.0, "epoch": 4.21, "label_loss": 0.38619837164878845, "learning_rate": 3.1294898615260263e-06, "loss": 0.1737, "step": 23321, "teacher_loss": 0.15005414187908173 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.6213864088058472, "learning_rate": 3.1281014896814126e-06, "loss": 0.2326, "step": 23322, "teacher_loss": 0.18942174315452576 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3797953128814697, "learning_rate": 3.12671339002384e-06, "loss": 0.1887, "step": 23323, "teacher_loss": 0.16748473048210144 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4347117841243744, "learning_rate": 3.125325562585128e-06, "loss": 0.1879, "step": 23324, "teacher_loss": 0.16047623753547668 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.32720768451690674, "learning_rate": 3.1239380073971e-06, "loss": 0.2056, "step": 23325, "teacher_loss": 0.1921185702085495 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2594717741012573, "learning_rate": 3.1225507244915696e-06, "loss": 0.1398, "step": 23326, "teacher_loss": 0.1264829784631729 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.29920393228530884, "learning_rate": 3.121163713900341e-06, "loss": 0.159, "step": 23327, "teacher_loss": 0.14343662559986115 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.8305924534797668, "learning_rate": 3.119776975655206e-06, "loss": 0.3161, "step": 23328, "teacher_loss": 0.25896286964416504 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2483414113521576, "learning_rate": 3.118390509787973e-06, "loss": 0.1554, "step": 23329, "teacher_loss": 0.14505809545516968 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3258824646472931, "learning_rate": 3.1170043163304245e-06, "loss": 0.1936, "step": 23330, "teacher_loss": 0.1788853108882904 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2784022092819214, "learning_rate": 3.115618395314336e-06, "loss": 0.2354, "step": 23331, "teacher_loss": 0.23062962293624878 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3397368788719177, "learning_rate": 3.114232746771484e-06, "loss": 0.2037, "step": 23332, "teacher_loss": 0.18862146139144897 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5959842205047607, "learning_rate": 3.1128473707336463e-06, "loss": 0.1875, "step": 23333, "teacher_loss": 0.1421494483947754 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3117603063583374, "learning_rate": 3.1114622672325747e-06, "loss": 0.199, "step": 23334, "teacher_loss": 0.18651226162910461 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3570409119129181, "learning_rate": 3.110077436300028e-06, "loss": 0.2134, "step": 23335, "teacher_loss": 0.1973954141139984 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4300231337547302, "learning_rate": 3.108692877967763e-06, "loss": 0.1775, "step": 23336, "teacher_loss": 0.1494424045085907 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.290812611579895, "learning_rate": 3.107308592267514e-06, "loss": 0.1688, "step": 23337, "teacher_loss": 0.1552354097366333 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4182751774787903, "learning_rate": 3.1059245792310224e-06, "loss": 0.2881, "step": 23338, "teacher_loss": 0.27362892031669617 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.35841047763824463, "learning_rate": 3.104540838890026e-06, "loss": 0.2121, "step": 23339, "teacher_loss": 0.19583261013031006 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2748536169528961, "learning_rate": 3.1031573712762368e-06, "loss": 0.2067, "step": 23340, "teacher_loss": 0.19914984703063965 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.22894063591957092, "learning_rate": 3.1017741764213832e-06, "loss": 0.1727, "step": 23341, "teacher_loss": 0.16640454530715942 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.569707453250885, "learning_rate": 3.100391254357178e-06, "loss": 0.2365, "step": 23342, "teacher_loss": 0.19945085048675537 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.32030758261680603, "learning_rate": 3.0990086051153236e-06, "loss": 0.1776, "step": 23343, "teacher_loss": 0.1617041975259781 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.13490645587444305, "learning_rate": 3.0976262287275175e-06, "loss": 0.1313, "step": 23344, "teacher_loss": 0.13087283074855804 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5579209923744202, "learning_rate": 3.096244125225457e-06, "loss": 0.2962, "step": 23345, "teacher_loss": 0.26711922883987427 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.33370986580848694, "learning_rate": 3.0948622946408317e-06, "loss": 0.2387, "step": 23346, "teacher_loss": 0.2281380295753479 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.43069136142730713, "learning_rate": 3.093480737005318e-06, "loss": 0.1978, "step": 23347, "teacher_loss": 0.17197491228580475 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.1749725043773651, "learning_rate": 3.0920994523505943e-06, "loss": 0.1197, "step": 23348, "teacher_loss": 0.11355361342430115 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.20738239586353302, "learning_rate": 3.090718440708331e-06, "loss": 0.1404, "step": 23349, "teacher_loss": 0.13300752639770508 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.468522310256958, "learning_rate": 3.0893377021101854e-06, "loss": 0.2255, "step": 23350, "teacher_loss": 0.19853244721889496 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.30323949456214905, "learning_rate": 3.0879572365878152e-06, "loss": 0.1649, "step": 23351, "teacher_loss": 0.1495271772146225 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5940144062042236, "learning_rate": 3.0865770441728747e-06, "loss": 0.282, "step": 23352, "teacher_loss": 0.24735905230045319 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.27005207538604736, "learning_rate": 3.0851971248970005e-06, "loss": 0.1364, "step": 23353, "teacher_loss": 0.12155468761920929 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.27472546696662903, "learning_rate": 3.0838174787918395e-06, "loss": 0.1369, "step": 23354, "teacher_loss": 0.12157252430915833 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2313234806060791, "learning_rate": 3.0824381058890123e-06, "loss": 0.2621, "step": 23355, "teacher_loss": 0.26557034254074097 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.3240395188331604, "learning_rate": 3.0810590062201487e-06, "loss": 0.1828, "step": 23356, "teacher_loss": 0.1671273410320282 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.6955951452255249, "learning_rate": 3.0796801798168707e-06, "loss": 0.2239, "step": 23357, "teacher_loss": 0.17153066396713257 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4184282422065735, "learning_rate": 3.078301626710782e-06, "loss": 0.2141, "step": 23358, "teacher_loss": 0.19143730401992798 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.424055278301239, "learning_rate": 3.0769233469334967e-06, "loss": 0.2649, "step": 23359, "teacher_loss": 0.24721673130989075 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.43032824993133545, "learning_rate": 3.075545340516614e-06, "loss": 0.2277, "step": 23360, "teacher_loss": 0.2052096128463745 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4023629426956177, "learning_rate": 3.0741676074917217e-06, "loss": 0.2298, "step": 23361, "teacher_loss": 0.2106795608997345 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2856014668941498, "learning_rate": 3.0727901478904103e-06, "loss": 0.2378, "step": 23362, "teacher_loss": 0.23252660036087036 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5177009701728821, "learning_rate": 3.0714129617442652e-06, "loss": 0.2168, "step": 23363, "teacher_loss": 0.18339797854423523 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.591826319694519, "learning_rate": 3.0700360490848588e-06, "loss": 0.2162, "step": 23364, "teacher_loss": 0.17446091771125793 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.393990695476532, "learning_rate": 3.0686594099437522e-06, "loss": 0.1877, "step": 23365, "teacher_loss": 0.1647799015045166 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.4505118727684021, "learning_rate": 3.067283044352513e-06, "loss": 0.2317, "step": 23366, "teacher_loss": 0.2074400782585144 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5250614881515503, "learning_rate": 3.065906952342703e-06, "loss": 0.2482, "step": 23367, "teacher_loss": 0.2174263298511505 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.2600533366203308, "learning_rate": 3.064531133945862e-06, "loss": 0.157, "step": 23368, "teacher_loss": 0.14558903872966766 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.6682767271995544, "learning_rate": 3.0631555891935363e-06, "loss": 0.2881, "step": 23369, "teacher_loss": 0.24584899842739105 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.30913203954696655, "learning_rate": 3.061780318117269e-06, "loss": 0.1823, "step": 23370, "teacher_loss": 0.1681535542011261 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.7967884540557861, "learning_rate": 3.0604053207485838e-06, "loss": 0.3123, "step": 23371, "teacher_loss": 0.25841328501701355 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5161321759223938, "learning_rate": 3.059030597119006e-06, "loss": 0.2631, "step": 23372, "teacher_loss": 0.23493850231170654 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.33540183305740356, "learning_rate": 3.05765614726006e-06, "loss": 0.187, "step": 23373, "teacher_loss": 0.17056095600128174 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5006957054138184, "learning_rate": 3.0562819712032513e-06, "loss": 0.2427, "step": 23374, "teacher_loss": 0.214012011885643 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.5709456205368042, "learning_rate": 3.054908068980085e-06, "loss": 0.3275, "step": 23375, "teacher_loss": 0.30042925477027893 }, { "compression_loss": 0.0, "epoch": 4.22, "label_loss": 0.36636725068092346, "learning_rate": 3.0535344406220695e-06, "loss": 0.2296, "step": 23376, "teacher_loss": 0.21442541480064392 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3970506191253662, "learning_rate": 3.0521610861606926e-06, "loss": 0.2222, "step": 23377, "teacher_loss": 0.20282377302646637 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5968098640441895, "learning_rate": 3.050788005627431e-06, "loss": 0.2538, "step": 23378, "teacher_loss": 0.2156357318162918 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.4083077311515808, "learning_rate": 3.049415199053782e-06, "loss": 0.2184, "step": 23379, "teacher_loss": 0.19726793467998505 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.2398323267698288, "learning_rate": 3.0480426664712153e-06, "loss": 0.1646, "step": 23380, "teacher_loss": 0.15623663365840912 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3097718358039856, "learning_rate": 3.0466704079111913e-06, "loss": 0.1497, "step": 23381, "teacher_loss": 0.13187578320503235 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.21565264463424683, "learning_rate": 3.045298423405175e-06, "loss": 0.2184, "step": 23382, "teacher_loss": 0.2186783105134964 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3526037335395813, "learning_rate": 3.0439267129846297e-06, "loss": 0.1984, "step": 23383, "teacher_loss": 0.18125870823860168 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.4021896719932556, "learning_rate": 3.0425552766809957e-06, "loss": 0.2102, "step": 23384, "teacher_loss": 0.18887048959732056 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3450854420661926, "learning_rate": 3.041184114525717e-06, "loss": 0.1842, "step": 23385, "teacher_loss": 0.1662786304950714 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.29741767048835754, "learning_rate": 3.0398132265502373e-06, "loss": 0.1874, "step": 23386, "teacher_loss": 0.17521792650222778 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.47177982330322266, "learning_rate": 3.038442612785981e-06, "loss": 0.2488, "step": 23387, "teacher_loss": 0.2240654081106186 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.09591615200042725, "learning_rate": 3.037072273264365e-06, "loss": 0.1572, "step": 23388, "teacher_loss": 0.16404278576374054 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.2959417700767517, "learning_rate": 3.0357022080168246e-06, "loss": 0.2325, "step": 23389, "teacher_loss": 0.22539573907852173 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5060272216796875, "learning_rate": 3.0343324170747605e-06, "loss": 0.2102, "step": 23390, "teacher_loss": 0.1773017942905426 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.22305548191070557, "learning_rate": 3.0329629004695715e-06, "loss": 0.12, "step": 23391, "teacher_loss": 0.10858811438083649 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.2350330352783203, "learning_rate": 3.03159365823267e-06, "loss": 0.1733, "step": 23392, "teacher_loss": 0.16648122668266296 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.13447819650173187, "learning_rate": 3.0302246903954456e-06, "loss": 0.1698, "step": 23393, "teacher_loss": 0.17375323176383972 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.6864510774612427, "learning_rate": 3.0288559969892773e-06, "loss": 0.3051, "step": 23394, "teacher_loss": 0.26276645064353943 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.4945695996284485, "learning_rate": 3.0274875780455478e-06, "loss": 0.2678, "step": 23395, "teacher_loss": 0.24258771538734436 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.6094739437103271, "learning_rate": 3.0261194335956384e-06, "loss": 0.2003, "step": 23396, "teacher_loss": 0.15486091375350952 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.29347124695777893, "learning_rate": 3.0247515636709077e-06, "loss": 0.2107, "step": 23397, "teacher_loss": 0.2015039324760437 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.33906489610671997, "learning_rate": 3.023383968302718e-06, "loss": 0.1807, "step": 23398, "teacher_loss": 0.16311398148536682 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.8298303484916687, "learning_rate": 3.022016647522431e-06, "loss": 0.2633, "step": 23399, "teacher_loss": 0.20037055015563965 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.6456891298294067, "learning_rate": 3.020649601361386e-06, "loss": 0.3382, "step": 23400, "teacher_loss": 0.304018497467041 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.31144827604293823, "learning_rate": 3.01928282985093e-06, "loss": 0.2068, "step": 23401, "teacher_loss": 0.1951437145471573 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.4323830306529999, "learning_rate": 3.017916333022403e-06, "loss": 0.1992, "step": 23402, "teacher_loss": 0.17325958609580994 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.2369326949119568, "learning_rate": 3.0165501109071263e-06, "loss": 0.2104, "step": 23403, "teacher_loss": 0.20749524235725403 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.35950973629951477, "learning_rate": 3.0151841635364304e-06, "loss": 0.1902, "step": 23404, "teacher_loss": 0.1713523268699646 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3136492967605591, "learning_rate": 3.013818490941626e-06, "loss": 0.2182, "step": 23405, "teacher_loss": 0.20759734511375427 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5813215970993042, "learning_rate": 3.0124530931540325e-06, "loss": 0.2478, "step": 23406, "teacher_loss": 0.21069425344467163 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.37100541591644287, "learning_rate": 3.011087970204942e-06, "loss": 0.2366, "step": 23407, "teacher_loss": 0.22165217995643616 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.39068782329559326, "learning_rate": 3.009723122125662e-06, "loss": 0.1696, "step": 23408, "teacher_loss": 0.14508675038814545 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.6514219641685486, "learning_rate": 3.008358548947486e-06, "loss": 0.2707, "step": 23409, "teacher_loss": 0.22835254669189453 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.3497718572616577, "learning_rate": 3.0069942507016913e-06, "loss": 0.2642, "step": 23410, "teacher_loss": 0.2547234892845154 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.35071495175361633, "learning_rate": 3.00563022741956e-06, "loss": 0.1666, "step": 23411, "teacher_loss": 0.14615866541862488 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.39620763063430786, "learning_rate": 3.0042664791323724e-06, "loss": 0.2217, "step": 23412, "teacher_loss": 0.20229363441467285 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.856957197189331, "learning_rate": 3.002903005871385e-06, "loss": 0.3126, "step": 23413, "teacher_loss": 0.2520948052406311 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.32809919118881226, "learning_rate": 3.001539807667863e-06, "loss": 0.1934, "step": 23414, "teacher_loss": 0.17840775847434998 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.16026541590690613, "learning_rate": 3.0001768845530626e-06, "loss": 0.2411, "step": 23415, "teacher_loss": 0.2500517964363098 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.22602596879005432, "learning_rate": 2.9988142365582256e-06, "loss": 0.2008, "step": 23416, "teacher_loss": 0.19803878664970398 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5577503442764282, "learning_rate": 2.997451863714602e-06, "loss": 0.2244, "step": 23417, "teacher_loss": 0.18734243512153625 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.8251248598098755, "learning_rate": 2.9960897660534155e-06, "loss": 0.2436, "step": 23418, "teacher_loss": 0.1789398491382599 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 1.0075130462646484, "learning_rate": 2.9947279436059023e-06, "loss": 0.2727, "step": 23419, "teacher_loss": 0.19105108082294464 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5820173621177673, "learning_rate": 2.9933663964032877e-06, "loss": 0.2449, "step": 23420, "teacher_loss": 0.20742951333522797 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.6726857423782349, "learning_rate": 2.99200512447678e-06, "loss": 0.2352, "step": 23421, "teacher_loss": 0.18657097220420837 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.5102705955505371, "learning_rate": 2.9906441278575924e-06, "loss": 0.2191, "step": 23422, "teacher_loss": 0.1867186725139618 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.29545438289642334, "learning_rate": 2.989283406576932e-06, "loss": 0.1853, "step": 23423, "teacher_loss": 0.1730896234512329 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.606796383857727, "learning_rate": 2.9879229606659903e-06, "loss": 0.1818, "step": 23424, "teacher_loss": 0.13458675146102905 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.16351720690727234, "learning_rate": 2.9865627901559623e-06, "loss": 0.15, "step": 23425, "teacher_loss": 0.1484694480895996 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.23135894536972046, "learning_rate": 2.985202895078033e-06, "loss": 0.2008, "step": 23426, "teacher_loss": 0.19740816950798035 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.45961982011795044, "learning_rate": 2.983843275463381e-06, "loss": 0.2194, "step": 23427, "teacher_loss": 0.19275733828544617 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.35334160923957825, "learning_rate": 2.9824839313431712e-06, "loss": 0.1806, "step": 23428, "teacher_loss": 0.16142696142196655 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.44127005338668823, "learning_rate": 2.9811248627485754e-06, "loss": 0.2098, "step": 23429, "teacher_loss": 0.18412485718727112 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.2662040591239929, "learning_rate": 2.9797660697107554e-06, "loss": 0.1837, "step": 23430, "teacher_loss": 0.174494206905365 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.34847182035446167, "learning_rate": 2.978407552260856e-06, "loss": 0.2067, "step": 23431, "teacher_loss": 0.19097909331321716 }, { "compression_loss": 0.0, "epoch": 4.23, "label_loss": 0.18753407895565033, "learning_rate": 2.977049310430031e-06, "loss": 0.1502, "step": 23432, "teacher_loss": 0.14602018892765045 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.37232252955436707, "learning_rate": 2.9756913442494204e-06, "loss": 0.18, "step": 23433, "teacher_loss": 0.15865576267242432 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5214018821716309, "learning_rate": 2.974333653750154e-06, "loss": 0.1875, "step": 23434, "teacher_loss": 0.15038429200649261 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.3771612048149109, "learning_rate": 2.9729762389633625e-06, "loss": 0.3124, "step": 23435, "teacher_loss": 0.3051683306694031 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.3065650463104248, "learning_rate": 2.9716190999201687e-06, "loss": 0.2311, "step": 23436, "teacher_loss": 0.2226601541042328 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.7297235131263733, "learning_rate": 2.970262236651688e-06, "loss": 0.275, "step": 23437, "teacher_loss": 0.22447621822357178 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.6516412496566772, "learning_rate": 2.968905649189019e-06, "loss": 0.214, "step": 23438, "teacher_loss": 0.1653345823287964 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.35279861092567444, "learning_rate": 2.9675493375632796e-06, "loss": 0.225, "step": 23439, "teacher_loss": 0.21080461144447327 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.37092798948287964, "learning_rate": 2.9661933018055586e-06, "loss": 0.273, "step": 23440, "teacher_loss": 0.26208558678627014 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5320628881454468, "learning_rate": 2.9648375419469397e-06, "loss": 0.2064, "step": 23441, "teacher_loss": 0.17024663090705872 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.1468735933303833, "learning_rate": 2.9634820580185195e-06, "loss": 0.1586, "step": 23442, "teacher_loss": 0.15995411574840546 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.2772144675254822, "learning_rate": 2.962126850051368e-06, "loss": 0.148, "step": 23443, "teacher_loss": 0.13365775346755981 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.6996981501579285, "learning_rate": 2.9607719180765524e-06, "loss": 0.4558, "step": 23444, "teacher_loss": 0.4287361800670624 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.765461802482605, "learning_rate": 2.9594172621251408e-06, "loss": 0.2478, "step": 23445, "teacher_loss": 0.19028347730636597 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4473523795604706, "learning_rate": 2.958062882228197e-06, "loss": 0.1918, "step": 23446, "teacher_loss": 0.16336822509765625 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.3348191976547241, "learning_rate": 2.9567087784167642e-06, "loss": 0.2289, "step": 23447, "teacher_loss": 0.21716171503067017 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.1583988219499588, "learning_rate": 2.9553549507218893e-06, "loss": 0.1777, "step": 23448, "teacher_loss": 0.17988064885139465 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.765503466129303, "learning_rate": 2.954001399174619e-06, "loss": 0.2211, "step": 23449, "teacher_loss": 0.1606222540140152 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.29384398460388184, "learning_rate": 2.9526481238059803e-06, "loss": 0.2113, "step": 23450, "teacher_loss": 0.20210814476013184 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4396364092826843, "learning_rate": 2.951295124646994e-06, "loss": 0.2042, "step": 23451, "teacher_loss": 0.17804238200187683 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.13660049438476562, "learning_rate": 2.9499424017286926e-06, "loss": 0.1589, "step": 23452, "teacher_loss": 0.1613357961177826 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5394812822341919, "learning_rate": 2.948589955082085e-06, "loss": 0.1788, "step": 23453, "teacher_loss": 0.13869966566562653 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.19466203451156616, "learning_rate": 2.9472377847381733e-06, "loss": 0.1793, "step": 23454, "teacher_loss": 0.17760831117630005 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4476737678050995, "learning_rate": 2.945885890727964e-06, "loss": 0.1966, "step": 23455, "teacher_loss": 0.16869601607322693 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.2089543342590332, "learning_rate": 2.944534273082454e-06, "loss": 0.1628, "step": 23456, "teacher_loss": 0.1577066034078598 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.22364526987075806, "learning_rate": 2.943182931832626e-06, "loss": 0.1641, "step": 23457, "teacher_loss": 0.15749265253543854 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5768837928771973, "learning_rate": 2.941831867009464e-06, "loss": 0.2473, "step": 23458, "teacher_loss": 0.21065039932727814 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.14140355587005615, "learning_rate": 2.940481078643949e-06, "loss": 0.1988, "step": 23459, "teacher_loss": 0.20523279905319214 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.744377076625824, "learning_rate": 2.9391305667670443e-06, "loss": 0.2079, "step": 23460, "teacher_loss": 0.14824740588665009 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.8212010264396667, "learning_rate": 2.9377803314097136e-06, "loss": 0.2824, "step": 23461, "teacher_loss": 0.22252118587493896 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4029175043106079, "learning_rate": 2.93643037260292e-06, "loss": 0.2731, "step": 23462, "teacher_loss": 0.2586822211742401 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5088284015655518, "learning_rate": 2.935080690377606e-06, "loss": 0.2472, "step": 23463, "teacher_loss": 0.2181018590927124 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.38877072930336, "learning_rate": 2.9337312847647187e-06, "loss": 0.2055, "step": 23464, "teacher_loss": 0.18508122861385345 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.31336140632629395, "learning_rate": 2.9323821557952007e-06, "loss": 0.1863, "step": 23465, "teacher_loss": 0.1722373068332672 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5628397464752197, "learning_rate": 2.931033303499975e-06, "loss": 0.3491, "step": 23466, "teacher_loss": 0.32539820671081543 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.49442926049232483, "learning_rate": 2.929684727909974e-06, "loss": 0.2618, "step": 23467, "teacher_loss": 0.23598593473434448 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.3347659707069397, "learning_rate": 2.928336429056111e-06, "loss": 0.2031, "step": 23468, "teacher_loss": 0.18851131200790405 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.3287886381149292, "learning_rate": 2.9269884069693053e-06, "loss": 0.1659, "step": 23469, "teacher_loss": 0.1477912813425064 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5561468005180359, "learning_rate": 2.925640661680454e-06, "loss": 0.244, "step": 23470, "teacher_loss": 0.20930270850658417 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.1431950330734253, "learning_rate": 2.9242931932204614e-06, "loss": 0.1461, "step": 23471, "teacher_loss": 0.14641883969306946 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5328336954116821, "learning_rate": 2.9229460016202254e-06, "loss": 0.2277, "step": 23472, "teacher_loss": 0.19380444288253784 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.6786975860595703, "learning_rate": 2.9215990869106248e-06, "loss": 0.2125, "step": 23473, "teacher_loss": 0.1606781780719757 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.10365578532218933, "learning_rate": 2.920252449122543e-06, "loss": 0.1479, "step": 23474, "teacher_loss": 0.15279698371887207 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.28274479508399963, "learning_rate": 2.9189060882868606e-06, "loss": 0.2363, "step": 23475, "teacher_loss": 0.23114481568336487 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.339595764875412, "learning_rate": 2.9175600044344364e-06, "loss": 0.1642, "step": 23476, "teacher_loss": 0.1447519212961197 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.1088632345199585, "learning_rate": 2.9162141975961404e-06, "loss": 0.1823, "step": 23477, "teacher_loss": 0.1904122531414032 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5974847078323364, "learning_rate": 2.9148686678028197e-06, "loss": 0.5551, "step": 23478, "teacher_loss": 0.55040442943573 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4665134847164154, "learning_rate": 2.913523415085328e-06, "loss": 0.23, "step": 23479, "teacher_loss": 0.20372800529003143 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.2874928116798401, "learning_rate": 2.9121784394745105e-06, "loss": 0.1422, "step": 23480, "teacher_loss": 0.126010924577713 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5245132446289062, "learning_rate": 2.910833741001196e-06, "loss": 0.2764, "step": 23481, "teacher_loss": 0.24888616800308228 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.21251609921455383, "learning_rate": 2.9094893196962197e-06, "loss": 0.1747, "step": 23482, "teacher_loss": 0.17052951455116272 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.24154171347618103, "learning_rate": 2.908145175590407e-06, "loss": 0.1326, "step": 23483, "teacher_loss": 0.12048736214637756 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.5144477486610413, "learning_rate": 2.9068013087145684e-06, "loss": 0.3048, "step": 23484, "teacher_loss": 0.2815244495868683 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.4105534851551056, "learning_rate": 2.9054577190995175e-06, "loss": 0.2053, "step": 23485, "teacher_loss": 0.1824992597103119 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.42366823554039, "learning_rate": 2.9041144067760662e-06, "loss": 0.1809, "step": 23486, "teacher_loss": 0.15392887592315674 }, { "compression_loss": 0.0, "epoch": 4.24, "label_loss": 0.7007098197937012, "learning_rate": 2.9027713717750046e-06, "loss": 0.2716, "step": 23487, "teacher_loss": 0.22394943237304688 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.2165856659412384, "learning_rate": 2.9014286141271173e-06, "loss": 0.1716, "step": 23488, "teacher_loss": 0.16656649112701416 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.23297366499900818, "learning_rate": 2.9000861338632074e-06, "loss": 0.2154, "step": 23489, "teacher_loss": 0.21348971128463745 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4030412435531616, "learning_rate": 2.898743931014045e-06, "loss": 0.2045, "step": 23490, "teacher_loss": 0.18242451548576355 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4813503921031952, "learning_rate": 2.8974020056103994e-06, "loss": 0.1929, "step": 23491, "teacher_loss": 0.16082313656806946 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.7757197618484497, "learning_rate": 2.8960603576830408e-06, "loss": 0.2883, "step": 23492, "teacher_loss": 0.23414504528045654 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.15720973908901215, "learning_rate": 2.894718987262731e-06, "loss": 0.2029, "step": 23493, "teacher_loss": 0.20799612998962402 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.2559800148010254, "learning_rate": 2.8933778943802193e-06, "loss": 0.1612, "step": 23494, "teacher_loss": 0.15063440799713135 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.5785425901412964, "learning_rate": 2.892037079066252e-06, "loss": 0.4117, "step": 23495, "teacher_loss": 0.39316946268081665 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.8459206819534302, "learning_rate": 2.8906965413515803e-06, "loss": 0.2007, "step": 23496, "teacher_loss": 0.12906435132026672 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.08664538711309433, "learning_rate": 2.8893562812669254e-06, "loss": 0.1448, "step": 23497, "teacher_loss": 0.15126188099384308 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.35848987102508545, "learning_rate": 2.888016298843022e-06, "loss": 0.161, "step": 23498, "teacher_loss": 0.13903038203716278 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.5023984313011169, "learning_rate": 2.886676594110595e-06, "loss": 0.2098, "step": 23499, "teacher_loss": 0.17725443840026855 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.636722207069397, "learning_rate": 2.8853371671003547e-06, "loss": 0.2279, "step": 23500, "teacher_loss": 0.1824699342250824 }, { "epoch": 4.25, "eval_exact_match": 80.47303689687796, "eval_f1": 87.72751099830998, "step": 23500 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.17988750338554382, "learning_rate": 2.8839980178430055e-06, "loss": 0.1606, "step": 23501, "teacher_loss": 0.1584232747554779 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.20040926337242126, "learning_rate": 2.8826591463692638e-06, "loss": 0.1946, "step": 23502, "teacher_loss": 0.193989560008049 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.6451300382614136, "learning_rate": 2.881320552709817e-06, "loss": 0.2256, "step": 23503, "teacher_loss": 0.1790080964565277 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4379386305809021, "learning_rate": 2.879982236895354e-06, "loss": 0.1983, "step": 23504, "teacher_loss": 0.17168211936950684 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.8508151769638062, "learning_rate": 2.878644198956558e-06, "loss": 0.2294, "step": 23505, "teacher_loss": 0.16032646596431732 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.18794825673103333, "learning_rate": 2.8773064389241153e-06, "loss": 0.1556, "step": 23506, "teacher_loss": 0.1519886553287506 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.35050997138023376, "learning_rate": 2.8759689568286863e-06, "loss": 0.2118, "step": 23507, "teacher_loss": 0.19633370637893677 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.6564970016479492, "learning_rate": 2.8746317527009375e-06, "loss": 0.2841, "step": 23508, "teacher_loss": 0.24274101853370667 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3475725054740906, "learning_rate": 2.873294826571535e-06, "loss": 0.1603, "step": 23509, "teacher_loss": 0.13952046632766724 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4035768210887909, "learning_rate": 2.8719581784711217e-06, "loss": 0.221, "step": 23510, "teacher_loss": 0.20070582628250122 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3082427382469177, "learning_rate": 2.8706218084303393e-06, "loss": 0.198, "step": 23511, "teacher_loss": 0.18576562404632568 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.45154741406440735, "learning_rate": 2.869285716479841e-06, "loss": 0.2966, "step": 23512, "teacher_loss": 0.279381662607193 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.37201517820358276, "learning_rate": 2.867949902650252e-06, "loss": 0.2036, "step": 23513, "teacher_loss": 0.18483304977416992 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3767436742782593, "learning_rate": 2.86661436697219e-06, "loss": 0.1954, "step": 23514, "teacher_loss": 0.1752319037914276 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3800477981567383, "learning_rate": 2.8652791094762886e-06, "loss": 0.1916, "step": 23515, "teacher_loss": 0.1706707924604416 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4575875997543335, "learning_rate": 2.8639441301931587e-06, "loss": 0.2909, "step": 23516, "teacher_loss": 0.2723338007926941 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.9758557677268982, "learning_rate": 2.8626094291533965e-06, "loss": 0.2709, "step": 23517, "teacher_loss": 0.19253957271575928 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3562992513179779, "learning_rate": 2.8612750063876135e-06, "loss": 0.1919, "step": 23518, "teacher_loss": 0.17361171543598175 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.41558390855789185, "learning_rate": 2.8599408619264036e-06, "loss": 0.2252, "step": 23519, "teacher_loss": 0.20408935844898224 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.20573261380195618, "learning_rate": 2.858606995800348e-06, "loss": 0.177, "step": 23520, "teacher_loss": 0.17384850978851318 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.37960296869277954, "learning_rate": 2.8572734080400315e-06, "loss": 0.2047, "step": 23521, "teacher_loss": 0.18526498973369598 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.6667327880859375, "learning_rate": 2.8559400986760344e-06, "loss": 0.3079, "step": 23522, "teacher_loss": 0.2679884135723114 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.5486441254615784, "learning_rate": 2.854607067738917e-06, "loss": 0.2358, "step": 23523, "teacher_loss": 0.20101585984230042 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.47195035219192505, "learning_rate": 2.8532743152592467e-06, "loss": 0.1769, "step": 23524, "teacher_loss": 0.1441117227077484 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.38555091619491577, "learning_rate": 2.851941841267581e-06, "loss": 0.2055, "step": 23525, "teacher_loss": 0.18544459342956543 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.20950210094451904, "learning_rate": 2.850609645794465e-06, "loss": 0.219, "step": 23526, "teacher_loss": 0.22002992033958435 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.26769882440567017, "learning_rate": 2.849277728870446e-06, "loss": 0.2137, "step": 23527, "teacher_loss": 0.2076951563358307 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.48645126819610596, "learning_rate": 2.847946090526056e-06, "loss": 0.2054, "step": 23528, "teacher_loss": 0.17412686347961426 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.36318206787109375, "learning_rate": 2.8466147307918283e-06, "loss": 0.245, "step": 23529, "teacher_loss": 0.23185402154922485 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4255761504173279, "learning_rate": 2.845283649698291e-06, "loss": 0.2257, "step": 23530, "teacher_loss": 0.2034575641155243 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.2936986982822418, "learning_rate": 2.843952847275955e-06, "loss": 0.2339, "step": 23531, "teacher_loss": 0.2272147536277771 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.4340837895870209, "learning_rate": 2.8426223235553367e-06, "loss": 0.2342, "step": 23532, "teacher_loss": 0.2120259553194046 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.19881585240364075, "learning_rate": 2.841292078566936e-06, "loss": 0.1729, "step": 23533, "teacher_loss": 0.17004451155662537 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.6113169193267822, "learning_rate": 2.839962112341253e-06, "loss": 0.2142, "step": 23534, "teacher_loss": 0.1700935661792755 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.09825917333364487, "learning_rate": 2.838632424908786e-06, "loss": 0.1861, "step": 23535, "teacher_loss": 0.19589583575725555 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3130182921886444, "learning_rate": 2.8373030163000126e-06, "loss": 0.1466, "step": 23536, "teacher_loss": 0.12810730934143066 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.13756752014160156, "learning_rate": 2.835973886545414e-06, "loss": 0.1571, "step": 23537, "teacher_loss": 0.1592382788658142 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.2803334593772888, "learning_rate": 2.834645035675469e-06, "loss": 0.2013, "step": 23538, "teacher_loss": 0.1924774944782257 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.5822422504425049, "learning_rate": 2.8333164637206367e-06, "loss": 0.3283, "step": 23539, "teacher_loss": 0.30004915595054626 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.3407885432243347, "learning_rate": 2.8319881707113825e-06, "loss": 0.2123, "step": 23540, "teacher_loss": 0.1980486661195755 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.46780824661254883, "learning_rate": 2.8306601566781542e-06, "loss": 0.2421, "step": 23541, "teacher_loss": 0.21704059839248657 }, { "compression_loss": 0.0, "epoch": 4.25, "label_loss": 0.47814232110977173, "learning_rate": 2.829332421651404e-06, "loss": 0.1951, "step": 23542, "teacher_loss": 0.163700670003891 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.15735790133476257, "learning_rate": 2.8280049656615755e-06, "loss": 0.1926, "step": 23543, "teacher_loss": 0.19653186202049255 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3475435972213745, "learning_rate": 2.826677788739096e-06, "loss": 0.1892, "step": 23544, "teacher_loss": 0.1716184914112091 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4281400144100189, "learning_rate": 2.8253508909143967e-06, "loss": 0.2027, "step": 23545, "teacher_loss": 0.17761413753032684 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.29032742977142334, "learning_rate": 2.8240242722179042e-06, "loss": 0.2031, "step": 23546, "teacher_loss": 0.19338306784629822 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3970162272453308, "learning_rate": 2.822697932680025e-06, "loss": 0.1833, "step": 23547, "teacher_loss": 0.15956871211528778 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.26178571581840515, "learning_rate": 2.8213718723311728e-06, "loss": 0.2235, "step": 23548, "teacher_loss": 0.21921241283416748 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.49028852581977844, "learning_rate": 2.8200460912017545e-06, "loss": 0.2256, "step": 23549, "teacher_loss": 0.19617339968681335 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.29364901781082153, "learning_rate": 2.818720589322163e-06, "loss": 0.1964, "step": 23550, "teacher_loss": 0.18561695516109467 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5711995363235474, "learning_rate": 2.817395366722782e-06, "loss": 0.251, "step": 23551, "teacher_loss": 0.21544548869132996 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.39511638879776, "learning_rate": 2.816070423433999e-06, "loss": 0.2, "step": 23552, "teacher_loss": 0.1782768964767456 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.2171490341424942, "learning_rate": 2.814745759486198e-06, "loss": 0.1814, "step": 23553, "teacher_loss": 0.17737993597984314 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4558674693107605, "learning_rate": 2.8134213749097382e-06, "loss": 0.2352, "step": 23554, "teacher_loss": 0.21068216860294342 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4121059775352478, "learning_rate": 2.8120972697349886e-06, "loss": 0.1769, "step": 23555, "teacher_loss": 0.15071095526218414 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3422475755214691, "learning_rate": 2.810773443992313e-06, "loss": 0.222, "step": 23556, "teacher_loss": 0.20867910981178284 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.32564103603363037, "learning_rate": 2.8094498977120504e-06, "loss": 0.1964, "step": 23557, "teacher_loss": 0.18203064799308777 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.2715091109275818, "learning_rate": 2.808126630924555e-06, "loss": 0.2567, "step": 23558, "teacher_loss": 0.2550812363624573 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5449018478393555, "learning_rate": 2.806803643660166e-06, "loss": 0.1872, "step": 23559, "teacher_loss": 0.1474830061197281 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.43778133392333984, "learning_rate": 2.805480935949211e-06, "loss": 0.2486, "step": 23560, "teacher_loss": 0.2275719940662384 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.6413131952285767, "learning_rate": 2.8041585078220105e-06, "loss": 0.2427, "step": 23561, "teacher_loss": 0.19837774336338043 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3337388038635254, "learning_rate": 2.8028363593088985e-06, "loss": 0.2628, "step": 23562, "teacher_loss": 0.2548964321613312 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5354351997375488, "learning_rate": 2.801514490440179e-06, "loss": 0.1855, "step": 23563, "teacher_loss": 0.14658969640731812 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.24030691385269165, "learning_rate": 2.800192901246153e-06, "loss": 0.1664, "step": 23564, "teacher_loss": 0.15816043317317963 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.9059640765190125, "learning_rate": 2.7988715917571327e-06, "loss": 0.2796, "step": 23565, "teacher_loss": 0.20996958017349243 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.47720617055892944, "learning_rate": 2.7975505620034073e-06, "loss": 0.2224, "step": 23566, "teacher_loss": 0.19410720467567444 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3822779059410095, "learning_rate": 2.7962298120152578e-06, "loss": 0.1807, "step": 23567, "teacher_loss": 0.15834113955497742 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4422036409378052, "learning_rate": 2.7949093418229717e-06, "loss": 0.2449, "step": 23568, "teacher_loss": 0.22297346591949463 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.35386139154434204, "learning_rate": 2.793589151456824e-06, "loss": 0.1909, "step": 23569, "teacher_loss": 0.1728121042251587 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.2762065827846527, "learning_rate": 2.792269240947076e-06, "loss": 0.1712, "step": 23570, "teacher_loss": 0.1594999134540558 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.25592854619026184, "learning_rate": 2.790949610323994e-06, "loss": 0.1631, "step": 23571, "teacher_loss": 0.15273168683052063 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.791628360748291, "learning_rate": 2.789630259617838e-06, "loss": 0.2369, "step": 23572, "teacher_loss": 0.17526771128177643 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4047967791557312, "learning_rate": 2.7883111888588507e-06, "loss": 0.2387, "step": 23573, "teacher_loss": 0.22019270062446594 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.3563660979270935, "learning_rate": 2.7869923980772667e-06, "loss": 0.1992, "step": 23574, "teacher_loss": 0.1817789375782013 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.9154502153396606, "learning_rate": 2.7856738873033395e-06, "loss": 0.2635, "step": 23575, "teacher_loss": 0.1910582184791565 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.861147403717041, "learning_rate": 2.7843556565672885e-06, "loss": 0.298, "step": 23576, "teacher_loss": 0.23542526364326477 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.4469071626663208, "learning_rate": 2.7830377058993344e-06, "loss": 0.2288, "step": 23577, "teacher_loss": 0.2045501470565796 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.37797120213508606, "learning_rate": 2.7817200353296986e-06, "loss": 0.185, "step": 23578, "teacher_loss": 0.163558691740036 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.7755628824234009, "learning_rate": 2.7804026448885926e-06, "loss": 0.2373, "step": 23579, "teacher_loss": 0.17747154831886292 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.2284199297428131, "learning_rate": 2.7790855346062135e-06, "loss": 0.2513, "step": 23580, "teacher_loss": 0.2538015842437744 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.08206374943256378, "learning_rate": 2.7777687045127644e-06, "loss": 0.1526, "step": 23581, "teacher_loss": 0.16046173870563507 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.8750606775283813, "learning_rate": 2.7764521546384366e-06, "loss": 0.2926, "step": 23582, "teacher_loss": 0.22783727943897247 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.6559481620788574, "learning_rate": 2.7751358850134106e-06, "loss": 0.1822, "step": 23583, "teacher_loss": 0.1295488327741623 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.6263271570205688, "learning_rate": 2.7738198956678663e-06, "loss": 0.2224, "step": 23584, "teacher_loss": 0.17753730714321136 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.27602440118789673, "learning_rate": 2.7725041866319788e-06, "loss": 0.2094, "step": 23585, "teacher_loss": 0.20200704038143158 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.14994439482688904, "learning_rate": 2.771188757935908e-06, "loss": 0.1787, "step": 23586, "teacher_loss": 0.18185602128505707 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5035929679870605, "learning_rate": 2.7698736096098144e-06, "loss": 0.4151, "step": 23587, "teacher_loss": 0.40522313117980957 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.601744532585144, "learning_rate": 2.7685587416838535e-06, "loss": 0.197, "step": 23588, "teacher_loss": 0.1520429253578186 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.7705152630805969, "learning_rate": 2.767244154188167e-06, "loss": 0.4206, "step": 23589, "teacher_loss": 0.38169431686401367 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.7192791700363159, "learning_rate": 2.7659298471529005e-06, "loss": 0.2534, "step": 23590, "teacher_loss": 0.20158948004245758 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5837382674217224, "learning_rate": 2.7646158206081777e-06, "loss": 0.2236, "step": 23591, "teacher_loss": 0.183608278632164 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.22908684611320496, "learning_rate": 2.763302074584132e-06, "loss": 0.2104, "step": 23592, "teacher_loss": 0.20830616354942322 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.6608120203018188, "learning_rate": 2.761988609110884e-06, "loss": 0.2222, "step": 23593, "teacher_loss": 0.17351379990577698 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.9826804399490356, "learning_rate": 2.7606754242185437e-06, "loss": 0.2686, "step": 23594, "teacher_loss": 0.1892877072095871 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.42528021335601807, "learning_rate": 2.7593625199372236e-06, "loss": 0.2119, "step": 23595, "teacher_loss": 0.18822228908538818 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.353785902261734, "learning_rate": 2.7580498962970187e-06, "loss": 0.1658, "step": 23596, "teacher_loss": 0.14488321542739868 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.6266546845436096, "learning_rate": 2.756737553328025e-06, "loss": 0.2645, "step": 23597, "teacher_loss": 0.22423425316810608 }, { "compression_loss": 0.0, "epoch": 4.26, "label_loss": 0.5388700366020203, "learning_rate": 2.7554254910603364e-06, "loss": 0.2224, "step": 23598, "teacher_loss": 0.18726500868797302 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.48659372329711914, "learning_rate": 2.7541137095240263e-06, "loss": 0.2489, "step": 23599, "teacher_loss": 0.22252321243286133 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6074894666671753, "learning_rate": 2.752802208749177e-06, "loss": 0.1936, "step": 23600, "teacher_loss": 0.14758385717868805 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6128398180007935, "learning_rate": 2.7514909887658514e-06, "loss": 0.1831, "step": 23601, "teacher_loss": 0.13529758155345917 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5379999876022339, "learning_rate": 2.750180049604113e-06, "loss": 0.2137, "step": 23602, "teacher_loss": 0.17761120200157166 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5924365520477295, "learning_rate": 2.748869391294025e-06, "loss": 0.275, "step": 23603, "teacher_loss": 0.2397579848766327 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.1752086728811264, "learning_rate": 2.7475590138656266e-06, "loss": 0.1171, "step": 23604, "teacher_loss": 0.11063025891780853 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3447811007499695, "learning_rate": 2.7462489173489636e-06, "loss": 0.2136, "step": 23605, "teacher_loss": 0.19902482628822327 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.38671019673347473, "learning_rate": 2.7449391017740806e-06, "loss": 0.1852, "step": 23606, "teacher_loss": 0.16286440193653107 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5928007960319519, "learning_rate": 2.743629567170995e-06, "loss": 0.2178, "step": 23607, "teacher_loss": 0.176174595952034 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5205186009407043, "learning_rate": 2.7423203135697396e-06, "loss": 0.2643, "step": 23608, "teacher_loss": 0.2357889860868454 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.29240235686302185, "learning_rate": 2.74101134100033e-06, "loss": 0.2249, "step": 23609, "teacher_loss": 0.2174319624900818 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5066640973091125, "learning_rate": 2.739702649492778e-06, "loss": 0.3548, "step": 23610, "teacher_loss": 0.3379090428352356 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.17395423352718353, "learning_rate": 2.738394239077079e-06, "loss": 0.1831, "step": 23611, "teacher_loss": 0.18415626883506775 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.15302030742168427, "learning_rate": 2.737086109783244e-06, "loss": 0.1991, "step": 23612, "teacher_loss": 0.20417320728302002 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.2503766715526581, "learning_rate": 2.735778261641259e-06, "loss": 0.1552, "step": 23613, "teacher_loss": 0.14459940791130066 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.16209635138511658, "learning_rate": 2.734470694681104e-06, "loss": 0.1768, "step": 23614, "teacher_loss": 0.17839598655700684 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.43903863430023193, "learning_rate": 2.733163408932762e-06, "loss": 0.2001, "step": 23615, "teacher_loss": 0.17356491088867188 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6795310974121094, "learning_rate": 2.731856404426209e-06, "loss": 0.2194, "step": 23616, "teacher_loss": 0.16825395822525024 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5856727361679077, "learning_rate": 2.7305496811914033e-06, "loss": 0.204, "step": 23617, "teacher_loss": 0.16160088777542114 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.34873586893081665, "learning_rate": 2.7292432392583077e-06, "loss": 0.2508, "step": 23618, "teacher_loss": 0.23995625972747803 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.39873868227005005, "learning_rate": 2.7279370786568785e-06, "loss": 0.1802, "step": 23619, "teacher_loss": 0.15586526691913605 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6993827819824219, "learning_rate": 2.726631199417055e-06, "loss": 0.3163, "step": 23620, "teacher_loss": 0.2737899124622345 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.4949374794960022, "learning_rate": 2.7253256015687818e-06, "loss": 0.2702, "step": 23621, "teacher_loss": 0.24525438249111176 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3132849335670471, "learning_rate": 2.7240202851419944e-06, "loss": 0.224, "step": 23622, "teacher_loss": 0.21411174535751343 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.49436455965042114, "learning_rate": 2.722715250166616e-06, "loss": 0.3305, "step": 23623, "teacher_loss": 0.31229573488235474 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.23616701364517212, "learning_rate": 2.721410496672559e-06, "loss": 0.1642, "step": 23624, "teacher_loss": 0.15620103478431702 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6335381865501404, "learning_rate": 2.720106024689755e-06, "loss": 0.1876, "step": 23625, "teacher_loss": 0.13805922865867615 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.33473601937294006, "learning_rate": 2.7188018342481025e-06, "loss": 0.1575, "step": 23626, "teacher_loss": 0.1377830058336258 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3796151876449585, "learning_rate": 2.7174979253775e-06, "loss": 0.2598, "step": 23627, "teacher_loss": 0.24652042984962463 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.29367536306381226, "learning_rate": 2.7161942981078453e-06, "loss": 0.1931, "step": 23628, "teacher_loss": 0.18192274868488312 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.2525085508823395, "learning_rate": 2.714890952469029e-06, "loss": 0.1905, "step": 23629, "teacher_loss": 0.18362337350845337 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.4037827253341675, "learning_rate": 2.713587888490928e-06, "loss": 0.2182, "step": 23630, "teacher_loss": 0.19760766625404358 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.8041902780532837, "learning_rate": 2.7122851062034186e-06, "loss": 0.2514, "step": 23631, "teacher_loss": 0.18992453813552856 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.4077351689338684, "learning_rate": 2.710982605636377e-06, "loss": 0.2412, "step": 23632, "teacher_loss": 0.222703754901886 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.41937679052352905, "learning_rate": 2.7096803868196546e-06, "loss": 0.2662, "step": 23633, "teacher_loss": 0.24914045631885529 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3842611312866211, "learning_rate": 2.708378449783113e-06, "loss": 0.1823, "step": 23634, "teacher_loss": 0.15986892580986023 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.15760532021522522, "learning_rate": 2.7070767945566054e-06, "loss": 0.1607, "step": 23635, "teacher_loss": 0.16107967495918274 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3653634190559387, "learning_rate": 2.705775421169971e-06, "loss": 0.2363, "step": 23636, "teacher_loss": 0.22196470201015472 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.857946515083313, "learning_rate": 2.704474329653037e-06, "loss": 0.3949, "step": 23637, "teacher_loss": 0.34349387884140015 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.22090455889701843, "learning_rate": 2.7031735200356523e-06, "loss": 0.1788, "step": 23638, "teacher_loss": 0.17416216433048248 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3113109767436981, "learning_rate": 2.7018729923476306e-06, "loss": 0.1889, "step": 23639, "teacher_loss": 0.1753501296043396 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.537804901599884, "learning_rate": 2.7005727466187847e-06, "loss": 0.1807, "step": 23640, "teacher_loss": 0.14101248979568481 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.32615751028060913, "learning_rate": 2.699272782878931e-06, "loss": 0.2167, "step": 23641, "teacher_loss": 0.20454160869121552 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5487974882125854, "learning_rate": 2.697973101157877e-06, "loss": 0.2565, "step": 23642, "teacher_loss": 0.2240055501461029 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6533028483390808, "learning_rate": 2.6966737014854114e-06, "loss": 0.1986, "step": 23643, "teacher_loss": 0.14804288744926453 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.20450858771800995, "learning_rate": 2.6953745838913314e-06, "loss": 0.2051, "step": 23644, "teacher_loss": 0.20514845848083496 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.573708176612854, "learning_rate": 2.6940757484054246e-06, "loss": 0.2153, "step": 23645, "teacher_loss": 0.1755288541316986 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.23085537552833557, "learning_rate": 2.6927771950574625e-06, "loss": 0.1832, "step": 23646, "teacher_loss": 0.17785832285881042 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.6519721150398254, "learning_rate": 2.69147892387722e-06, "loss": 0.3159, "step": 23647, "teacher_loss": 0.2785448431968689 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.9092443585395813, "learning_rate": 2.6901809348944674e-06, "loss": 0.2889, "step": 23648, "teacher_loss": 0.2200058549642563 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.31982648372650146, "learning_rate": 2.688883228138955e-06, "loss": 0.1735, "step": 23649, "teacher_loss": 0.15729407966136932 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3426324725151062, "learning_rate": 2.6875858036404418e-06, "loss": 0.1749, "step": 23650, "teacher_loss": 0.1563049554824829 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.3827723264694214, "learning_rate": 2.6862886614286693e-06, "loss": 0.1838, "step": 23651, "teacher_loss": 0.16174717247486115 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.5085392594337463, "learning_rate": 2.6849918015333787e-06, "loss": 0.2858, "step": 23652, "teacher_loss": 0.2610986828804016 }, { "compression_loss": 0.0, "epoch": 4.27, "label_loss": 0.2684899568557739, "learning_rate": 2.6836952239843077e-06, "loss": 0.189, "step": 23653, "teacher_loss": 0.1801636815071106 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.303982675075531, "learning_rate": 2.682398928811176e-06, "loss": 0.1968, "step": 23654, "teacher_loss": 0.18491417169570923 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3728879690170288, "learning_rate": 2.68110291604371e-06, "loss": 0.2079, "step": 23655, "teacher_loss": 0.18956203758716583 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.33532893657684326, "learning_rate": 2.679807185711616e-06, "loss": 0.226, "step": 23656, "teacher_loss": 0.2139042615890503 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.20250120759010315, "learning_rate": 2.678511737844606e-06, "loss": 0.1454, "step": 23657, "teacher_loss": 0.13900502026081085 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3045519292354584, "learning_rate": 2.677216572472384e-06, "loss": 0.1534, "step": 23658, "teacher_loss": 0.13655096292495728 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.2404729723930359, "learning_rate": 2.675921689624636e-06, "loss": 0.2556, "step": 23659, "teacher_loss": 0.25723642110824585 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.16769632697105408, "learning_rate": 2.6746270893310545e-06, "loss": 0.2022, "step": 23660, "teacher_loss": 0.20600873231887817 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.577185869216919, "learning_rate": 2.673332771621324e-06, "loss": 0.1911, "step": 23661, "teacher_loss": 0.14820022881031036 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.547667384147644, "learning_rate": 2.672038736525113e-06, "loss": 0.3426, "step": 23662, "teacher_loss": 0.3198525309562683 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.2580743432044983, "learning_rate": 2.670744984072094e-06, "loss": 0.2016, "step": 23663, "teacher_loss": 0.19527548551559448 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.4142838716506958, "learning_rate": 2.6694515142919258e-06, "loss": 0.2567, "step": 23664, "teacher_loss": 0.23914137482643127 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.26650261878967285, "learning_rate": 2.6681583272142653e-06, "loss": 0.1881, "step": 23665, "teacher_loss": 0.17933568358421326 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.8403988480567932, "learning_rate": 2.6668654228687657e-06, "loss": 0.3408, "step": 23666, "teacher_loss": 0.28531789779663086 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.526668906211853, "learning_rate": 2.66557280128506e-06, "loss": 0.1951, "step": 23667, "teacher_loss": 0.15823514759540558 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5754711627960205, "learning_rate": 2.6642804624927895e-06, "loss": 0.2397, "step": 23668, "teacher_loss": 0.20234721899032593 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.2596701979637146, "learning_rate": 2.662988406521589e-06, "loss": 0.1621, "step": 23669, "teacher_loss": 0.15130555629730225 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.27532607316970825, "learning_rate": 2.6616966334010716e-06, "loss": 0.1559, "step": 23670, "teacher_loss": 0.14258795976638794 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5407602190971375, "learning_rate": 2.6604051431608584e-06, "loss": 0.292, "step": 23671, "teacher_loss": 0.2643534541130066 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.6901336908340454, "learning_rate": 2.6591139358305623e-06, "loss": 0.2529, "step": 23672, "teacher_loss": 0.2043488621711731 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.10922780632972717, "learning_rate": 2.6578230114397845e-06, "loss": 0.16, "step": 23673, "teacher_loss": 0.16559310257434845 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3906305432319641, "learning_rate": 2.656532370018113e-06, "loss": 0.1816, "step": 23674, "teacher_loss": 0.15834026038646698 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5325765609741211, "learning_rate": 2.6552420115951546e-06, "loss": 0.2156, "step": 23675, "teacher_loss": 0.1804354041814804 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5849790573120117, "learning_rate": 2.6539519362004856e-06, "loss": 0.2685, "step": 23676, "teacher_loss": 0.2333393096923828 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.7238998413085938, "learning_rate": 2.6526621438636784e-06, "loss": 0.3253, "step": 23677, "teacher_loss": 0.28098270297050476 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.6053875684738159, "learning_rate": 2.6513726346143096e-06, "loss": 0.2313, "step": 23678, "teacher_loss": 0.18977703154087067 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.36253345012664795, "learning_rate": 2.6500834084819476e-06, "loss": 0.1852, "step": 23679, "teacher_loss": 0.16548995673656464 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3586357831954956, "learning_rate": 2.6487944654961416e-06, "loss": 0.2256, "step": 23680, "teacher_loss": 0.210833340883255 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.2890626788139343, "learning_rate": 2.647505805686448e-06, "loss": 0.1437, "step": 23681, "teacher_loss": 0.12753267586231232 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 1.1699092388153076, "learning_rate": 2.6462174290824152e-06, "loss": 0.3426, "step": 23682, "teacher_loss": 0.25064951181411743 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.23057463765144348, "learning_rate": 2.6449293357135796e-06, "loss": 0.1693, "step": 23683, "teacher_loss": 0.16252447664737701 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5053479671478271, "learning_rate": 2.643641525609462e-06, "loss": 0.2386, "step": 23684, "teacher_loss": 0.20893272757530212 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.4053344428539276, "learning_rate": 2.6423539987996077e-06, "loss": 0.2278, "step": 23685, "teacher_loss": 0.20809870958328247 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.12880100309848785, "learning_rate": 2.6410667553135244e-06, "loss": 0.1432, "step": 23686, "teacher_loss": 0.14477944374084473 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.4257130026817322, "learning_rate": 2.63977979518072e-06, "loss": 0.2553, "step": 23687, "teacher_loss": 0.23636209964752197 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.36245274543762207, "learning_rate": 2.6384931184307133e-06, "loss": 0.1946, "step": 23688, "teacher_loss": 0.1759561002254486 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.2478124499320984, "learning_rate": 2.6372067250929984e-06, "loss": 0.1998, "step": 23689, "teacher_loss": 0.19450706243515015 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.22109420597553253, "learning_rate": 2.6359206151970634e-06, "loss": 0.1813, "step": 23690, "teacher_loss": 0.17692360281944275 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.7232908010482788, "learning_rate": 2.6346347887723997e-06, "loss": 0.2453, "step": 23691, "teacher_loss": 0.19214873015880585 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.5417847633361816, "learning_rate": 2.6333492458484908e-06, "loss": 0.1883, "step": 23692, "teacher_loss": 0.14897063374519348 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.7968742847442627, "learning_rate": 2.632063986454803e-06, "loss": 0.2435, "step": 23693, "teacher_loss": 0.1820240020751953 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.6277574896812439, "learning_rate": 2.6307790106208076e-06, "loss": 0.2771, "step": 23694, "teacher_loss": 0.23808756470680237 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.13025175034999847, "learning_rate": 2.6294943183759673e-06, "loss": 0.1703, "step": 23695, "teacher_loss": 0.17476221919059753 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.1661679744720459, "learning_rate": 2.628209909749731e-06, "loss": 0.1862, "step": 23696, "teacher_loss": 0.1883726418018341 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.6914597749710083, "learning_rate": 2.6269257847715476e-06, "loss": 0.2839, "step": 23697, "teacher_loss": 0.2385876476764679 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3294179439544678, "learning_rate": 2.6256419434708628e-06, "loss": 0.1925, "step": 23698, "teacher_loss": 0.17728745937347412 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3500949740409851, "learning_rate": 2.6243583858771093e-06, "loss": 0.2113, "step": 23699, "teacher_loss": 0.19583293795585632 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 1.0988705158233643, "learning_rate": 2.62307511201971e-06, "loss": 0.2112, "step": 23700, "teacher_loss": 0.11261972784996033 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.523552417755127, "learning_rate": 2.62179212192809e-06, "loss": 0.2003, "step": 23701, "teacher_loss": 0.16437458992004395 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.6344937682151794, "learning_rate": 2.620509415631669e-06, "loss": 0.2464, "step": 23702, "teacher_loss": 0.20329588651657104 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.8018810749053955, "learning_rate": 2.619226993159847e-06, "loss": 0.2492, "step": 23703, "teacher_loss": 0.1877986341714859 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.24137923121452332, "learning_rate": 2.6179448545420315e-06, "loss": 0.129, "step": 23704, "teacher_loss": 0.1164843812584877 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.22313034534454346, "learning_rate": 2.6166629998076214e-06, "loss": 0.2637, "step": 23705, "teacher_loss": 0.26822540163993835 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.4842255711555481, "learning_rate": 2.6153814289859963e-06, "loss": 0.2132, "step": 23706, "teacher_loss": 0.18303081393241882 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.35062384605407715, "learning_rate": 2.614100142106544e-06, "loss": 0.2177, "step": 23707, "teacher_loss": 0.20291483402252197 }, { "compression_loss": 0.0, "epoch": 4.28, "label_loss": 0.3233954906463623, "learning_rate": 2.612819139198645e-06, "loss": 0.1846, "step": 23708, "teacher_loss": 0.16922539472579956 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3108372688293457, "learning_rate": 2.611538420291662e-06, "loss": 0.1851, "step": 23709, "teacher_loss": 0.17108561098575592 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.2696700096130371, "learning_rate": 2.6102579854149582e-06, "loss": 0.1846, "step": 23710, "teacher_loss": 0.17516444623470306 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.2796294093132019, "learning_rate": 2.608977834597897e-06, "loss": 0.1645, "step": 23711, "teacher_loss": 0.1516929566860199 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.19252705574035645, "learning_rate": 2.60769796786982e-06, "loss": 0.1332, "step": 23712, "teacher_loss": 0.1265571266412735 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.262068510055542, "learning_rate": 2.60641838526008e-06, "loss": 0.1729, "step": 23713, "teacher_loss": 0.16300079226493835 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.435613214969635, "learning_rate": 2.605139086798002e-06, "loss": 0.1881, "step": 23714, "teacher_loss": 0.16055706143379211 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.2595844268798828, "learning_rate": 2.603860072512924e-06, "loss": 0.182, "step": 23715, "teacher_loss": 0.17342691123485565 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3657636046409607, "learning_rate": 2.602581342434173e-06, "loss": 0.2395, "step": 23716, "teacher_loss": 0.2254914492368698 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5975039005279541, "learning_rate": 2.601302896591059e-06, "loss": 0.2866, "step": 23717, "teacher_loss": 0.2520139813423157 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.43074923753738403, "learning_rate": 2.6000247350128996e-06, "loss": 0.189, "step": 23718, "teacher_loss": 0.1621120423078537 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.31292399764060974, "learning_rate": 2.5987468577289936e-06, "loss": 0.2344, "step": 23719, "teacher_loss": 0.22562751173973083 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.45082762837409973, "learning_rate": 2.5974692647686405e-06, "loss": 0.2171, "step": 23720, "teacher_loss": 0.19108451902866364 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6646035313606262, "learning_rate": 2.596191956161135e-06, "loss": 0.314, "step": 23721, "teacher_loss": 0.2750244736671448 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5501041412353516, "learning_rate": 2.5949149319357562e-06, "loss": 0.2738, "step": 23722, "teacher_loss": 0.24308286607265472 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.34966331720352173, "learning_rate": 2.59363819212179e-06, "loss": 0.1685, "step": 23723, "teacher_loss": 0.14837335050106049 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5191367864608765, "learning_rate": 2.5923617367484998e-06, "loss": 0.5544, "step": 23724, "teacher_loss": 0.5583438873291016 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.34102100133895874, "learning_rate": 2.5910855658451556e-06, "loss": 0.1879, "step": 23725, "teacher_loss": 0.17089171707630157 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3432924449443817, "learning_rate": 2.5898096794410185e-06, "loss": 0.2075, "step": 23726, "teacher_loss": 0.19237539172172546 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.8545480370521545, "learning_rate": 2.5885340775653345e-06, "loss": 0.3668, "step": 23727, "teacher_loss": 0.31264132261276245 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4476233124732971, "learning_rate": 2.5872587602473507e-06, "loss": 0.209, "step": 23728, "teacher_loss": 0.18244101107120514 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.377361923456192, "learning_rate": 2.5859837275163147e-06, "loss": 0.2144, "step": 23729, "teacher_loss": 0.19633352756500244 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4266466498374939, "learning_rate": 2.584708979401447e-06, "loss": 0.1645, "step": 23730, "teacher_loss": 0.13542108237743378 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3693513870239258, "learning_rate": 2.5834345159319807e-06, "loss": 0.1725, "step": 23731, "teacher_loss": 0.15059027075767517 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.519351065158844, "learning_rate": 2.5821603371371376e-06, "loss": 0.2482, "step": 23732, "teacher_loss": 0.2181241363286972 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.33885496854782104, "learning_rate": 2.580886443046127e-06, "loss": 0.1912, "step": 23733, "teacher_loss": 0.17476439476013184 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3547120988368988, "learning_rate": 2.5796128336881487e-06, "loss": 0.2199, "step": 23734, "teacher_loss": 0.20493239164352417 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.481080025434494, "learning_rate": 2.5783395090924155e-06, "loss": 0.1663, "step": 23735, "teacher_loss": 0.13136087357997894 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6012880802154541, "learning_rate": 2.577066469288118e-06, "loss": 0.2527, "step": 23736, "teacher_loss": 0.21392026543617249 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.588262140750885, "learning_rate": 2.5757937143044307e-06, "loss": 0.2237, "step": 23737, "teacher_loss": 0.18317997455596924 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.1728990077972412, "learning_rate": 2.574521244170554e-06, "loss": 0.174, "step": 23738, "teacher_loss": 0.17415770888328552 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5041546821594238, "learning_rate": 2.5732490589156495e-06, "loss": 0.1955, "step": 23739, "teacher_loss": 0.1611739993095398 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4104025363922119, "learning_rate": 2.5719771585688835e-06, "loss": 0.2404, "step": 23740, "teacher_loss": 0.22147688269615173 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4538041353225708, "learning_rate": 2.5707055431594193e-06, "loss": 0.2093, "step": 23741, "teacher_loss": 0.18208365142345428 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4469044804573059, "learning_rate": 2.569434212716418e-06, "loss": 0.2544, "step": 23742, "teacher_loss": 0.2330143302679062 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.42900365591049194, "learning_rate": 2.5681631672690165e-06, "loss": 0.1987, "step": 23743, "teacher_loss": 0.1731296330690384 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.45275551080703735, "learning_rate": 2.5668924068463605e-06, "loss": 0.2662, "step": 23744, "teacher_loss": 0.24547737836837769 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6328752636909485, "learning_rate": 2.5656219314775886e-06, "loss": 0.3254, "step": 23745, "teacher_loss": 0.2912237048149109 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.38416627049446106, "learning_rate": 2.5643517411918273e-06, "loss": 0.2084, "step": 23746, "teacher_loss": 0.1888643503189087 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.31591060757637024, "learning_rate": 2.5630818360181877e-06, "loss": 0.1826, "step": 23747, "teacher_loss": 0.16783419251441956 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.1476793736219406, "learning_rate": 2.5618122159858025e-06, "loss": 0.1522, "step": 23748, "teacher_loss": 0.15265515446662903 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.2059365212917328, "learning_rate": 2.5605428811237696e-06, "loss": 0.1338, "step": 23749, "teacher_loss": 0.1258166879415512 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.7105777263641357, "learning_rate": 2.5592738314611906e-06, "loss": 0.3561, "step": 23750, "teacher_loss": 0.31666165590286255 }, { "epoch": 4.29, "eval_exact_match": 80.35950804162725, "eval_f1": 87.73413237727307, "step": 23750 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6139076948165894, "learning_rate": 2.558005067027164e-06, "loss": 0.2031, "step": 23751, "teacher_loss": 0.15742570161819458 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6187229156494141, "learning_rate": 2.5567365878507805e-06, "loss": 0.2342, "step": 23752, "teacher_loss": 0.19143350422382355 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5888549089431763, "learning_rate": 2.5554683939611172e-06, "loss": 0.2146, "step": 23753, "teacher_loss": 0.17300280928611755 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.6130135655403137, "learning_rate": 2.5542004853872537e-06, "loss": 0.2386, "step": 23754, "teacher_loss": 0.19705167412757874 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5799407958984375, "learning_rate": 2.552932862158261e-06, "loss": 0.2496, "step": 23755, "teacher_loss": 0.21286708116531372 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.12938010692596436, "learning_rate": 2.5516655243031962e-06, "loss": 0.1551, "step": 23756, "teacher_loss": 0.15790340304374695 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5885113477706909, "learning_rate": 2.5503984718511193e-06, "loss": 0.2381, "step": 23757, "teacher_loss": 0.19914071261882782 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.3127386271953583, "learning_rate": 2.5491317048310834e-06, "loss": 0.3631, "step": 23758, "teacher_loss": 0.3687482178211212 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4303865134716034, "learning_rate": 2.5478652232721245e-06, "loss": 0.2308, "step": 23759, "teacher_loss": 0.2086786925792694 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4042850434780121, "learning_rate": 2.546599027203282e-06, "loss": 0.1974, "step": 23760, "teacher_loss": 0.1743844747543335 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4434024691581726, "learning_rate": 2.545333116653589e-06, "loss": 0.1982, "step": 23761, "teacher_loss": 0.17091898620128632 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.45673567056655884, "learning_rate": 2.5440674916520682e-06, "loss": 0.2736, "step": 23762, "teacher_loss": 0.25323793292045593 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.4874110221862793, "learning_rate": 2.542802152227731e-06, "loss": 0.1773, "step": 23763, "teacher_loss": 0.1428537219762802 }, { "compression_loss": 0.0, "epoch": 4.29, "label_loss": 0.5292913913726807, "learning_rate": 2.5415370984095928e-06, "loss": 0.2236, "step": 23764, "teacher_loss": 0.18958157300949097 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.4316334128379822, "learning_rate": 2.540272330226658e-06, "loss": 0.2063, "step": 23765, "teacher_loss": 0.18131625652313232 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.24590478837490082, "learning_rate": 2.5390078477079204e-06, "loss": 0.1999, "step": 23766, "teacher_loss": 0.19474168121814728 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2540549635887146, "learning_rate": 2.537743650882372e-06, "loss": 0.1719, "step": 23767, "teacher_loss": 0.1628093123435974 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.20236222445964813, "learning_rate": 2.536479739779003e-06, "loss": 0.1402, "step": 23768, "teacher_loss": 0.1332767903804779 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.7536660432815552, "learning_rate": 2.5352161144267798e-06, "loss": 0.3642, "step": 23769, "teacher_loss": 0.32095837593078613 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.20999716222286224, "learning_rate": 2.5339527748546814e-06, "loss": 0.1621, "step": 23770, "teacher_loss": 0.1567956805229187 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5529207587242126, "learning_rate": 2.5326897210916746e-06, "loss": 0.2539, "step": 23771, "teacher_loss": 0.22064395248889923 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.21606501936912537, "learning_rate": 2.5314269531667108e-06, "loss": 0.145, "step": 23772, "teacher_loss": 0.1371353566646576 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.30948394536972046, "learning_rate": 2.530164471108745e-06, "loss": 0.2303, "step": 23773, "teacher_loss": 0.22154507040977478 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.7455642819404602, "learning_rate": 2.52890227494672e-06, "loss": 0.323, "step": 23774, "teacher_loss": 0.27603402733802795 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5764243602752686, "learning_rate": 2.5276403647095752e-06, "loss": 0.2465, "step": 23775, "teacher_loss": 0.20983752608299255 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2964906692504883, "learning_rate": 2.526378740426246e-06, "loss": 0.2291, "step": 23776, "teacher_loss": 0.22162553668022156 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.3521164059638977, "learning_rate": 2.5251174021256514e-06, "loss": 0.2235, "step": 23777, "teacher_loss": 0.20921283960342407 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.6573386192321777, "learning_rate": 2.5238563498367123e-06, "loss": 0.3724, "step": 23778, "teacher_loss": 0.3407594561576843 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.260146826505661, "learning_rate": 2.5225955835883465e-06, "loss": 0.1641, "step": 23779, "teacher_loss": 0.15339714288711548 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.21703004837036133, "learning_rate": 2.52133510340945e-06, "loss": 0.1598, "step": 23780, "teacher_loss": 0.1534246951341629 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.4301258325576782, "learning_rate": 2.5200749093289306e-06, "loss": 0.2057, "step": 23781, "teacher_loss": 0.18078739941120148 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.19837181270122528, "learning_rate": 2.5188150013756727e-06, "loss": 0.1856, "step": 23782, "teacher_loss": 0.18417882919311523 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2406182587146759, "learning_rate": 2.5175553795785657e-06, "loss": 0.1599, "step": 23783, "teacher_loss": 0.15089094638824463 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.7943019270896912, "learning_rate": 2.516296043966494e-06, "loss": 0.2647, "step": 23784, "teacher_loss": 0.20584538578987122 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2987392544746399, "learning_rate": 2.5150369945683233e-06, "loss": 0.1882, "step": 23785, "teacher_loss": 0.17588341236114502 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.3883625864982605, "learning_rate": 2.5137782314129255e-06, "loss": 0.2015, "step": 23786, "teacher_loss": 0.18076877295970917 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.45821377635002136, "learning_rate": 2.5125197545291524e-06, "loss": 0.1958, "step": 23787, "teacher_loss": 0.16664782166481018 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5024231672286987, "learning_rate": 2.5112615639458626e-06, "loss": 0.2038, "step": 23788, "teacher_loss": 0.17067420482635498 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.673396110534668, "learning_rate": 2.5100036596919064e-06, "loss": 0.2418, "step": 23789, "teacher_loss": 0.1938217133283615 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2702580690383911, "learning_rate": 2.5087460417961155e-06, "loss": 0.2336, "step": 23790, "teacher_loss": 0.22954952716827393 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5539103746414185, "learning_rate": 2.5074887102873255e-06, "loss": 0.191, "step": 23791, "teacher_loss": 0.1506558507680893 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.8834432363510132, "learning_rate": 2.50623166519437e-06, "loss": 0.2313, "step": 23792, "teacher_loss": 0.15885570645332336 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.25771063566207886, "learning_rate": 2.504974906546061e-06, "loss": 0.206, "step": 23793, "teacher_loss": 0.20021876692771912 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5256583094596863, "learning_rate": 2.5037184343712147e-06, "loss": 0.2653, "step": 23794, "teacher_loss": 0.23638774454593658 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.4426930546760559, "learning_rate": 2.5024622486986425e-06, "loss": 0.1829, "step": 23795, "teacher_loss": 0.1540175974369049 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.41718316078186035, "learning_rate": 2.501206349557141e-06, "loss": 0.2059, "step": 23796, "teacher_loss": 0.18241557478904724 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5743406414985657, "learning_rate": 2.499950736975498e-06, "loss": 0.2322, "step": 23797, "teacher_loss": 0.1941579282283783 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.4761444926261902, "learning_rate": 2.498695410982515e-06, "loss": 0.2226, "step": 23798, "teacher_loss": 0.19440913200378418 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.18100130558013916, "learning_rate": 2.4974403716069648e-06, "loss": 0.1724, "step": 23799, "teacher_loss": 0.17143908143043518 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.15965360403060913, "learning_rate": 2.4961856188776185e-06, "loss": 0.1512, "step": 23800, "teacher_loss": 0.15023568272590637 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.1399000734090805, "learning_rate": 2.494931152823247e-06, "loss": 0.1403, "step": 23801, "teacher_loss": 0.14036494493484497 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.17357763648033142, "learning_rate": 2.4936769734726166e-06, "loss": 0.1539, "step": 23802, "teacher_loss": 0.15173056721687317 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.1769634187221527, "learning_rate": 2.492423080854474e-06, "loss": 0.1605, "step": 23803, "teacher_loss": 0.1586201786994934 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.43957751989364624, "learning_rate": 2.4911694749975693e-06, "loss": 0.2639, "step": 23804, "teacher_loss": 0.24432754516601562 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.6758774518966675, "learning_rate": 2.48991615593065e-06, "loss": 0.2416, "step": 23805, "teacher_loss": 0.1933761090040207 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.24798092246055603, "learning_rate": 2.4886631236824457e-06, "loss": 0.1635, "step": 23806, "teacher_loss": 0.15416103601455688 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.8544960021972656, "learning_rate": 2.4874103782816777e-06, "loss": 0.3367, "step": 23807, "teacher_loss": 0.2792031764984131 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.4296036958694458, "learning_rate": 2.4861579197570805e-06, "loss": 0.263, "step": 23808, "teacher_loss": 0.24443665146827698 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5193527936935425, "learning_rate": 2.4849057481373665e-06, "loss": 0.2197, "step": 23809, "teacher_loss": 0.18639755249023438 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5704477429389954, "learning_rate": 2.4836538634512327e-06, "loss": 0.2567, "step": 23810, "teacher_loss": 0.22183957695960999 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.6073932647705078, "learning_rate": 2.4824022657273982e-06, "loss": 0.223, "step": 23811, "teacher_loss": 0.18032227456569672 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5477582216262817, "learning_rate": 2.4811509549945495e-06, "loss": 0.3636, "step": 23812, "teacher_loss": 0.3431586027145386 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.5420389175415039, "learning_rate": 2.479899931281372e-06, "loss": 0.2945, "step": 23813, "teacher_loss": 0.2669578790664673 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.6106827855110168, "learning_rate": 2.4786491946165513e-06, "loss": 0.2727, "step": 23814, "teacher_loss": 0.23517176508903503 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.6508402824401855, "learning_rate": 2.4773987450287676e-06, "loss": 0.3378, "step": 23815, "teacher_loss": 0.3030053377151489 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.26316964626312256, "learning_rate": 2.4761485825466833e-06, "loss": 0.2458, "step": 23816, "teacher_loss": 0.24381719529628754 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.20813339948654175, "learning_rate": 2.4748987071989637e-06, "loss": 0.1682, "step": 23817, "teacher_loss": 0.16370925307273865 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.747562050819397, "learning_rate": 2.4736491190142673e-06, "loss": 0.4468, "step": 23818, "teacher_loss": 0.4133702516555786 }, { "compression_loss": 0.0, "epoch": 4.3, "label_loss": 0.2551780939102173, "learning_rate": 2.4723998180212366e-06, "loss": 0.1741, "step": 23819, "teacher_loss": 0.16505509614944458 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2800525426864624, "learning_rate": 2.47115080424852e-06, "loss": 0.1634, "step": 23820, "teacher_loss": 0.15049193799495697 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4618319272994995, "learning_rate": 2.4699020777247545e-06, "loss": 0.3396, "step": 23821, "teacher_loss": 0.32604295015335083 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.46803539991378784, "learning_rate": 2.468653638478567e-06, "loss": 0.2079, "step": 23822, "teacher_loss": 0.17895999550819397 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.568474531173706, "learning_rate": 2.4674054865385775e-06, "loss": 0.2291, "step": 23823, "teacher_loss": 0.19134452939033508 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.10933741182088852, "learning_rate": 2.4661576219334054e-06, "loss": 0.1574, "step": 23824, "teacher_loss": 0.16276784241199493 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.1125771701335907, "learning_rate": 2.4649100446916646e-06, "loss": 0.1662, "step": 23825, "teacher_loss": 0.17211046814918518 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.5954536199569702, "learning_rate": 2.4636627548419486e-06, "loss": 0.2485, "step": 23826, "teacher_loss": 0.2099829912185669 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.42176496982574463, "learning_rate": 2.462415752412862e-06, "loss": 0.2178, "step": 23827, "teacher_loss": 0.19509148597717285 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.21014374494552612, "learning_rate": 2.4611690374329965e-06, "loss": 0.2016, "step": 23828, "teacher_loss": 0.20066046714782715 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.34823089838027954, "learning_rate": 2.4599226099309274e-06, "loss": 0.1566, "step": 23829, "teacher_loss": 0.13529442250728607 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.5129961371421814, "learning_rate": 2.4586764699352353e-06, "loss": 0.2196, "step": 23830, "teacher_loss": 0.18697024881839752 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.9168434739112854, "learning_rate": 2.4574306174744943e-06, "loss": 0.3457, "step": 23831, "teacher_loss": 0.2822802662849426 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.3887714743614197, "learning_rate": 2.4561850525772613e-06, "loss": 0.2082, "step": 23832, "teacher_loss": 0.1880871057510376 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2686925530433655, "learning_rate": 2.4549397752720966e-06, "loss": 0.2274, "step": 23833, "teacher_loss": 0.22280171513557434 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.10843300074338913, "learning_rate": 2.4536947855875558e-06, "loss": 0.1213, "step": 23834, "teacher_loss": 0.12276796251535416 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4365948438644409, "learning_rate": 2.452450083552173e-06, "loss": 0.2118, "step": 23835, "teacher_loss": 0.18684130907058716 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.6035574674606323, "learning_rate": 2.4512056691944932e-06, "loss": 0.2798, "step": 23836, "teacher_loss": 0.2438754439353943 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.19577571749687195, "learning_rate": 2.449961542543041e-06, "loss": 0.206, "step": 23837, "teacher_loss": 0.20716840028762817 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4716640114784241, "learning_rate": 2.4487177036263448e-06, "loss": 0.1613, "step": 23838, "teacher_loss": 0.12685352563858032 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.3927239775657654, "learning_rate": 2.447474152472923e-06, "loss": 0.1667, "step": 23839, "teacher_loss": 0.14162956178188324 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.6211049556732178, "learning_rate": 2.446230889111282e-06, "loss": 0.1933, "step": 23840, "teacher_loss": 0.14578421413898468 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2738092243671417, "learning_rate": 2.4449879135699286e-06, "loss": 0.1901, "step": 23841, "teacher_loss": 0.18084776401519775 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.49023595452308655, "learning_rate": 2.4437452258773635e-06, "loss": 0.2198, "step": 23842, "teacher_loss": 0.18974299728870392 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.45602354407310486, "learning_rate": 2.442502826062072e-06, "loss": 0.1889, "step": 23843, "teacher_loss": 0.15919554233551025 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.8991382718086243, "learning_rate": 2.441260714152543e-06, "loss": 0.2489, "step": 23844, "teacher_loss": 0.17661425471305847 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.36079782247543335, "learning_rate": 2.44001889017725e-06, "loss": 0.2013, "step": 23845, "teacher_loss": 0.18355578184127808 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.47617724537849426, "learning_rate": 2.4387773541646713e-06, "loss": 0.2382, "step": 23846, "teacher_loss": 0.21178226172924042 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.47051021456718445, "learning_rate": 2.437536106143265e-06, "loss": 0.2486, "step": 23847, "teacher_loss": 0.22398701310157776 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4085085690021515, "learning_rate": 2.43629514614149e-06, "loss": 0.213, "step": 23848, "teacher_loss": 0.19133064150810242 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.3996298313140869, "learning_rate": 2.435054474187802e-06, "loss": 0.174, "step": 23849, "teacher_loss": 0.1488867998123169 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.13941237330436707, "learning_rate": 2.433814090310642e-06, "loss": 0.1385, "step": 23850, "teacher_loss": 0.13840574026107788 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4587949514389038, "learning_rate": 2.4325739945384484e-06, "loss": 0.1645, "step": 23851, "teacher_loss": 0.13180112838745117 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2695939838886261, "learning_rate": 2.431334186899657e-06, "loss": 0.1906, "step": 23852, "teacher_loss": 0.18180054426193237 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.5554072856903076, "learning_rate": 2.4300946674226864e-06, "loss": 0.3261, "step": 23853, "teacher_loss": 0.30063754320144653 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4812760353088379, "learning_rate": 2.4288554361359592e-06, "loss": 0.2257, "step": 23854, "teacher_loss": 0.19732257723808289 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.3424815535545349, "learning_rate": 2.4276164930678895e-06, "loss": 0.1917, "step": 23855, "teacher_loss": 0.174946129322052 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.38485413789749146, "learning_rate": 2.426377838246881e-06, "loss": 0.2172, "step": 23856, "teacher_loss": 0.19852669537067413 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2820977568626404, "learning_rate": 2.4251394717013225e-06, "loss": 0.1675, "step": 23857, "teacher_loss": 0.15473651885986328 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.33201009035110474, "learning_rate": 2.4239013934596217e-06, "loss": 0.1816, "step": 23858, "teacher_loss": 0.16483500599861145 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4268755614757538, "learning_rate": 2.422663603550157e-06, "loss": 0.179, "step": 23859, "teacher_loss": 0.15141290426254272 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.45509421825408936, "learning_rate": 2.4214261020013013e-06, "loss": 0.2218, "step": 23860, "teacher_loss": 0.1958651840686798 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.760026216506958, "learning_rate": 2.420188888841438e-06, "loss": 0.3222, "step": 23861, "teacher_loss": 0.2735786437988281 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.24350839853286743, "learning_rate": 2.4189519640989282e-06, "loss": 0.1629, "step": 23862, "teacher_loss": 0.15390709042549133 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.6660348176956177, "learning_rate": 2.4177153278021258e-06, "loss": 0.2341, "step": 23863, "teacher_loss": 0.1861265003681183 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.23576444387435913, "learning_rate": 2.416478979979388e-06, "loss": 0.1866, "step": 23864, "teacher_loss": 0.18108394742012024 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.41180574893951416, "learning_rate": 2.415242920659062e-06, "loss": 0.1702, "step": 23865, "teacher_loss": 0.14333011209964752 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.44124647974967957, "learning_rate": 2.414007149869482e-06, "loss": 0.2053, "step": 23866, "teacher_loss": 0.17912161350250244 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.469724178314209, "learning_rate": 2.4127716676389837e-06, "loss": 0.4436, "step": 23867, "teacher_loss": 0.44067060947418213 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.8521972894668579, "learning_rate": 2.411536473995896e-06, "loss": 0.254, "step": 23868, "teacher_loss": 0.18750128149986267 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.4770563840866089, "learning_rate": 2.410301568968535e-06, "loss": 0.2269, "step": 23869, "teacher_loss": 0.1990727186203003 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.2516653537750244, "learning_rate": 2.4090669525852057e-06, "loss": 0.1767, "step": 23870, "teacher_loss": 0.1684255599975586 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.33466997742652893, "learning_rate": 2.407832624874227e-06, "loss": 0.1801, "step": 23871, "teacher_loss": 0.16287216544151306 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.48133599758148193, "learning_rate": 2.4065985858638955e-06, "loss": 0.2529, "step": 23872, "teacher_loss": 0.22756756842136383 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.3731071352958679, "learning_rate": 2.4053648355824964e-06, "loss": 0.216, "step": 23873, "teacher_loss": 0.1985270380973816 }, { "compression_loss": 0.0, "epoch": 4.31, "label_loss": 0.31552666425704956, "learning_rate": 2.404131374058322e-06, "loss": 0.1858, "step": 23874, "teacher_loss": 0.17139732837677002 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.9457638263702393, "learning_rate": 2.4028982013196533e-06, "loss": 0.2622, "step": 23875, "teacher_loss": 0.18627455830574036 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.4866615831851959, "learning_rate": 2.401665317394759e-06, "loss": 0.2059, "step": 23876, "teacher_loss": 0.17465436458587646 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3103378415107727, "learning_rate": 2.400432722311905e-06, "loss": 0.1828, "step": 23877, "teacher_loss": 0.16863971948623657 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.37522751092910767, "learning_rate": 2.3992004160993587e-06, "loss": 0.1762, "step": 23878, "teacher_loss": 0.15412789583206177 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.2887871265411377, "learning_rate": 2.3979683987853623e-06, "loss": 0.1717, "step": 23879, "teacher_loss": 0.15871457755565643 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.7413889169692993, "learning_rate": 2.3967366703981687e-06, "loss": 0.2301, "step": 23880, "teacher_loss": 0.17330613732337952 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.24267703294754028, "learning_rate": 2.3955052309660215e-06, "loss": 0.1614, "step": 23881, "teacher_loss": 0.15238264203071594 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.42451900243759155, "learning_rate": 2.394274080517143e-06, "loss": 0.2171, "step": 23882, "teacher_loss": 0.19400498270988464 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.33517399430274963, "learning_rate": 2.393043219079768e-06, "loss": 0.2242, "step": 23883, "teacher_loss": 0.2118556797504425 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.4546704888343811, "learning_rate": 2.3918126466821165e-06, "loss": 0.2494, "step": 23884, "teacher_loss": 0.22662681341171265 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5795440673828125, "learning_rate": 2.3905823633523997e-06, "loss": 0.2605, "step": 23885, "teacher_loss": 0.2250637710094452 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.6300973296165466, "learning_rate": 2.3893523691188216e-06, "loss": 0.3525, "step": 23886, "teacher_loss": 0.32168614864349365 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.19654031097888947, "learning_rate": 2.3881226640095847e-06, "loss": 0.1843, "step": 23887, "teacher_loss": 0.18289095163345337 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.37306350469589233, "learning_rate": 2.3868932480528867e-06, "loss": 0.181, "step": 23888, "teacher_loss": 0.159670889377594 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.9259461164474487, "learning_rate": 2.3856641212769057e-06, "loss": 0.2978, "step": 23889, "teacher_loss": 0.2279871553182602 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.16115117073059082, "learning_rate": 2.3844352837098286e-06, "loss": 0.1227, "step": 23890, "teacher_loss": 0.11840471625328064 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.29567861557006836, "learning_rate": 2.3832067353798294e-06, "loss": 0.21, "step": 23891, "teacher_loss": 0.2004910707473755 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.1721012145280838, "learning_rate": 2.381978476315068e-06, "loss": 0.142, "step": 23892, "teacher_loss": 0.1386529803276062 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.6027420163154602, "learning_rate": 2.3807505065437113e-06, "loss": 0.2101, "step": 23893, "teacher_loss": 0.1664503663778305 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.27796274423599243, "learning_rate": 2.3795228260939146e-06, "loss": 0.1618, "step": 23894, "teacher_loss": 0.14889875054359436 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.18522487580776215, "learning_rate": 2.378295434993817e-06, "loss": 0.2036, "step": 23895, "teacher_loss": 0.20562222599983215 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.43860286474227905, "learning_rate": 2.377068333271566e-06, "loss": 0.2588, "step": 23896, "teacher_loss": 0.23887313902378082 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3722781240940094, "learning_rate": 2.3758415209552893e-06, "loss": 0.1865, "step": 23897, "teacher_loss": 0.16590192914009094 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.35110148787498474, "learning_rate": 2.374614998073119e-06, "loss": 0.1886, "step": 23898, "teacher_loss": 0.17056719958782196 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.6511486172676086, "learning_rate": 2.3733887646531767e-06, "loss": 0.2218, "step": 23899, "teacher_loss": 0.17406833171844482 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.31316378712654114, "learning_rate": 2.372162820723569e-06, "loss": 0.2009, "step": 23900, "teacher_loss": 0.18847988545894623 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3417724072933197, "learning_rate": 2.3709371663124073e-06, "loss": 0.1842, "step": 23901, "teacher_loss": 0.16670268774032593 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.47425639629364014, "learning_rate": 2.3697118014477974e-06, "loss": 0.1739, "step": 23902, "teacher_loss": 0.14055943489074707 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5473437309265137, "learning_rate": 2.3684867261578236e-06, "loss": 0.2306, "step": 23903, "teacher_loss": 0.1954355239868164 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3548859655857086, "learning_rate": 2.36726194047058e-06, "loss": 0.179, "step": 23904, "teacher_loss": 0.1594819873571396 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5557812452316284, "learning_rate": 2.3660374444141468e-06, "loss": 0.246, "step": 23905, "teacher_loss": 0.2116013467311859 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3208061456680298, "learning_rate": 2.3648132380165927e-06, "loss": 0.217, "step": 23906, "teacher_loss": 0.2054111659526825 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.10957368463277817, "learning_rate": 2.363589321305993e-06, "loss": 0.2082, "step": 23907, "teacher_loss": 0.21916519105434418 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.4408269226551056, "learning_rate": 2.3623656943104015e-06, "loss": 0.2311, "step": 23908, "teacher_loss": 0.20784947276115417 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.38090503215789795, "learning_rate": 2.361142357057878e-06, "loss": 0.2389, "step": 23909, "teacher_loss": 0.22308112680912018 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.26425451040267944, "learning_rate": 2.359919309576462e-06, "loss": 0.1898, "step": 23910, "teacher_loss": 0.18153586983680725 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.41465017199516296, "learning_rate": 2.3586965518942024e-06, "loss": 0.2, "step": 23911, "teacher_loss": 0.1762000024318695 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.42134973406791687, "learning_rate": 2.357474084039131e-06, "loss": 0.2102, "step": 23912, "teacher_loss": 0.1867617517709732 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.637391209602356, "learning_rate": 2.356251906039272e-06, "loss": 0.2311, "step": 23913, "teacher_loss": 0.18595662713050842 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.2097444087266922, "learning_rate": 2.3550300179226495e-06, "loss": 0.2016, "step": 23914, "teacher_loss": 0.20068207383155823 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.2666570544242859, "learning_rate": 2.3538084197172793e-06, "loss": 0.1414, "step": 23915, "teacher_loss": 0.12745575606822968 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5167502760887146, "learning_rate": 2.352587111451165e-06, "loss": 0.2202, "step": 23916, "teacher_loss": 0.18729762732982635 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.2968178987503052, "learning_rate": 2.3513660931523073e-06, "loss": 0.1681, "step": 23917, "teacher_loss": 0.15382464230060577 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5659321546554565, "learning_rate": 2.3501453648487075e-06, "loss": 0.2714, "step": 23918, "teacher_loss": 0.23864081501960754 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.32961222529411316, "learning_rate": 2.348924926568348e-06, "loss": 0.18, "step": 23919, "teacher_loss": 0.16337840259075165 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5843708515167236, "learning_rate": 2.347704778339202e-06, "loss": 0.2299, "step": 23920, "teacher_loss": 0.19050216674804688 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.190871000289917, "learning_rate": 2.3464849201892596e-06, "loss": 0.2396, "step": 23921, "teacher_loss": 0.24503040313720703 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.5453879237174988, "learning_rate": 2.3452653521464824e-06, "loss": 0.2554, "step": 23922, "teacher_loss": 0.22319284081459045 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.633758008480072, "learning_rate": 2.3440460742388243e-06, "loss": 0.2428, "step": 23923, "teacher_loss": 0.19933396577835083 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.9109911918640137, "learning_rate": 2.3428270864942446e-06, "loss": 0.296, "step": 23924, "teacher_loss": 0.22764885425567627 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.2388249635696411, "learning_rate": 2.3416083889406963e-06, "loss": 0.1824, "step": 23925, "teacher_loss": 0.176140695810318 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.333368182182312, "learning_rate": 2.3403899816061135e-06, "loss": 0.2113, "step": 23926, "teacher_loss": 0.19775158166885376 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.3298936188220978, "learning_rate": 2.3391718645184307e-06, "loss": 0.2278, "step": 23927, "teacher_loss": 0.21647566556930542 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.6233240365982056, "learning_rate": 2.3379540377055834e-06, "loss": 0.1815, "step": 23928, "teacher_loss": 0.13236692547798157 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.9931387901306152, "learning_rate": 2.3367365011954865e-06, "loss": 0.2743, "step": 23929, "teacher_loss": 0.1943785548210144 }, { "compression_loss": 0.0, "epoch": 4.32, "label_loss": 0.30954569578170776, "learning_rate": 2.3355192550160475e-06, "loss": 0.1665, "step": 23930, "teacher_loss": 0.15056119859218597 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.20499663054943085, "learning_rate": 2.334302299195189e-06, "loss": 0.1465, "step": 23931, "teacher_loss": 0.13994693756103516 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.34827184677124023, "learning_rate": 2.3330856337608054e-06, "loss": 0.3122, "step": 23932, "teacher_loss": 0.30821114778518677 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5599461793899536, "learning_rate": 2.3318692587407842e-06, "loss": 0.1621, "step": 23933, "teacher_loss": 0.11793524026870728 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.48645541071891785, "learning_rate": 2.3306531741630265e-06, "loss": 0.2693, "step": 23934, "teacher_loss": 0.2451891303062439 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.612858235836029, "learning_rate": 2.329437380055407e-06, "loss": 0.2005, "step": 23935, "teacher_loss": 0.15466400980949402 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.7014422416687012, "learning_rate": 2.328221876445796e-06, "loss": 0.2589, "step": 23936, "teacher_loss": 0.2097308486700058 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.8429751396179199, "learning_rate": 2.3270066633620656e-06, "loss": 0.2519, "step": 23937, "teacher_loss": 0.1862192451953888 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.514324963092804, "learning_rate": 2.325791740832081e-06, "loss": 0.2607, "step": 23938, "teacher_loss": 0.23248812556266785 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5227470993995667, "learning_rate": 2.324577108883689e-06, "loss": 0.234, "step": 23939, "teacher_loss": 0.20190829038619995 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6845039129257202, "learning_rate": 2.3233627675447394e-06, "loss": 0.2624, "step": 23940, "teacher_loss": 0.2155291736125946 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.21631315350532532, "learning_rate": 2.322148716843081e-06, "loss": 0.1552, "step": 23941, "teacher_loss": 0.14838644862174988 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.31922048330307007, "learning_rate": 2.320934956806536e-06, "loss": 0.1448, "step": 23942, "teacher_loss": 0.1253708451986313 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6853855848312378, "learning_rate": 2.3197214874629413e-06, "loss": 0.2634, "step": 23943, "teacher_loss": 0.21648722887039185 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.18802130222320557, "learning_rate": 2.318508308840117e-06, "loss": 0.1938, "step": 23944, "teacher_loss": 0.19443315267562866 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3451688289642334, "learning_rate": 2.3172954209658748e-06, "loss": 0.2163, "step": 23945, "teacher_loss": 0.20197612047195435 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6660696268081665, "learning_rate": 2.3160828238680256e-06, "loss": 0.2442, "step": 23946, "teacher_loss": 0.1973493993282318 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5614475011825562, "learning_rate": 2.314870517574365e-06, "loss": 0.216, "step": 23947, "teacher_loss": 0.1776343584060669 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3613615036010742, "learning_rate": 2.3136585021126965e-06, "loss": 0.2308, "step": 23948, "teacher_loss": 0.21627968549728394 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5383799076080322, "learning_rate": 2.3124467775107975e-06, "loss": 0.2569, "step": 23949, "teacher_loss": 0.22563251852989197 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.9112308025360107, "learning_rate": 2.311235343796456e-06, "loss": 0.3528, "step": 23950, "teacher_loss": 0.29072660207748413 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.47813916206359863, "learning_rate": 2.31002420099745e-06, "loss": 0.1961, "step": 23951, "teacher_loss": 0.16476452350616455 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5244801640510559, "learning_rate": 2.3088133491415365e-06, "loss": 0.2348, "step": 23952, "teacher_loss": 0.20258218050003052 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.2677934169769287, "learning_rate": 2.307602788256484e-06, "loss": 0.2032, "step": 23953, "teacher_loss": 0.19604431092739105 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.2846108675003052, "learning_rate": 2.3063925183700494e-06, "loss": 0.2086, "step": 23954, "teacher_loss": 0.20016473531723022 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.46850287914276123, "learning_rate": 2.3051825395099734e-06, "loss": 0.2324, "step": 23955, "teacher_loss": 0.20621876418590546 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3939194679260254, "learning_rate": 2.3039728517039998e-06, "loss": 0.2034, "step": 23956, "teacher_loss": 0.1822763979434967 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.40213149785995483, "learning_rate": 2.302763454979868e-06, "loss": 0.2674, "step": 23957, "teacher_loss": 0.2524672746658325 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3609432578086853, "learning_rate": 2.301554349365298e-06, "loss": 0.2937, "step": 23958, "teacher_loss": 0.2861797511577606 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5048679113388062, "learning_rate": 2.3003455348880205e-06, "loss": 0.2381, "step": 23959, "teacher_loss": 0.2084188163280487 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.28921329975128174, "learning_rate": 2.2991370115757383e-06, "loss": 0.1617, "step": 23960, "teacher_loss": 0.14748495817184448 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6287018060684204, "learning_rate": 2.297928779456166e-06, "loss": 0.2208, "step": 23961, "teacher_loss": 0.17549774050712585 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5425493717193604, "learning_rate": 2.2967208385570084e-06, "loss": 0.2184, "step": 23962, "teacher_loss": 0.18235903978347778 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.11756950616836548, "learning_rate": 2.295513188905951e-06, "loss": 0.1751, "step": 23963, "teacher_loss": 0.18150639533996582 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6002629399299622, "learning_rate": 2.2943058305306856e-06, "loss": 0.2558, "step": 23964, "teacher_loss": 0.2175242304801941 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.24027487635612488, "learning_rate": 2.2930987634588983e-06, "loss": 0.1978, "step": 23965, "teacher_loss": 0.1931256204843521 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3135644793510437, "learning_rate": 2.291891987718257e-06, "loss": 0.2693, "step": 23966, "teacher_loss": 0.2643556594848633 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.19681869447231293, "learning_rate": 2.290685503336429e-06, "loss": 0.1835, "step": 23967, "teacher_loss": 0.18202456831932068 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.38821619749069214, "learning_rate": 2.289479310341083e-06, "loss": 0.2654, "step": 23968, "teacher_loss": 0.2517406940460205 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.2564573585987091, "learning_rate": 2.2882734087598685e-06, "loss": 0.2019, "step": 23969, "teacher_loss": 0.19582590460777283 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.14615774154663086, "learning_rate": 2.287067798620429e-06, "loss": 0.1553, "step": 23970, "teacher_loss": 0.15634512901306152 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.32785242795944214, "learning_rate": 2.2858624799504086e-06, "loss": 0.1947, "step": 23971, "teacher_loss": 0.17985573410987854 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.20455320179462433, "learning_rate": 2.2846574527774476e-06, "loss": 0.1582, "step": 23972, "teacher_loss": 0.1530318558216095 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5766096115112305, "learning_rate": 2.283452717129163e-06, "loss": 0.2153, "step": 23973, "teacher_loss": 0.17520524561405182 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.7136630415916443, "learning_rate": 2.282248273033184e-06, "loss": 0.2746, "step": 23974, "teacher_loss": 0.22585183382034302 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 1.1593677997589111, "learning_rate": 2.2810441205171236e-06, "loss": 0.3139, "step": 23975, "teacher_loss": 0.21996553242206573 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5279066562652588, "learning_rate": 2.279840259608586e-06, "loss": 0.1853, "step": 23976, "teacher_loss": 0.14726190268993378 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.5876827239990234, "learning_rate": 2.2786366903351747e-06, "loss": 0.1734, "step": 23977, "teacher_loss": 0.1273704171180725 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.383453369140625, "learning_rate": 2.2774334127244856e-06, "loss": 0.2264, "step": 23978, "teacher_loss": 0.20899195969104767 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.26491016149520874, "learning_rate": 2.2762304268041053e-06, "loss": 0.2031, "step": 23979, "teacher_loss": 0.19623857736587524 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.20869015157222748, "learning_rate": 2.2750277326016043e-06, "loss": 0.1346, "step": 23980, "teacher_loss": 0.12638501822948456 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.3733154833316803, "learning_rate": 2.273825330144575e-06, "loss": 0.2116, "step": 23981, "teacher_loss": 0.1936642825603485 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.13564841449260712, "learning_rate": 2.2726232194605747e-06, "loss": 0.1347, "step": 23982, "teacher_loss": 0.13454213738441467 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.6553156971931458, "learning_rate": 2.2714214005771595e-06, "loss": 0.185, "step": 23983, "teacher_loss": 0.1326877623796463 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.41339778900146484, "learning_rate": 2.270219873521896e-06, "loss": 0.2617, "step": 23984, "teacher_loss": 0.24485361576080322 }, { "compression_loss": 0.0, "epoch": 4.33, "label_loss": 0.28764328360557556, "learning_rate": 2.269018638322324e-06, "loss": 0.1743, "step": 23985, "teacher_loss": 0.16166174411773682 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.33214449882507324, "learning_rate": 2.2678176950059826e-06, "loss": 0.2087, "step": 23986, "teacher_loss": 0.19494515657424927 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2930101156234741, "learning_rate": 2.2666170436004084e-06, "loss": 0.1355, "step": 23987, "teacher_loss": 0.11795730888843536 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4901461601257324, "learning_rate": 2.2654166841331315e-06, "loss": 0.197, "step": 23988, "teacher_loss": 0.1644403040409088 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.41059041023254395, "learning_rate": 2.2642166166316676e-06, "loss": 0.1769, "step": 23989, "teacher_loss": 0.15092208981513977 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2745780348777771, "learning_rate": 2.2630168411235315e-06, "loss": 0.2375, "step": 23990, "teacher_loss": 0.23334476351737976 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 1.0527151823043823, "learning_rate": 2.261817357636236e-06, "loss": 0.3216, "step": 23991, "teacher_loss": 0.240375816822052 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4498971700668335, "learning_rate": 2.260618166197276e-06, "loss": 0.2159, "step": 23992, "teacher_loss": 0.1898496150970459 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.36716943979263306, "learning_rate": 2.2594192668341417e-06, "loss": 0.1754, "step": 23993, "teacher_loss": 0.1540842056274414 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.34837278723716736, "learning_rate": 2.2582206595743323e-06, "loss": 0.3063, "step": 23994, "teacher_loss": 0.30162712931632996 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.7202326059341431, "learning_rate": 2.2570223444453203e-06, "loss": 0.4423, "step": 23995, "teacher_loss": 0.41144686937332153 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.31576868891716003, "learning_rate": 2.255824321474576e-06, "loss": 0.2031, "step": 23996, "teacher_loss": 0.19055622816085815 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.8264892101287842, "learning_rate": 2.2546265906895726e-06, "loss": 0.221, "step": 23997, "teacher_loss": 0.15368813276290894 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.18294093012809753, "learning_rate": 2.2534291521177725e-06, "loss": 0.2155, "step": 23998, "teacher_loss": 0.21916446089744568 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4606604278087616, "learning_rate": 2.2522320057866223e-06, "loss": 0.1811, "step": 23999, "teacher_loss": 0.15003514289855957 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2972765564918518, "learning_rate": 2.2510351517235722e-06, "loss": 0.1687, "step": 24000, "teacher_loss": 0.15437576174736023 }, { "epoch": 4.34, "eval_exact_match": 80.4162724692526, "eval_f1": 87.7545031856987, "step": 24000 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.5671945810317993, "learning_rate": 2.249838589956065e-06, "loss": 0.2619, "step": 24001, "teacher_loss": 0.2279355823993683 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.5974215865135193, "learning_rate": 2.2486423205115297e-06, "loss": 0.2258, "step": 24002, "teacher_loss": 0.1844923496246338 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.27336937189102173, "learning_rate": 2.2474463434173953e-06, "loss": 0.1901, "step": 24003, "teacher_loss": 0.18086571991443634 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4351341724395752, "learning_rate": 2.246250658701086e-06, "loss": 0.1554, "step": 24004, "teacher_loss": 0.12434118986129761 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.1674986481666565, "learning_rate": 2.245055266390007e-06, "loss": 0.1997, "step": 24005, "teacher_loss": 0.20327094197273254 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.5593927502632141, "learning_rate": 2.2438601665115693e-06, "loss": 0.2579, "step": 24006, "teacher_loss": 0.2244146168231964 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.7360590100288391, "learning_rate": 2.242665359093178e-06, "loss": 0.2534, "step": 24007, "teacher_loss": 0.19975420832633972 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.7808851003646851, "learning_rate": 2.241470844162217e-06, "loss": 0.2893, "step": 24008, "teacher_loss": 0.2346796691417694 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.45372313261032104, "learning_rate": 2.2402766217460808e-06, "loss": 0.1975, "step": 24009, "teacher_loss": 0.16908572614192963 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.29683762788772583, "learning_rate": 2.239082691872143e-06, "loss": 0.1454, "step": 24010, "teacher_loss": 0.12856057286262512 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.12044206261634827, "learning_rate": 2.237889054567782e-06, "loss": 0.214, "step": 24011, "teacher_loss": 0.2243853658437729 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2856326103210449, "learning_rate": 2.236695709860361e-06, "loss": 0.2074, "step": 24012, "teacher_loss": 0.1986769735813141 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.3515746593475342, "learning_rate": 2.2355026577772402e-06, "loss": 0.228, "step": 24013, "teacher_loss": 0.2142791450023651 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2595018744468689, "learning_rate": 2.2343098983457754e-06, "loss": 0.154, "step": 24014, "teacher_loss": 0.14223405718803406 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2016221582889557, "learning_rate": 2.2331174315933102e-06, "loss": 0.2365, "step": 24015, "teacher_loss": 0.24041730165481567 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.43893638253211975, "learning_rate": 2.231925257547184e-06, "loss": 0.2073, "step": 24016, "teacher_loss": 0.1815786063671112 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.2347278594970703, "learning_rate": 2.230733376234733e-06, "loss": 0.1879, "step": 24017, "teacher_loss": 0.18270018696784973 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.6399128437042236, "learning_rate": 2.2295417876832795e-06, "loss": 0.3248, "step": 24018, "teacher_loss": 0.2897520661354065 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.403209924697876, "learning_rate": 2.228350491920148e-06, "loss": 0.1618, "step": 24019, "teacher_loss": 0.13497664034366608 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.64995938539505, "learning_rate": 2.2271594889726467e-06, "loss": 0.2449, "step": 24020, "teacher_loss": 0.19987425208091736 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.8262883424758911, "learning_rate": 2.2259687788680817e-06, "loss": 0.2806, "step": 24021, "teacher_loss": 0.2199312150478363 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.17706947028636932, "learning_rate": 2.2247783616337564e-06, "loss": 0.1899, "step": 24022, "teacher_loss": 0.19136342406272888 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.23855794966220856, "learning_rate": 2.223588237296959e-06, "loss": 0.1814, "step": 24023, "teacher_loss": 0.17504951357841492 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.3920797109603882, "learning_rate": 2.2223984058849776e-06, "loss": 0.1899, "step": 24024, "teacher_loss": 0.16746732592582703 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4302196502685547, "learning_rate": 2.221208867425096e-06, "loss": 0.1941, "step": 24025, "teacher_loss": 0.16787764430046082 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.3299451470375061, "learning_rate": 2.220019621944578e-06, "loss": 0.2342, "step": 24026, "teacher_loss": 0.2235976755619049 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.15167132019996643, "learning_rate": 2.2188306694706924e-06, "loss": 0.2695, "step": 24027, "teacher_loss": 0.2825429439544678 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.592416524887085, "learning_rate": 2.217642010030704e-06, "loss": 0.2124, "step": 24028, "teacher_loss": 0.1702287197113037 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.42218273878097534, "learning_rate": 2.2164536436518574e-06, "loss": 0.1861, "step": 24029, "teacher_loss": 0.15989606082439423 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.40831851959228516, "learning_rate": 2.215265570361406e-06, "loss": 0.2237, "step": 24030, "teacher_loss": 0.20313864946365356 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 1.3227722644805908, "learning_rate": 2.21407779018658e-06, "loss": 0.2593, "step": 24031, "teacher_loss": 0.14114999771118164 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.6363925337791443, "learning_rate": 2.2128903031546188e-06, "loss": 0.3576, "step": 24032, "teacher_loss": 0.3265949487686157 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.8205530643463135, "learning_rate": 2.2117031092927444e-06, "loss": 0.2911, "step": 24033, "teacher_loss": 0.23226453363895416 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 1.0109715461730957, "learning_rate": 2.2105162086281742e-06, "loss": 0.3184, "step": 24034, "teacher_loss": 0.24147063493728638 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.5781655311584473, "learning_rate": 2.209329601188126e-06, "loss": 0.2042, "step": 24035, "teacher_loss": 0.16265341639518738 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.268660306930542, "learning_rate": 2.2081432869997985e-06, "loss": 0.164, "step": 24036, "teacher_loss": 0.1523973047733307 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.15392056107521057, "learning_rate": 2.206957266090394e-06, "loss": 0.113, "step": 24037, "teacher_loss": 0.10848838090896606 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4632466435432434, "learning_rate": 2.2057715384871075e-06, "loss": 0.1578, "step": 24038, "teacher_loss": 0.1238139197230339 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.21561163663864136, "learning_rate": 2.204586104217115e-06, "loss": 0.2011, "step": 24039, "teacher_loss": 0.19952334463596344 }, { "compression_loss": 0.0, "epoch": 4.34, "label_loss": 0.4543490409851074, "learning_rate": 2.203400963307602e-06, "loss": 0.2271, "step": 24040, "teacher_loss": 0.2018272876739502 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.21323370933532715, "learning_rate": 2.202216115785743e-06, "loss": 0.151, "step": 24041, "teacher_loss": 0.1441287249326706 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6229729056358337, "learning_rate": 2.201031561678698e-06, "loss": 0.246, "step": 24042, "teacher_loss": 0.2041531801223755 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2918744385242462, "learning_rate": 2.199847301013619e-06, "loss": 0.1533, "step": 24043, "teacher_loss": 0.13786277174949646 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.35242557525634766, "learning_rate": 2.1986633338176702e-06, "loss": 0.2097, "step": 24044, "teacher_loss": 0.1937883198261261 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.561247706413269, "learning_rate": 2.1974796601179926e-06, "loss": 0.2529, "step": 24045, "teacher_loss": 0.21868997812271118 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.1913042962551117, "learning_rate": 2.1962962799417187e-06, "loss": 0.1731, "step": 24046, "teacher_loss": 0.171085923910141 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.4552307724952698, "learning_rate": 2.1951131933159834e-06, "loss": 0.2249, "step": 24047, "teacher_loss": 0.19931060075759888 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.1914690136909485, "learning_rate": 2.193930400267914e-06, "loss": 0.1304, "step": 24048, "teacher_loss": 0.12359493970870972 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.45944905281066895, "learning_rate": 2.1927479008246233e-06, "loss": 0.2289, "step": 24049, "teacher_loss": 0.20332002639770508 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6912100911140442, "learning_rate": 2.191565695013226e-06, "loss": 0.2125, "step": 24050, "teacher_loss": 0.1593523919582367 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.5325913429260254, "learning_rate": 2.1903837828608282e-06, "loss": 0.2184, "step": 24051, "teacher_loss": 0.1834602802991867 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.516902506351471, "learning_rate": 2.1892021643945255e-06, "loss": 0.1905, "step": 24052, "teacher_loss": 0.15426768362522125 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.428198903799057, "learning_rate": 2.1880208396413996e-06, "loss": 0.2726, "step": 24053, "teacher_loss": 0.2553304433822632 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.37251389026641846, "learning_rate": 2.186839808628553e-06, "loss": 0.2011, "step": 24054, "teacher_loss": 0.18208475410938263 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2808970808982849, "learning_rate": 2.1856590713830516e-06, "loss": 0.1762, "step": 24055, "teacher_loss": 0.16459207236766815 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.48111385107040405, "learning_rate": 2.1844786279319625e-06, "loss": 0.2589, "step": 24056, "teacher_loss": 0.23418283462524414 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.245268315076828, "learning_rate": 2.1832984783023633e-06, "loss": 0.1847, "step": 24057, "teacher_loss": 0.17793044447898865 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.569835364818573, "learning_rate": 2.182118622521302e-06, "loss": 0.3982, "step": 24058, "teacher_loss": 0.3791613280773163 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.7276433706283569, "learning_rate": 2.1809390606158277e-06, "loss": 0.2101, "step": 24059, "teacher_loss": 0.15258993208408356 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.37281936407089233, "learning_rate": 2.179759792612987e-06, "loss": 0.1668, "step": 24060, "teacher_loss": 0.14387336373329163 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.12428450584411621, "learning_rate": 2.178580818539822e-06, "loss": 0.1681, "step": 24061, "teacher_loss": 0.1729975938796997 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3061058223247528, "learning_rate": 2.1774021384233526e-06, "loss": 0.1658, "step": 24062, "teacher_loss": 0.15022574365139008 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.5158843994140625, "learning_rate": 2.176223752290609e-06, "loss": 0.2441, "step": 24063, "teacher_loss": 0.21386940777301788 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3103901147842407, "learning_rate": 2.175045660168611e-06, "loss": 0.1868, "step": 24064, "teacher_loss": 0.17307725548744202 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3477204442024231, "learning_rate": 2.1738678620843612e-06, "loss": 0.2126, "step": 24065, "teacher_loss": 0.19756081700325012 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.40253746509552, "learning_rate": 2.1726903580648666e-06, "loss": 0.2156, "step": 24066, "teacher_loss": 0.1948145180940628 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.41213327646255493, "learning_rate": 2.171513148137128e-06, "loss": 0.228, "step": 24067, "teacher_loss": 0.20753689110279083 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3104656934738159, "learning_rate": 2.1703362323281257e-06, "loss": 0.1825, "step": 24068, "teacher_loss": 0.16827671229839325 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2336173951625824, "learning_rate": 2.1691596106648524e-06, "loss": 0.156, "step": 24069, "teacher_loss": 0.14734283089637756 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2792956829071045, "learning_rate": 2.1679832831742773e-06, "loss": 0.1352, "step": 24070, "teacher_loss": 0.11923306435346603 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.36622339487075806, "learning_rate": 2.1668072498833737e-06, "loss": 0.1936, "step": 24071, "teacher_loss": 0.1744169145822525 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.36221152544021606, "learning_rate": 2.1656315108191078e-06, "loss": 0.1902, "step": 24072, "teacher_loss": 0.17105789482593536 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.5037327408790588, "learning_rate": 2.1644560660084266e-06, "loss": 0.3178, "step": 24073, "teacher_loss": 0.29709190130233765 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6849277019500732, "learning_rate": 2.163280915478289e-06, "loss": 0.6393, "step": 24074, "teacher_loss": 0.6342679858207703 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.4697837233543396, "learning_rate": 2.1621060592556312e-06, "loss": 0.2168, "step": 24075, "teacher_loss": 0.18868029117584229 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.45679864287376404, "learning_rate": 2.1609314973673905e-06, "loss": 0.2284, "step": 24076, "teacher_loss": 0.20305031538009644 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.21940001845359802, "learning_rate": 2.1597572298405e-06, "loss": 0.2657, "step": 24077, "teacher_loss": 0.2708034813404083 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.4848310947418213, "learning_rate": 2.1585832567018767e-06, "loss": 0.1956, "step": 24078, "teacher_loss": 0.16347095370292664 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6124930381774902, "learning_rate": 2.1574095779784385e-06, "loss": 0.3337, "step": 24079, "teacher_loss": 0.30269038677215576 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.4767211973667145, "learning_rate": 2.156236193697098e-06, "loss": 0.2064, "step": 24080, "teacher_loss": 0.17635218799114227 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.23997816443443298, "learning_rate": 2.15506310388475e-06, "loss": 0.2028, "step": 24081, "teacher_loss": 0.1986789107322693 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2863200902938843, "learning_rate": 2.1538903085682984e-06, "loss": 0.1676, "step": 24082, "teacher_loss": 0.15440011024475098 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.5700246691703796, "learning_rate": 2.1527178077746223e-06, "loss": 0.192, "step": 24083, "teacher_loss": 0.15001827478408813 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6011705994606018, "learning_rate": 2.1515456015306096e-06, "loss": 0.2763, "step": 24084, "teacher_loss": 0.24024289846420288 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3729507029056549, "learning_rate": 2.1503736898631384e-06, "loss": 0.1774, "step": 24085, "teacher_loss": 0.15566977858543396 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.5780166387557983, "learning_rate": 2.14920207279907e-06, "loss": 0.2374, "step": 24086, "teacher_loss": 0.1995784044265747 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.30591559410095215, "learning_rate": 2.148030750365269e-06, "loss": 0.1953, "step": 24087, "teacher_loss": 0.18295682966709137 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.36636483669281006, "learning_rate": 2.146859722588595e-06, "loss": 0.223, "step": 24088, "teacher_loss": 0.20705002546310425 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.6003850698471069, "learning_rate": 2.1456889894958864e-06, "loss": 0.2416, "step": 24089, "teacher_loss": 0.20170001685619354 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.22337886691093445, "learning_rate": 2.144518551113992e-06, "loss": 0.182, "step": 24090, "teacher_loss": 0.17737668752670288 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.513768196105957, "learning_rate": 2.143348407469747e-06, "loss": 0.2697, "step": 24091, "teacher_loss": 0.24252937734127045 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.2958924174308777, "learning_rate": 2.142178558589976e-06, "loss": 0.1714, "step": 24092, "teacher_loss": 0.15751270949840546 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.1881890445947647, "learning_rate": 2.1410090045014985e-06, "loss": 0.1421, "step": 24093, "teacher_loss": 0.13699409365653992 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.3031767010688782, "learning_rate": 2.1398397452311323e-06, "loss": 0.1916, "step": 24094, "teacher_loss": 0.1792091727256775 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.22616443037986755, "learning_rate": 2.138670780805685e-06, "loss": 0.1888, "step": 24095, "teacher_loss": 0.18464316427707672 }, { "compression_loss": 0.0, "epoch": 4.35, "label_loss": 0.1772937774658203, "learning_rate": 2.137502111251954e-06, "loss": 0.1202, "step": 24096, "teacher_loss": 0.11382432281970978 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.20297923684120178, "learning_rate": 2.136333736596738e-06, "loss": 0.1417, "step": 24097, "teacher_loss": 0.13489598035812378 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3368719220161438, "learning_rate": 2.135165656866822e-06, "loss": 0.1511, "step": 24098, "teacher_loss": 0.13049712777137756 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.30626314878463745, "learning_rate": 2.1339978720889856e-06, "loss": 0.1621, "step": 24099, "teacher_loss": 0.14607040584087372 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.22838753461837769, "learning_rate": 2.1328303822900025e-06, "loss": 0.1827, "step": 24100, "teacher_loss": 0.17767687141895294 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.4264363646507263, "learning_rate": 2.1316631874966457e-06, "loss": 0.203, "step": 24101, "teacher_loss": 0.1781388819217682 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6638039350509644, "learning_rate": 2.1304962877356692e-06, "loss": 0.3321, "step": 24102, "teacher_loss": 0.29519668221473694 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6335201263427734, "learning_rate": 2.129329683033823e-06, "loss": 0.2358, "step": 24103, "teacher_loss": 0.19159609079360962 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3509160876274109, "learning_rate": 2.1281633734178645e-06, "loss": 0.2267, "step": 24104, "teacher_loss": 0.21285486221313477 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.4193546772003174, "learning_rate": 2.1269973589145284e-06, "loss": 0.164, "step": 24105, "teacher_loss": 0.13557904958724976 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3707237243652344, "learning_rate": 2.1258316395505406e-06, "loss": 0.1814, "step": 24106, "teacher_loss": 0.16035783290863037 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.47226423025131226, "learning_rate": 2.1246662153526407e-06, "loss": 0.2141, "step": 24107, "teacher_loss": 0.18538369238376617 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3969842493534088, "learning_rate": 2.123501086347543e-06, "loss": 0.2752, "step": 24108, "teacher_loss": 0.2616182565689087 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.32110410928726196, "learning_rate": 2.122336252561957e-06, "loss": 0.2018, "step": 24109, "teacher_loss": 0.18854191899299622 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.853962779045105, "learning_rate": 2.1211717140225906e-06, "loss": 0.2528, "step": 24110, "teacher_loss": 0.1860085129737854 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 1.148671269416809, "learning_rate": 2.120007470756147e-06, "loss": 0.2782, "step": 24111, "teacher_loss": 0.18146607279777527 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.10295100510120392, "learning_rate": 2.1188435227893145e-06, "loss": 0.1288, "step": 24112, "teacher_loss": 0.13170501589775085 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3872445523738861, "learning_rate": 2.117679870148779e-06, "loss": 0.1878, "step": 24113, "teacher_loss": 0.16563282907009125 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6574846506118774, "learning_rate": 2.1165165128612235e-06, "loss": 0.2303, "step": 24114, "teacher_loss": 0.18283995985984802 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.44447746872901917, "learning_rate": 2.1153534509533174e-06, "loss": 0.1941, "step": 24115, "teacher_loss": 0.16631552577018738 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.17659206688404083, "learning_rate": 2.1141906844517207e-06, "loss": 0.1412, "step": 24116, "teacher_loss": 0.13726702332496643 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5166727900505066, "learning_rate": 2.113028213383105e-06, "loss": 0.2705, "step": 24117, "teacher_loss": 0.2431592345237732 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.2641560137271881, "learning_rate": 2.111866037774115e-06, "loss": 0.1587, "step": 24118, "teacher_loss": 0.14694499969482422 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.15038985013961792, "learning_rate": 2.1107041576513917e-06, "loss": 0.1892, "step": 24119, "teacher_loss": 0.19353672862052917 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6606764197349548, "learning_rate": 2.1095425730415796e-06, "loss": 0.2075, "step": 24120, "teacher_loss": 0.15713909268379211 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3634679913520813, "learning_rate": 2.1083812839713117e-06, "loss": 0.1714, "step": 24121, "teacher_loss": 0.15001505613327026 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6436595320701599, "learning_rate": 2.1072202904672056e-06, "loss": 0.2287, "step": 24122, "teacher_loss": 0.18261811137199402 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.6445033550262451, "learning_rate": 2.1060595925558844e-06, "loss": 0.2215, "step": 24123, "teacher_loss": 0.1744779646396637 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5155205726623535, "learning_rate": 2.1048991902639623e-06, "loss": 0.2101, "step": 24124, "teacher_loss": 0.17621192336082458 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.2924107313156128, "learning_rate": 2.1037390836180366e-06, "loss": 0.1535, "step": 24125, "teacher_loss": 0.13808830082416534 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3616622984409332, "learning_rate": 2.1025792726447083e-06, "loss": 0.2339, "step": 24126, "teacher_loss": 0.21969962120056152 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.34055787324905396, "learning_rate": 2.101419757370572e-06, "loss": 0.176, "step": 24127, "teacher_loss": 0.1577366441488266 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.7559992074966431, "learning_rate": 2.1002605378222068e-06, "loss": 0.2821, "step": 24128, "teacher_loss": 0.2294563204050064 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.8462119102478027, "learning_rate": 2.0991016140261903e-06, "loss": 0.3052, "step": 24129, "teacher_loss": 0.24507129192352295 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5615392923355103, "learning_rate": 2.0979429860090985e-06, "loss": 0.224, "step": 24130, "teacher_loss": 0.1864570528268814 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.8582161664962769, "learning_rate": 2.096784653797489e-06, "loss": 0.4078, "step": 24131, "teacher_loss": 0.3578042984008789 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.16840235888957977, "learning_rate": 2.0956266174179263e-06, "loss": 0.1537, "step": 24132, "teacher_loss": 0.1520490050315857 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.4229716658592224, "learning_rate": 2.0944688768969496e-06, "loss": 0.2504, "step": 24133, "teacher_loss": 0.2312595546245575 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.353396475315094, "learning_rate": 2.093311432261111e-06, "loss": 0.1712, "step": 24134, "teacher_loss": 0.15091989934444427 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.21631215512752533, "learning_rate": 2.0921542835369472e-06, "loss": 0.1712, "step": 24135, "teacher_loss": 0.16614016890525818 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.4670332074165344, "learning_rate": 2.0909974307509826e-06, "loss": 0.466, "step": 24136, "teacher_loss": 0.4658501148223877 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.199095219373703, "learning_rate": 2.0898408739297476e-06, "loss": 0.213, "step": 24137, "teacher_loss": 0.21459971368312836 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.36358705163002014, "learning_rate": 2.0886846130997504e-06, "loss": 0.2243, "step": 24138, "teacher_loss": 0.20887590944766998 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.1979871690273285, "learning_rate": 2.087528648287507e-06, "loss": 0.1339, "step": 24139, "teacher_loss": 0.12672746181488037 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5115912556648254, "learning_rate": 2.0863729795195196e-06, "loss": 0.1766, "step": 24140, "teacher_loss": 0.13942985236644745 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5048018097877502, "learning_rate": 2.08521760682228e-06, "loss": 0.2367, "step": 24141, "teacher_loss": 0.20696169137954712 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.2570592761039734, "learning_rate": 2.084062530222284e-06, "loss": 0.1491, "step": 24142, "teacher_loss": 0.13714447617530823 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.5125433206558228, "learning_rate": 2.082907749746005e-06, "loss": 0.2377, "step": 24143, "teacher_loss": 0.20713132619857788 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.8262771368026733, "learning_rate": 2.081753265419925e-06, "loss": 0.2367, "step": 24144, "teacher_loss": 0.1712222695350647 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.4055216908454895, "learning_rate": 2.080599077270515e-06, "loss": 0.2144, "step": 24145, "teacher_loss": 0.19317930936813354 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.824671745300293, "learning_rate": 2.079445185324231e-06, "loss": 0.3085, "step": 24146, "teacher_loss": 0.25112730264663696 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3611491918563843, "learning_rate": 2.0782915896075305e-06, "loss": 0.1831, "step": 24147, "teacher_loss": 0.16334109008312225 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3308403491973877, "learning_rate": 2.077138290146867e-06, "loss": 0.1913, "step": 24148, "teacher_loss": 0.1758241355419159 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.40630969405174255, "learning_rate": 2.075985286968673e-06, "loss": 0.2998, "step": 24149, "teacher_loss": 0.2879410982131958 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.25508078932762146, "learning_rate": 2.0748325800993884e-06, "loss": 0.1606, "step": 24150, "teacher_loss": 0.150080144405365 }, { "compression_loss": 0.0, "epoch": 4.36, "label_loss": 0.3130847215652466, "learning_rate": 2.073680169565445e-06, "loss": 0.1843, "step": 24151, "teacher_loss": 0.16996704041957855 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.23449155688285828, "learning_rate": 2.0725280553932552e-06, "loss": 0.1913, "step": 24152, "teacher_loss": 0.1865461766719818 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.38948217034339905, "learning_rate": 2.071376237609241e-06, "loss": 0.1799, "step": 24153, "teacher_loss": 0.15664543211460114 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.27497944235801697, "learning_rate": 2.07022471623981e-06, "loss": 0.1542, "step": 24154, "teacher_loss": 0.14075277745723724 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4171023964881897, "learning_rate": 2.0690734913113603e-06, "loss": 0.1628, "step": 24155, "teacher_loss": 0.1345631182193756 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3434973359107971, "learning_rate": 2.067922562850283e-06, "loss": 0.2004, "step": 24156, "teacher_loss": 0.18453413248062134 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3588903546333313, "learning_rate": 2.0667719308829686e-06, "loss": 0.2062, "step": 24157, "teacher_loss": 0.18921679258346558 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.45504844188690186, "learning_rate": 2.0656215954358025e-06, "loss": 0.1888, "step": 24158, "teacher_loss": 0.15918779373168945 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.26276034116744995, "learning_rate": 2.064471556535151e-06, "loss": 0.2209, "step": 24159, "teacher_loss": 0.21629098057746887 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 1.4562435150146484, "learning_rate": 2.0633218142073833e-06, "loss": 0.3214, "step": 24160, "teacher_loss": 0.19531689584255219 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3991554379463196, "learning_rate": 2.062172368478863e-06, "loss": 0.2685, "step": 24161, "teacher_loss": 0.25393807888031006 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.44233760237693787, "learning_rate": 2.061023219375938e-06, "loss": 0.2474, "step": 24162, "teacher_loss": 0.22574535012245178 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.48257550597190857, "learning_rate": 2.0598743669249565e-06, "loss": 0.2549, "step": 24163, "teacher_loss": 0.2295970618724823 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4222247004508972, "learning_rate": 2.0587258111522635e-06, "loss": 0.168, "step": 24164, "teacher_loss": 0.13978828489780426 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.684607982635498, "learning_rate": 2.0575775520841878e-06, "loss": 0.2716, "step": 24165, "teacher_loss": 0.22576190531253815 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.275020956993103, "learning_rate": 2.0564295897470487e-06, "loss": 0.1678, "step": 24166, "teacher_loss": 0.15589390695095062 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.33841192722320557, "learning_rate": 2.055281924167178e-06, "loss": 0.2097, "step": 24167, "teacher_loss": 0.1953493058681488 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.2886737287044525, "learning_rate": 2.054134555370884e-06, "loss": 0.1733, "step": 24168, "teacher_loss": 0.16047939658164978 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.48450154066085815, "learning_rate": 2.0529874833844662e-06, "loss": 0.1902, "step": 24169, "teacher_loss": 0.1575442999601364 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.18101289868354797, "learning_rate": 2.0518407082342294e-06, "loss": 0.148, "step": 24170, "teacher_loss": 0.14432629942893982 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.5600560307502747, "learning_rate": 2.0506942299464672e-06, "loss": 0.2153, "step": 24171, "teacher_loss": 0.17702136933803558 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3536297082901001, "learning_rate": 2.04954804854746e-06, "loss": 0.2491, "step": 24172, "teacher_loss": 0.23747684061527252 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.25043439865112305, "learning_rate": 2.048402164063487e-06, "loss": 0.1877, "step": 24173, "teacher_loss": 0.18071895837783813 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.5213544368743896, "learning_rate": 2.0472565765208256e-06, "loss": 0.2366, "step": 24174, "teacher_loss": 0.20491428673267365 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.1874285638332367, "learning_rate": 2.046111285945733e-06, "loss": 0.1703, "step": 24175, "teacher_loss": 0.16837960481643677 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.20623981952667236, "learning_rate": 2.044966292364472e-06, "loss": 0.1715, "step": 24176, "teacher_loss": 0.16760051250457764 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.29263463616371155, "learning_rate": 2.043821595803296e-06, "loss": 0.1681, "step": 24177, "teacher_loss": 0.1542688012123108 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4170669913291931, "learning_rate": 2.0426771962884437e-06, "loss": 0.1615, "step": 24178, "teacher_loss": 0.13315586745738983 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.2835770845413208, "learning_rate": 2.0415330938461503e-06, "loss": 0.1872, "step": 24179, "teacher_loss": 0.17653462290763855 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.32481706142425537, "learning_rate": 2.0403892885026587e-06, "loss": 0.2032, "step": 24180, "teacher_loss": 0.18967324495315552 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.622667133808136, "learning_rate": 2.039245780284185e-06, "loss": 0.2518, "step": 24181, "teacher_loss": 0.21059229969978333 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.2885042726993561, "learning_rate": 2.0381025692169437e-06, "loss": 0.1563, "step": 24182, "teacher_loss": 0.14164510369300842 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3007691502571106, "learning_rate": 2.0369596553271496e-06, "loss": 0.1683, "step": 24183, "teacher_loss": 0.15352830290794373 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.6698163747787476, "learning_rate": 2.0358170386410072e-06, "loss": 0.2108, "step": 24184, "teacher_loss": 0.1597941815853119 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 1.22364342212677, "learning_rate": 2.034674719184709e-06, "loss": 0.3441, "step": 24185, "teacher_loss": 0.24633751809597015 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.1892719566822052, "learning_rate": 2.0335326969844476e-06, "loss": 0.1521, "step": 24186, "teacher_loss": 0.14792287349700928 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.7989034652709961, "learning_rate": 2.0323909720664096e-06, "loss": 0.305, "step": 24187, "teacher_loss": 0.25011682510375977 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.35490041971206665, "learning_rate": 2.0312495444567645e-06, "loss": 0.1823, "step": 24188, "teacher_loss": 0.16307701170444489 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.587756335735321, "learning_rate": 2.0301084141816836e-06, "loss": 0.1952, "step": 24189, "teacher_loss": 0.1515846699476242 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.39118218421936035, "learning_rate": 2.0289675812673363e-06, "loss": 0.1856, "step": 24190, "teacher_loss": 0.1627466380596161 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.5157334208488464, "learning_rate": 2.027827045739868e-06, "loss": 0.1939, "step": 24191, "teacher_loss": 0.1581641435623169 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.25750893354415894, "learning_rate": 2.026686807625437e-06, "loss": 0.1712, "step": 24192, "teacher_loss": 0.16165369749069214 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4656542241573334, "learning_rate": 2.0255468669501777e-06, "loss": 0.1809, "step": 24193, "teacher_loss": 0.14931261539459229 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.36860764026641846, "learning_rate": 2.0244072237402307e-06, "loss": 0.1864, "step": 24194, "teacher_loss": 0.16619321703910828 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.5511794090270996, "learning_rate": 2.023267878021725e-06, "loss": 0.2364, "step": 24195, "teacher_loss": 0.20137529075145721 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.47485533356666565, "learning_rate": 2.0221288298207787e-06, "loss": 0.216, "step": 24196, "teacher_loss": 0.1872279942035675 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.7470526695251465, "learning_rate": 2.0209900791635116e-06, "loss": 0.1955, "step": 24197, "teacher_loss": 0.1342616230249405 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4112088084220886, "learning_rate": 2.0198516260760254e-06, "loss": 0.2587, "step": 24198, "teacher_loss": 0.24180760979652405 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.312944620847702, "learning_rate": 2.0187134705844233e-06, "loss": 0.1616, "step": 24199, "teacher_loss": 0.1447295844554901 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.4228675961494446, "learning_rate": 2.017575612714807e-06, "loss": 0.3033, "step": 24200, "teacher_loss": 0.2900681495666504 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.28748029470443726, "learning_rate": 2.0164380524932534e-06, "loss": 0.1755, "step": 24201, "teacher_loss": 0.1630670726299286 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.24348270893096924, "learning_rate": 2.01530078994585e-06, "loss": 0.1714, "step": 24202, "teacher_loss": 0.1634211540222168 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3653257489204407, "learning_rate": 2.0141638250986714e-06, "loss": 0.2104, "step": 24203, "teacher_loss": 0.19316327571868896 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.32686927914619446, "learning_rate": 2.0130271579777816e-06, "loss": 0.2495, "step": 24204, "teacher_loss": 0.24090583622455597 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.7371385097503662, "learning_rate": 2.0118907886092443e-06, "loss": 0.288, "step": 24205, "teacher_loss": 0.23814445734024048 }, { "compression_loss": 0.0, "epoch": 4.37, "label_loss": 0.3627144396305084, "learning_rate": 2.010754717019107e-06, "loss": 0.2264, "step": 24206, "teacher_loss": 0.2112187147140503 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.33188870549201965, "learning_rate": 2.0096189432334194e-06, "loss": 0.1833, "step": 24207, "teacher_loss": 0.1668439656496048 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.24071869254112244, "learning_rate": 2.0084834672782272e-06, "loss": 0.1629, "step": 24208, "teacher_loss": 0.15422964096069336 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.23737987875938416, "learning_rate": 2.007348289179554e-06, "loss": 0.2118, "step": 24209, "teacher_loss": 0.20901280641555786 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.612793505191803, "learning_rate": 2.0062134089634317e-06, "loss": 0.3046, "step": 24210, "teacher_loss": 0.2703706622123718 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6180058717727661, "learning_rate": 2.005078826655882e-06, "loss": 0.2836, "step": 24211, "teacher_loss": 0.24641090631484985 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.42693600058555603, "learning_rate": 2.003944542282909e-06, "loss": 0.2215, "step": 24212, "teacher_loss": 0.19866369664669037 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.351433664560318, "learning_rate": 2.002810555870526e-06, "loss": 0.1612, "step": 24213, "teacher_loss": 0.140067458152771 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.3575143814086914, "learning_rate": 2.001676867444731e-06, "loss": 0.1898, "step": 24214, "teacher_loss": 0.17114710807800293 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.36525657773017883, "learning_rate": 2.0005434770315145e-06, "loss": 0.2554, "step": 24215, "teacher_loss": 0.24320705235004425 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.21546344459056854, "learning_rate": 1.9994103846568553e-06, "loss": 0.1538, "step": 24216, "teacher_loss": 0.14692160487174988 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.1961396187543869, "learning_rate": 1.9982775903467438e-06, "loss": 0.158, "step": 24217, "teacher_loss": 0.15379637479782104 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.7073092460632324, "learning_rate": 1.9971450941271462e-06, "loss": 0.5855, "step": 24218, "teacher_loss": 0.5719879865646362 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5682158470153809, "learning_rate": 1.9960128960240255e-06, "loss": 0.3044, "step": 24219, "teacher_loss": 0.2750796675682068 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6237989068031311, "learning_rate": 1.99488099606334e-06, "loss": 0.3087, "step": 24220, "teacher_loss": 0.2736767828464508 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.37909480929374695, "learning_rate": 1.993749394271045e-06, "loss": 0.229, "step": 24221, "teacher_loss": 0.21229872107505798 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6887695789337158, "learning_rate": 1.992618090673079e-06, "loss": 0.2414, "step": 24222, "teacher_loss": 0.19167444109916687 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.20408578217029572, "learning_rate": 1.9914870852953815e-06, "loss": 0.1556, "step": 24223, "teacher_loss": 0.15017807483673096 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.3909336030483246, "learning_rate": 1.990356378163887e-06, "loss": 0.1596, "step": 24224, "teacher_loss": 0.133936807513237 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.3879234492778778, "learning_rate": 1.9892259693045156e-06, "loss": 0.1996, "step": 24225, "teacher_loss": 0.17866727709770203 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.9407852292060852, "learning_rate": 1.9880958587431775e-06, "loss": 0.3175, "step": 24226, "teacher_loss": 0.24827361106872559 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.7187612652778625, "learning_rate": 1.9869660465057965e-06, "loss": 0.2416, "step": 24227, "teacher_loss": 0.18862125277519226 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6091758012771606, "learning_rate": 1.985836532618269e-06, "loss": 0.2576, "step": 24228, "teacher_loss": 0.218548983335495 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.20917585492134094, "learning_rate": 1.984707317106485e-06, "loss": 0.2206, "step": 24229, "teacher_loss": 0.22184666991233826 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.28677740693092346, "learning_rate": 1.9835783999963463e-06, "loss": 0.1687, "step": 24230, "teacher_loss": 0.15562953054904938 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5958304405212402, "learning_rate": 1.9824497813137303e-06, "loss": 0.2335, "step": 24231, "teacher_loss": 0.19329044222831726 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.9408868551254272, "learning_rate": 1.9813214610845083e-06, "loss": 0.2803, "step": 24232, "teacher_loss": 0.20685502886772156 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.33608466386795044, "learning_rate": 1.980193439334554e-06, "loss": 0.2287, "step": 24233, "teacher_loss": 0.2168155312538147 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.40105342864990234, "learning_rate": 1.9790657160897317e-06, "loss": 0.1859, "step": 24234, "teacher_loss": 0.16195125877857208 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5643248558044434, "learning_rate": 1.9779382913758898e-06, "loss": 0.2499, "step": 24235, "teacher_loss": 0.2149730622768402 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6095297932624817, "learning_rate": 1.976811165218882e-06, "loss": 0.2231, "step": 24236, "teacher_loss": 0.180125892162323 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.33640748262405396, "learning_rate": 1.9756843376445512e-06, "loss": 0.259, "step": 24237, "teacher_loss": 0.25038206577301025 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6414741277694702, "learning_rate": 1.974557808678726e-06, "loss": 0.2465, "step": 24238, "teacher_loss": 0.20258015394210815 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.46503180265426636, "learning_rate": 1.973431578347239e-06, "loss": 0.2662, "step": 24239, "teacher_loss": 0.24408169090747833 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6486656665802002, "learning_rate": 1.9723056466759116e-06, "loss": 0.205, "step": 24240, "teacher_loss": 0.1556711494922638 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.22172841429710388, "learning_rate": 1.9711800136905585e-06, "loss": 0.1537, "step": 24241, "teacher_loss": 0.1461057811975479 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.27311083674430847, "learning_rate": 1.9700546794169804e-06, "loss": 0.2176, "step": 24242, "teacher_loss": 0.21142545342445374 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.42104649543762207, "learning_rate": 1.968929643880984e-06, "loss": 0.2236, "step": 24243, "teacher_loss": 0.20164865255355835 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.1840902864933014, "learning_rate": 1.967804907108365e-06, "loss": 0.2031, "step": 24244, "teacher_loss": 0.20526769757270813 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.24259570240974426, "learning_rate": 1.9666804691249043e-06, "loss": 0.149, "step": 24245, "teacher_loss": 0.13858619332313538 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.4887298047542572, "learning_rate": 1.965556329956383e-06, "loss": 0.2336, "step": 24246, "teacher_loss": 0.20521315932273865 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5103280544281006, "learning_rate": 1.9644324896285787e-06, "loss": 0.2249, "step": 24247, "teacher_loss": 0.19322550296783447 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.4663625955581665, "learning_rate": 1.9633089481672528e-06, "loss": 0.2409, "step": 24248, "teacher_loss": 0.21582567691802979 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.10184285044670105, "learning_rate": 1.9621857055981664e-06, "loss": 0.1671, "step": 24249, "teacher_loss": 0.17435456812381744 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5180944800376892, "learning_rate": 1.961062761947075e-06, "loss": 0.2385, "step": 24250, "teacher_loss": 0.20748373866081238 }, { "epoch": 4.38, "eval_exact_match": 80.40681173131505, "eval_f1": 87.699795173519, "step": 24250 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6319336891174316, "learning_rate": 1.9599401172397186e-06, "loss": 0.1727, "step": 24251, "teacher_loss": 0.12171150743961334 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.37688523530960083, "learning_rate": 1.9588177715018395e-06, "loss": 0.168, "step": 24252, "teacher_loss": 0.1447594165802002 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.6969236135482788, "learning_rate": 1.9576957247591724e-06, "loss": 0.1678, "step": 24253, "teacher_loss": 0.10901831835508347 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.2916853725910187, "learning_rate": 1.9565739770374367e-06, "loss": 0.2035, "step": 24254, "teacher_loss": 0.19373945891857147 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.3300135135650635, "learning_rate": 1.955452528362357e-06, "loss": 0.1741, "step": 24255, "teacher_loss": 0.1567775011062622 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.4738526940345764, "learning_rate": 1.954331378759637e-06, "loss": 0.2582, "step": 24256, "teacher_loss": 0.23427650332450867 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.422522634267807, "learning_rate": 1.9532105282549856e-06, "loss": 0.2011, "step": 24257, "teacher_loss": 0.17644914984703064 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.43353402614593506, "learning_rate": 1.9520899768741045e-06, "loss": 0.2792, "step": 24258, "teacher_loss": 0.26208794116973877 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.1121024638414383, "learning_rate": 1.9509697246426773e-06, "loss": 0.23, "step": 24259, "teacher_loss": 0.24314171075820923 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.3976435363292694, "learning_rate": 1.9498497715863943e-06, "loss": 0.2561, "step": 24260, "teacher_loss": 0.24034327268600464 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.590404212474823, "learning_rate": 1.948730117730926e-06, "loss": 0.2189, "step": 24261, "teacher_loss": 0.17764912545681 }, { "compression_loss": 0.0, "epoch": 4.38, "label_loss": 0.5480066537857056, "learning_rate": 1.9476107631019476e-06, "loss": 0.2518, "step": 24262, "teacher_loss": 0.2189018577337265 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.24600154161453247, "learning_rate": 1.946491707725122e-06, "loss": 0.194, "step": 24263, "teacher_loss": 0.18817198276519775 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.4572528600692749, "learning_rate": 1.945372951626104e-06, "loss": 0.2135, "step": 24264, "teacher_loss": 0.1864059865474701 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3452264070510864, "learning_rate": 1.9442544948305468e-06, "loss": 0.2125, "step": 24265, "teacher_loss": 0.19775322079658508 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.47714322805404663, "learning_rate": 1.943136337364089e-06, "loss": 0.2063, "step": 24266, "teacher_loss": 0.17618080973625183 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.34876763820648193, "learning_rate": 1.9420184792523675e-06, "loss": 0.1749, "step": 24267, "teacher_loss": 0.15558937191963196 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.460848867893219, "learning_rate": 1.940900920521015e-06, "loss": 0.1925, "step": 24268, "teacher_loss": 0.16267919540405273 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.2802533209323883, "learning_rate": 1.939783661195649e-06, "loss": 0.1656, "step": 24269, "teacher_loss": 0.15284088253974915 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.19455453753471375, "learning_rate": 1.9386667013018855e-06, "loss": 0.1705, "step": 24270, "teacher_loss": 0.16787326335906982 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5968358516693115, "learning_rate": 1.9375500408653398e-06, "loss": 0.2561, "step": 24271, "teacher_loss": 0.2182273268699646 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.13981568813323975, "learning_rate": 1.936433679911604e-06, "loss": 0.1604, "step": 24272, "teacher_loss": 0.1627308577299118 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.7101109623908997, "learning_rate": 1.935317618466278e-06, "loss": 0.5776, "step": 24273, "teacher_loss": 0.5629128813743591 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.31860584020614624, "learning_rate": 1.9342018565549514e-06, "loss": 0.1414, "step": 24274, "teacher_loss": 0.12166139483451843 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.23628222942352295, "learning_rate": 1.9330863942032017e-06, "loss": 0.1594, "step": 24275, "teacher_loss": 0.15084734559059143 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.1805683970451355, "learning_rate": 1.931971231436602e-06, "loss": 0.1494, "step": 24276, "teacher_loss": 0.1459105908870697 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.256730854511261, "learning_rate": 1.9308563682807255e-06, "loss": 0.1899, "step": 24277, "teacher_loss": 0.18248644471168518 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.4917605221271515, "learning_rate": 1.9297418047611315e-06, "loss": 0.2464, "step": 24278, "teacher_loss": 0.21916818618774414 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.7212333083152771, "learning_rate": 1.9286275409033628e-06, "loss": 0.3114, "step": 24279, "teacher_loss": 0.26587826013565063 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.39056336879730225, "learning_rate": 1.9275135767329803e-06, "loss": 0.2073, "step": 24280, "teacher_loss": 0.18690626323223114 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.14089876413345337, "learning_rate": 1.9263999122755195e-06, "loss": 0.1672, "step": 24281, "teacher_loss": 0.17017263174057007 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5980688333511353, "learning_rate": 1.925286547556509e-06, "loss": 0.236, "step": 24282, "teacher_loss": 0.19576843082904816 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.713057279586792, "learning_rate": 1.924173482601477e-06, "loss": 0.304, "step": 24283, "teacher_loss": 0.25855037569999695 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.14567795395851135, "learning_rate": 1.923060717435947e-06, "loss": 0.1405, "step": 24284, "teacher_loss": 0.1399250626564026 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.6200891733169556, "learning_rate": 1.921948252085425e-06, "loss": 0.2632, "step": 24285, "teacher_loss": 0.22357742488384247 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.2569945454597473, "learning_rate": 1.9208360865754197e-06, "loss": 0.284, "step": 24286, "teacher_loss": 0.2869962453842163 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.25493282079696655, "learning_rate": 1.9197242209314336e-06, "loss": 0.2105, "step": 24287, "teacher_loss": 0.20560970902442932 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 1.1352591514587402, "learning_rate": 1.918612655178954e-06, "loss": 0.3576, "step": 24288, "teacher_loss": 0.27114880084991455 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3497392535209656, "learning_rate": 1.917501389343459e-06, "loss": 0.1734, "step": 24289, "teacher_loss": 0.15375816822052002 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.4669134020805359, "learning_rate": 1.91639042345044e-06, "loss": 0.2497, "step": 24290, "teacher_loss": 0.2255280613899231 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3916887044906616, "learning_rate": 1.9152797575253627e-06, "loss": 0.2167, "step": 24291, "teacher_loss": 0.19721734523773193 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.24773097038269043, "learning_rate": 1.9141693915936874e-06, "loss": 0.1716, "step": 24292, "teacher_loss": 0.16312453150749207 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.4091212749481201, "learning_rate": 1.9130593256808736e-06, "loss": 0.1854, "step": 24293, "teacher_loss": 0.16050340235233307 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.6714450716972351, "learning_rate": 1.9119495598123776e-06, "loss": 0.2372, "step": 24294, "teacher_loss": 0.18894308805465698 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.49575275182724, "learning_rate": 1.910840094013634e-06, "loss": 0.2896, "step": 24295, "teacher_loss": 0.26666566729545593 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.36540377140045166, "learning_rate": 1.9097309283100823e-06, "loss": 0.1822, "step": 24296, "teacher_loss": 0.16179600358009338 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5823034048080444, "learning_rate": 1.9086220627271587e-06, "loss": 0.229, "step": 24297, "teacher_loss": 0.1897973120212555 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.7699387073516846, "learning_rate": 1.9075134972902787e-06, "loss": 0.2583, "step": 24298, "teacher_loss": 0.2014748901128769 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.19820314645767212, "learning_rate": 1.9064052320248594e-06, "loss": 0.172, "step": 24299, "teacher_loss": 0.16913267970085144 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.25550639629364014, "learning_rate": 1.9052972669563157e-06, "loss": 0.1602, "step": 24300, "teacher_loss": 0.1496301293373108 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3392167091369629, "learning_rate": 1.904189602110044e-06, "loss": 0.2039, "step": 24301, "teacher_loss": 0.18881529569625854 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3752850294113159, "learning_rate": 1.9030822375114375e-06, "loss": 0.2095, "step": 24302, "teacher_loss": 0.19105173647403717 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.15167555212974548, "learning_rate": 1.9019751731858926e-06, "loss": 0.1629, "step": 24303, "teacher_loss": 0.16415277123451233 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.1904190182685852, "learning_rate": 1.9008684091587886e-06, "loss": 0.1683, "step": 24304, "teacher_loss": 0.16580118238925934 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.565811812877655, "learning_rate": 1.8997619454554955e-06, "loss": 0.2519, "step": 24305, "teacher_loss": 0.2170458436012268 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3874053955078125, "learning_rate": 1.8986557821013844e-06, "loss": 0.174, "step": 24306, "teacher_loss": 0.15026253461837769 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5449634194374084, "learning_rate": 1.8975499191218188e-06, "loss": 0.3114, "step": 24307, "teacher_loss": 0.2854386270046234 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.6796234250068665, "learning_rate": 1.896444356542148e-06, "loss": 0.1904, "step": 24308, "teacher_loss": 0.13602030277252197 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.35378479957580566, "learning_rate": 1.895339094387722e-06, "loss": 0.221, "step": 24309, "teacher_loss": 0.20626473426818848 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.19117116928100586, "learning_rate": 1.8942341326838835e-06, "loss": 0.1707, "step": 24310, "teacher_loss": 0.16844549775123596 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.35521265864372253, "learning_rate": 1.8931294714559593e-06, "loss": 0.1479, "step": 24311, "teacher_loss": 0.12484799325466156 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5196978449821472, "learning_rate": 1.8920251107292808e-06, "loss": 0.2542, "step": 24312, "teacher_loss": 0.22468310594558716 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.7515823841094971, "learning_rate": 1.8909210505291691e-06, "loss": 0.2975, "step": 24313, "teacher_loss": 0.24701833724975586 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.21503028273582458, "learning_rate": 1.8898172908809324e-06, "loss": 0.1975, "step": 24314, "teacher_loss": 0.1955067366361618 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.30153951048851013, "learning_rate": 1.8887138318098807e-06, "loss": 0.1994, "step": 24315, "teacher_loss": 0.1880682408809662 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.3739209473133087, "learning_rate": 1.8876106733413083e-06, "loss": 0.2047, "step": 24316, "teacher_loss": 0.18591301143169403 }, { "compression_loss": 0.0, "epoch": 4.39, "label_loss": 0.5987281799316406, "learning_rate": 1.8865078155005106e-06, "loss": 0.2801, "step": 24317, "teacher_loss": 0.2447076439857483 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.15823659300804138, "learning_rate": 1.8854052583127769e-06, "loss": 0.1978, "step": 24318, "teacher_loss": 0.20219901204109192 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.39573559165000916, "learning_rate": 1.884303001803377e-06, "loss": 0.2103, "step": 24319, "teacher_loss": 0.18969517946243286 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3875979781150818, "learning_rate": 1.8832010459975858e-06, "loss": 0.1989, "step": 24320, "teacher_loss": 0.1778961718082428 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.4808189868927002, "learning_rate": 1.8820993909206713e-06, "loss": 0.1874, "step": 24321, "teacher_loss": 0.1548454463481903 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.5599536895751953, "learning_rate": 1.8809980365978851e-06, "loss": 0.2237, "step": 24322, "teacher_loss": 0.1863691806793213 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.41733264923095703, "learning_rate": 1.8798969830544853e-06, "loss": 0.2391, "step": 24323, "teacher_loss": 0.219307541847229 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.423758327960968, "learning_rate": 1.878796230315707e-06, "loss": 0.2184, "step": 24324, "teacher_loss": 0.19557365775108337 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.1972811073064804, "learning_rate": 1.8776957784067928e-06, "loss": 0.2436, "step": 24325, "teacher_loss": 0.24880138039588928 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.24348028004169464, "learning_rate": 1.8765956273529744e-06, "loss": 0.2145, "step": 24326, "teacher_loss": 0.21124625205993652 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3601433336734772, "learning_rate": 1.87549577717947e-06, "loss": 0.1832, "step": 24327, "teacher_loss": 0.1635204702615738 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6053407192230225, "learning_rate": 1.8743962279115012e-06, "loss": 0.2477, "step": 24328, "teacher_loss": 0.2080015391111374 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.4397616684436798, "learning_rate": 1.8732969795742694e-06, "loss": 0.2447, "step": 24329, "teacher_loss": 0.22301416099071503 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.33742594718933105, "learning_rate": 1.8721980321929843e-06, "loss": 0.1857, "step": 24330, "teacher_loss": 0.16884958744049072 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.25857770442962646, "learning_rate": 1.871099385792841e-06, "loss": 0.2033, "step": 24331, "teacher_loss": 0.1971796452999115 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 1.1512397527694702, "learning_rate": 1.8700010403990225e-06, "loss": 0.2813, "step": 24332, "teacher_loss": 0.18466314673423767 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.9605982899665833, "learning_rate": 1.8689029960367154e-06, "loss": 0.2522, "step": 24333, "teacher_loss": 0.17345799505710602 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.24304628372192383, "learning_rate": 1.8678052527310963e-06, "loss": 0.1672, "step": 24334, "teacher_loss": 0.15879777073860168 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.14222028851509094, "learning_rate": 1.8667078105073283e-06, "loss": 0.1743, "step": 24335, "teacher_loss": 0.17785710096359253 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.5630488991737366, "learning_rate": 1.8656106693905729e-06, "loss": 0.2517, "step": 24336, "teacher_loss": 0.2171015441417694 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6622228622436523, "learning_rate": 1.8645138294059899e-06, "loss": 0.2391, "step": 24337, "teacher_loss": 0.19205957651138306 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.24015012383460999, "learning_rate": 1.8634172905787228e-06, "loss": 0.1882, "step": 24338, "teacher_loss": 0.18239492177963257 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3334270119667053, "learning_rate": 1.8623210529339047e-06, "loss": 0.1637, "step": 24339, "teacher_loss": 0.1448170393705368 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6739846467971802, "learning_rate": 1.861225116496682e-06, "loss": 0.2778, "step": 24340, "teacher_loss": 0.23375552892684937 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6571506261825562, "learning_rate": 1.8601294812921765e-06, "loss": 0.2483, "step": 24341, "teacher_loss": 0.2028503119945526 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.9343421459197998, "learning_rate": 1.859034147345503e-06, "loss": 0.4066, "step": 24342, "teacher_loss": 0.34793543815612793 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3712049722671509, "learning_rate": 1.8579391146817781e-06, "loss": 0.2406, "step": 24343, "teacher_loss": 0.22604236006736755 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3520374000072479, "learning_rate": 1.8568443833261101e-06, "loss": 0.2294, "step": 24344, "teacher_loss": 0.21577146649360657 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.33999398350715637, "learning_rate": 1.8557499533035922e-06, "loss": 0.206, "step": 24345, "teacher_loss": 0.19111041724681854 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.4820348918437958, "learning_rate": 1.8546558246393191e-06, "loss": 0.2729, "step": 24346, "teacher_loss": 0.24960818886756897 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.45540502667427063, "learning_rate": 1.8535619973583795e-06, "loss": 0.239, "step": 24347, "teacher_loss": 0.21490515768527985 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3991914689540863, "learning_rate": 1.852468471485848e-06, "loss": 0.2358, "step": 24348, "teacher_loss": 0.2175983190536499 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.26334309577941895, "learning_rate": 1.8513752470467897e-06, "loss": 0.172, "step": 24349, "teacher_loss": 0.16186802089214325 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6084012985229492, "learning_rate": 1.850282324066283e-06, "loss": 0.211, "step": 24350, "teacher_loss": 0.1668529361486435 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.37073570489883423, "learning_rate": 1.8491897025693761e-06, "loss": 0.1716, "step": 24351, "teacher_loss": 0.14951342344284058 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.44495195150375366, "learning_rate": 1.8480973825811154e-06, "loss": 0.1916, "step": 24352, "teacher_loss": 0.16349640488624573 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6999865174293518, "learning_rate": 1.847005364126556e-06, "loss": 0.2546, "step": 24353, "teacher_loss": 0.2051447182893753 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 1.2140017747879028, "learning_rate": 1.8459136472307297e-06, "loss": 0.3739, "step": 24354, "teacher_loss": 0.28061002492904663 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.19357341527938843, "learning_rate": 1.844822231918663e-06, "loss": 0.1765, "step": 24355, "teacher_loss": 0.17463155090808868 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.34363871812820435, "learning_rate": 1.8437311182153793e-06, "loss": 0.1966, "step": 24356, "teacher_loss": 0.1802481859922409 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.8654544353485107, "learning_rate": 1.8426403061459019e-06, "loss": 0.2611, "step": 24357, "teacher_loss": 0.1939910352230072 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.36112669110298157, "learning_rate": 1.841549795735229e-06, "loss": 0.1653, "step": 24358, "teacher_loss": 0.1434864103794098 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.507938027381897, "learning_rate": 1.8404595870083706e-06, "loss": 0.2654, "step": 24359, "teacher_loss": 0.23849962651729584 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3904286026954651, "learning_rate": 1.8393696799903204e-06, "loss": 0.2257, "step": 24360, "teacher_loss": 0.20734341442584991 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.9596391916275024, "learning_rate": 1.8382800747060646e-06, "loss": 0.3869, "step": 24361, "teacher_loss": 0.3232209384441376 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.485186904668808, "learning_rate": 1.8371907711805851e-06, "loss": 0.2027, "step": 24362, "teacher_loss": 0.17126059532165527 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.40507572889328003, "learning_rate": 1.8361017694388588e-06, "loss": 0.24, "step": 24363, "teacher_loss": 0.22166912257671356 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.18776948750019073, "learning_rate": 1.8350130695058537e-06, "loss": 0.1498, "step": 24364, "teacher_loss": 0.14555975794792175 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.3984917104244232, "learning_rate": 1.8339246714065232e-06, "loss": 0.1508, "step": 24365, "teacher_loss": 0.12329752743244171 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.43079084157943726, "learning_rate": 1.8328365751658277e-06, "loss": 0.199, "step": 24366, "teacher_loss": 0.17329490184783936 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.4045030474662781, "learning_rate": 1.8317487808087152e-06, "loss": 0.2245, "step": 24367, "teacher_loss": 0.2044980525970459 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.46897411346435547, "learning_rate": 1.8306612883601193e-06, "loss": 0.1912, "step": 24368, "teacher_loss": 0.16037797927856445 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6358522176742554, "learning_rate": 1.8295740978449748e-06, "loss": 0.2446, "step": 24369, "teacher_loss": 0.20110619068145752 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.26492926478385925, "learning_rate": 1.8284872092882138e-06, "loss": 0.1618, "step": 24370, "teacher_loss": 0.15029895305633545 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.8276483416557312, "learning_rate": 1.8274006227147478e-06, "loss": 0.2634, "step": 24371, "teacher_loss": 0.20074006915092468 }, { "compression_loss": 0.0, "epoch": 4.4, "label_loss": 0.6769616007804871, "learning_rate": 1.8263143381494917e-06, "loss": 0.2713, "step": 24372, "teacher_loss": 0.22626271843910217 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.3706105351448059, "learning_rate": 1.8252283556173544e-06, "loss": 0.1698, "step": 24373, "teacher_loss": 0.147472083568573 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.5602754950523376, "learning_rate": 1.8241426751432273e-06, "loss": 0.2023, "step": 24374, "teacher_loss": 0.162471741437912 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.3811646103858948, "learning_rate": 1.8230572967520071e-06, "loss": 0.2002, "step": 24375, "teacher_loss": 0.18010881543159485 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.3324650824069977, "learning_rate": 1.8219722204685775e-06, "loss": 0.2054, "step": 24376, "teacher_loss": 0.19125190377235413 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.265918105840683, "learning_rate": 1.8208874463178133e-06, "loss": 0.2949, "step": 24377, "teacher_loss": 0.29807722568511963 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.42196860909461975, "learning_rate": 1.8198029743245897e-06, "loss": 0.1835, "step": 24378, "teacher_loss": 0.15700052678585052 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.7597277760505676, "learning_rate": 1.8187188045137637e-06, "loss": 0.1991, "step": 24379, "teacher_loss": 0.1368383765220642 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4966760277748108, "learning_rate": 1.817634936910197e-06, "loss": 0.2321, "step": 24380, "teacher_loss": 0.20269182324409485 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6978597640991211, "learning_rate": 1.8165513715387411e-06, "loss": 0.247, "step": 24381, "teacher_loss": 0.1969597041606903 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.529492974281311, "learning_rate": 1.815468108424233e-06, "loss": 0.2378, "step": 24382, "teacher_loss": 0.20543350279331207 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6435953378677368, "learning_rate": 1.814385147591513e-06, "loss": 0.2446, "step": 24383, "teacher_loss": 0.20031176507472992 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4451681673526764, "learning_rate": 1.813302489065411e-06, "loss": 0.2366, "step": 24384, "teacher_loss": 0.21341761946678162 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.284879207611084, "learning_rate": 1.812220132870744e-06, "loss": 0.1767, "step": 24385, "teacher_loss": 0.16470956802368164 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.9364553093910217, "learning_rate": 1.8111380790323334e-06, "loss": 0.2554, "step": 24386, "teacher_loss": 0.17977027595043182 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4164993464946747, "learning_rate": 1.8100563275749832e-06, "loss": 0.2547, "step": 24387, "teacher_loss": 0.2367614507675171 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4270293712615967, "learning_rate": 1.8089748785234965e-06, "loss": 0.1882, "step": 24388, "teacher_loss": 0.16168951988220215 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.82603520154953, "learning_rate": 1.8078937319026655e-06, "loss": 0.2949, "step": 24389, "teacher_loss": 0.23584884405136108 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.32706597447395325, "learning_rate": 1.8068128877372785e-06, "loss": 0.1472, "step": 24390, "teacher_loss": 0.12715966999530792 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.16757752001285553, "learning_rate": 1.8057323460521208e-06, "loss": 0.1327, "step": 24391, "teacher_loss": 0.12887245416641235 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4521830081939697, "learning_rate": 1.8046521068719573e-06, "loss": 0.2035, "step": 24392, "teacher_loss": 0.17590278387069702 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.439098596572876, "learning_rate": 1.8035721702215601e-06, "loss": 0.1877, "step": 24393, "teacher_loss": 0.1597745716571808 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.165091872215271, "learning_rate": 1.8024925361256911e-06, "loss": 0.1615, "step": 24394, "teacher_loss": 0.16106855869293213 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.19946181774139404, "learning_rate": 1.801413204609097e-06, "loss": 0.1837, "step": 24395, "teacher_loss": 0.1819675713777542 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.2500089108943939, "learning_rate": 1.8003341756965263e-06, "loss": 0.1627, "step": 24396, "teacher_loss": 0.15297794342041016 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.19036871194839478, "learning_rate": 1.7992554494127229e-06, "loss": 0.1838, "step": 24397, "teacher_loss": 0.18302863836288452 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.43877318501472473, "learning_rate": 1.7981770257824098e-06, "loss": 0.2703, "step": 24398, "teacher_loss": 0.25158458948135376 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4876255989074707, "learning_rate": 1.797098904830316e-06, "loss": 0.286, "step": 24399, "teacher_loss": 0.2636134624481201 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.7392245531082153, "learning_rate": 1.7960210865811634e-06, "loss": 0.2335, "step": 24400, "teacher_loss": 0.17726773023605347 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6270226836204529, "learning_rate": 1.79494357105966e-06, "loss": 0.2603, "step": 24401, "teacher_loss": 0.21956589818000793 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.17647115886211395, "learning_rate": 1.7938663582905035e-06, "loss": 0.1622, "step": 24402, "teacher_loss": 0.16065333783626556 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.1639460325241089, "learning_rate": 1.7927894482984036e-06, "loss": 0.1359, "step": 24403, "teacher_loss": 0.13276860117912292 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.5401804447174072, "learning_rate": 1.7917128411080442e-06, "loss": 0.2255, "step": 24404, "teacher_loss": 0.19049134850502014 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.2855954170227051, "learning_rate": 1.7906365367441068e-06, "loss": 0.2362, "step": 24405, "teacher_loss": 0.23068645596504211 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.484529048204422, "learning_rate": 1.7895605352312689e-06, "loss": 0.2447, "step": 24406, "teacher_loss": 0.21803578734397888 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6127817630767822, "learning_rate": 1.7884848365942035e-06, "loss": 0.2157, "step": 24407, "teacher_loss": 0.17159083485603333 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.32247716188430786, "learning_rate": 1.787409440857568e-06, "loss": 0.1888, "step": 24408, "teacher_loss": 0.17389345169067383 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.43364787101745605, "learning_rate": 1.7863343480460208e-06, "loss": 0.2254, "step": 24409, "teacher_loss": 0.2022097259759903 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4437745213508606, "learning_rate": 1.7852595581842141e-06, "loss": 0.262, "step": 24410, "teacher_loss": 0.24182236194610596 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.19968535006046295, "learning_rate": 1.7841850712967844e-06, "loss": 0.1718, "step": 24411, "teacher_loss": 0.168709397315979 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.1655711680650711, "learning_rate": 1.7831108874083623e-06, "loss": 0.184, "step": 24412, "teacher_loss": 0.1860230416059494 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.24983613193035126, "learning_rate": 1.782037006543588e-06, "loss": 0.1909, "step": 24413, "teacher_loss": 0.18433141708374023 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.5995436906814575, "learning_rate": 1.7809634287270754e-06, "loss": 0.207, "step": 24414, "teacher_loss": 0.16339261829853058 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.29286009073257446, "learning_rate": 1.7798901539834362e-06, "loss": 0.16, "step": 24415, "teacher_loss": 0.14525073766708374 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6715531349182129, "learning_rate": 1.7788171823372789e-06, "loss": 0.2091, "step": 24416, "teacher_loss": 0.15775421261787415 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 1.046951413154602, "learning_rate": 1.7777445138132075e-06, "loss": 0.2772, "step": 24417, "teacher_loss": 0.19165468215942383 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.32450950145721436, "learning_rate": 1.7766721484358089e-06, "loss": 0.1957, "step": 24418, "teacher_loss": 0.18141093850135803 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.7267633080482483, "learning_rate": 1.7756000862296735e-06, "loss": 0.3177, "step": 24419, "teacher_loss": 0.27227070927619934 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.7510654926300049, "learning_rate": 1.7745283272193814e-06, "loss": 0.3022, "step": 24420, "teacher_loss": 0.25235676765441895 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.6541892886161804, "learning_rate": 1.7734568714294997e-06, "loss": 0.2812, "step": 24421, "teacher_loss": 0.23978833854198456 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.2009134441614151, "learning_rate": 1.7723857188845972e-06, "loss": 0.16, "step": 24422, "teacher_loss": 0.15540926158428192 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.0928993970155716, "learning_rate": 1.771314869609234e-06, "loss": 0.1854, "step": 24423, "teacher_loss": 0.19564759731292725 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.2945198714733124, "learning_rate": 1.7702443236279576e-06, "loss": 0.1712, "step": 24424, "teacher_loss": 0.15746445953845978 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.1602174937725067, "learning_rate": 1.7691740809653128e-06, "loss": 0.1225, "step": 24425, "teacher_loss": 0.11825625598430634 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.603344202041626, "learning_rate": 1.7681041416458405e-06, "loss": 0.1997, "step": 24426, "teacher_loss": 0.15483233332633972 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.16134929656982422, "learning_rate": 1.7670345056940708e-06, "loss": 0.2615, "step": 24427, "teacher_loss": 0.2726028561592102 }, { "compression_loss": 0.0, "epoch": 4.41, "label_loss": 0.4265596568584442, "learning_rate": 1.7659651731345206e-06, "loss": 0.2343, "step": 24428, "teacher_loss": 0.21288880705833435 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5848217010498047, "learning_rate": 1.7648961439917122e-06, "loss": 0.346, "step": 24429, "teacher_loss": 0.31950464844703674 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6196691393852234, "learning_rate": 1.7638274182901576e-06, "loss": 0.2176, "step": 24430, "teacher_loss": 0.17288947105407715 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3572612404823303, "learning_rate": 1.7627589960543522e-06, "loss": 0.2065, "step": 24431, "teacher_loss": 0.18978887796401978 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.22268274426460266, "learning_rate": 1.7616908773087965e-06, "loss": 0.2262, "step": 24432, "teacher_loss": 0.22663632035255432 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.43206608295440674, "learning_rate": 1.7606230620779822e-06, "loss": 0.2033, "step": 24433, "teacher_loss": 0.17787495255470276 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.4927389621734619, "learning_rate": 1.7595555503863836e-06, "loss": 0.3245, "step": 24434, "teacher_loss": 0.3058363199234009 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.29906463623046875, "learning_rate": 1.7584883422584791e-06, "loss": 0.1785, "step": 24435, "teacher_loss": 0.16505998373031616 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.48722803592681885, "learning_rate": 1.7574214377187393e-06, "loss": 0.2289, "step": 24436, "teacher_loss": 0.20017579197883606 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3993653655052185, "learning_rate": 1.756354836791621e-06, "loss": 0.1948, "step": 24437, "teacher_loss": 0.17206673324108124 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.28124380111694336, "learning_rate": 1.7552885395015816e-06, "loss": 0.1564, "step": 24438, "teacher_loss": 0.1425042599439621 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.35560667514801025, "learning_rate": 1.7542225458730631e-06, "loss": 0.2529, "step": 24439, "teacher_loss": 0.2414434552192688 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.2461598962545395, "learning_rate": 1.7531568559305094e-06, "loss": 0.2827, "step": 24440, "teacher_loss": 0.28670698404312134 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.10769784450531006, "learning_rate": 1.7520914696983558e-06, "loss": 0.0994, "step": 24441, "teacher_loss": 0.09850557893514633 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6627588272094727, "learning_rate": 1.7510263872010229e-06, "loss": 0.2882, "step": 24442, "teacher_loss": 0.2466033697128296 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5780213475227356, "learning_rate": 1.7499616084629328e-06, "loss": 0.2319, "step": 24443, "teacher_loss": 0.19340112805366516 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5630593299865723, "learning_rate": 1.748897133508499e-06, "loss": 0.2938, "step": 24444, "teacher_loss": 0.2639217972755432 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.20912209153175354, "learning_rate": 1.7478329623621226e-06, "loss": 0.1349, "step": 24445, "teacher_loss": 0.12668466567993164 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.12842725217342377, "learning_rate": 1.7467690950482052e-06, "loss": 0.1437, "step": 24446, "teacher_loss": 0.14537307620048523 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6309764385223389, "learning_rate": 1.7457055315911391e-06, "loss": 0.2254, "step": 24447, "teacher_loss": 0.18037429451942444 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.41530001163482666, "learning_rate": 1.7446422720153032e-06, "loss": 0.2162, "step": 24448, "teacher_loss": 0.19403663277626038 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.2699463665485382, "learning_rate": 1.743579316345083e-06, "loss": 0.159, "step": 24449, "teacher_loss": 0.14670351147651672 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3707202076911926, "learning_rate": 1.7425166646048407e-06, "loss": 0.1722, "step": 24450, "teacher_loss": 0.15016481280326843 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3348059058189392, "learning_rate": 1.741454316818945e-06, "loss": 0.1985, "step": 24451, "teacher_loss": 0.18334174156188965 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6357457041740417, "learning_rate": 1.74039227301175e-06, "loss": 0.2315, "step": 24452, "teacher_loss": 0.1866014152765274 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.8041625022888184, "learning_rate": 1.7393305332076043e-06, "loss": 0.2363, "step": 24453, "teacher_loss": 0.17319905757904053 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.2968295216560364, "learning_rate": 1.7382690974308551e-06, "loss": 0.2248, "step": 24454, "teacher_loss": 0.2168281376361847 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.28981465101242065, "learning_rate": 1.7372079657058316e-06, "loss": 0.2474, "step": 24455, "teacher_loss": 0.24271957576274872 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.45003271102905273, "learning_rate": 1.7361471380568654e-06, "loss": 0.2608, "step": 24456, "teacher_loss": 0.23978403210639954 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6797051429748535, "learning_rate": 1.7350866145082827e-06, "loss": 0.2075, "step": 24457, "teacher_loss": 0.1550377607345581 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3361632227897644, "learning_rate": 1.7340263950843888e-06, "loss": 0.1666, "step": 24458, "teacher_loss": 0.14774832129478455 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.2442660927772522, "learning_rate": 1.7329664798094973e-06, "loss": 0.2122, "step": 24459, "teacher_loss": 0.20859801769256592 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.23953089118003845, "learning_rate": 1.731906868707911e-06, "loss": 0.1732, "step": 24460, "teacher_loss": 0.16579227149486542 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.21019737422466278, "learning_rate": 1.7308475618039199e-06, "loss": 0.1647, "step": 24461, "teacher_loss": 0.15966854989528656 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.4295654594898224, "learning_rate": 1.7297885591218049e-06, "loss": 0.4592, "step": 24462, "teacher_loss": 0.46250009536743164 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.42035824060440063, "learning_rate": 1.7287298606858582e-06, "loss": 0.2162, "step": 24463, "teacher_loss": 0.19350774586200714 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.36720970273017883, "learning_rate": 1.727671466520347e-06, "loss": 0.1841, "step": 24464, "teacher_loss": 0.1637439876794815 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.38488417863845825, "learning_rate": 1.7266133766495317e-06, "loss": 0.1771, "step": 24465, "teacher_loss": 0.15397042036056519 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5386950373649597, "learning_rate": 1.7255555910976784e-06, "loss": 0.2476, "step": 24466, "teacher_loss": 0.21529436111450195 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3712984025478363, "learning_rate": 1.724498109889039e-06, "loss": 0.3113, "step": 24467, "teacher_loss": 0.3046547770500183 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.08986647427082062, "learning_rate": 1.7234409330478523e-06, "loss": 0.18, "step": 24468, "teacher_loss": 0.19004280865192413 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.49769020080566406, "learning_rate": 1.7223840605983593e-06, "loss": 0.2349, "step": 24469, "teacher_loss": 0.20565104484558105 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.34266024827957153, "learning_rate": 1.7213274925647954e-06, "loss": 0.2121, "step": 24470, "teacher_loss": 0.19759118556976318 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6303396224975586, "learning_rate": 1.7202712289713813e-06, "loss": 0.2157, "step": 24471, "teacher_loss": 0.16958779096603394 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3793298006057739, "learning_rate": 1.7192152698423258e-06, "loss": 0.3377, "step": 24472, "teacher_loss": 0.3330966532230377 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.4321761727333069, "learning_rate": 1.718159615201853e-06, "loss": 0.256, "step": 24473, "teacher_loss": 0.23644474148750305 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.6198815107345581, "learning_rate": 1.7171042650741585e-06, "loss": 0.1738, "step": 24474, "teacher_loss": 0.12429051101207733 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5762876272201538, "learning_rate": 1.7160492194834331e-06, "loss": 0.1981, "step": 24475, "teacher_loss": 0.1560766100883484 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.22794653475284576, "learning_rate": 1.714994478453879e-06, "loss": 0.1751, "step": 24476, "teacher_loss": 0.16918307542800903 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3369901478290558, "learning_rate": 1.7139400420096702e-06, "loss": 0.2215, "step": 24477, "teacher_loss": 0.20867237448692322 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.3909589946269989, "learning_rate": 1.7128859101749789e-06, "loss": 0.1945, "step": 24478, "teacher_loss": 0.1727023720741272 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.31496286392211914, "learning_rate": 1.7118320829739775e-06, "loss": 0.1848, "step": 24479, "teacher_loss": 0.17036299407482147 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5217673778533936, "learning_rate": 1.7107785604308301e-06, "loss": 0.1998, "step": 24480, "teacher_loss": 0.16398631036281586 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.7781469225883484, "learning_rate": 1.7097253425696823e-06, "loss": 0.2862, "step": 24481, "teacher_loss": 0.2315066158771515 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.4367533028125763, "learning_rate": 1.7086724294146884e-06, "loss": 0.235, "step": 24482, "teacher_loss": 0.2125321328639984 }, { "compression_loss": 0.0, "epoch": 4.42, "label_loss": 0.5458082556724548, "learning_rate": 1.7076198209899885e-06, "loss": 0.354, "step": 24483, "teacher_loss": 0.3326645493507385 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.26402565836906433, "learning_rate": 1.7065675173197087e-06, "loss": 0.1423, "step": 24484, "teacher_loss": 0.1287655234336853 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.8025052547454834, "learning_rate": 1.7055155184279814e-06, "loss": 0.4451, "step": 24485, "teacher_loss": 0.40533286333084106 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5758934020996094, "learning_rate": 1.7044638243389271e-06, "loss": 0.2573, "step": 24486, "teacher_loss": 0.22188061475753784 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.21546858549118042, "learning_rate": 1.7034124350766533e-06, "loss": 0.1579, "step": 24487, "teacher_loss": 0.1515224277973175 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.16158457100391388, "learning_rate": 1.7023613506652692e-06, "loss": 0.2592, "step": 24488, "teacher_loss": 0.2700707018375397 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.37913060188293457, "learning_rate": 1.7013105711288668e-06, "loss": 0.1897, "step": 24489, "teacher_loss": 0.16861286759376526 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.44704902172088623, "learning_rate": 1.7002600964915471e-06, "loss": 0.2827, "step": 24490, "teacher_loss": 0.2643897235393524 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.6619666814804077, "learning_rate": 1.6992099267773842e-06, "loss": 0.2676, "step": 24491, "teacher_loss": 0.22376984357833862 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.31577426195144653, "learning_rate": 1.6981600620104586e-06, "loss": 0.1879, "step": 24492, "teacher_loss": 0.1736985743045807 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.7790653705596924, "learning_rate": 1.6971105022148465e-06, "loss": 0.2735, "step": 24493, "teacher_loss": 0.21737366914749146 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.2521287500858307, "learning_rate": 1.696061247414603e-06, "loss": 0.1909, "step": 24494, "teacher_loss": 0.1840868443250656 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.6298946142196655, "learning_rate": 1.6950122976337879e-06, "loss": 0.2244, "step": 24495, "teacher_loss": 0.17929315567016602 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.40682095289230347, "learning_rate": 1.693963652896453e-06, "loss": 0.2116, "step": 24496, "teacher_loss": 0.1899423599243164 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.4832611382007599, "learning_rate": 1.6929153132266363e-06, "loss": 0.1945, "step": 24497, "teacher_loss": 0.1624184548854828 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5534741878509521, "learning_rate": 1.691867278648373e-06, "loss": 0.2569, "step": 24498, "teacher_loss": 0.2239881455898285 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.1723393201828003, "learning_rate": 1.6908195491856976e-06, "loss": 0.1922, "step": 24499, "teacher_loss": 0.1944473683834076 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3675430119037628, "learning_rate": 1.6897721248626224e-06, "loss": 0.3125, "step": 24500, "teacher_loss": 0.30634430050849915 }, { "epoch": 4.43, "eval_exact_match": 80.42573320719016, "eval_f1": 87.68799005551737, "step": 24500 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.45333170890808105, "learning_rate": 1.6887250057031701e-06, "loss": 0.2882, "step": 24501, "teacher_loss": 0.26980215311050415 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.728473424911499, "learning_rate": 1.687678191731341e-06, "loss": 0.2882, "step": 24502, "teacher_loss": 0.2393219918012619 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3277839422225952, "learning_rate": 1.6866316829711381e-06, "loss": 0.1931, "step": 24503, "teacher_loss": 0.17818287014961243 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.38891106843948364, "learning_rate": 1.6855854794465602e-06, "loss": 0.2188, "step": 24504, "teacher_loss": 0.19989748299121857 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5908646583557129, "learning_rate": 1.6845395811815834e-06, "loss": 0.2213, "step": 24505, "teacher_loss": 0.18020027875900269 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.6886515617370605, "learning_rate": 1.6834939882001931e-06, "loss": 0.2807, "step": 24506, "teacher_loss": 0.23535841703414917 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.16791735589504242, "learning_rate": 1.6824487005263639e-06, "loss": 0.1744, "step": 24507, "teacher_loss": 0.17516668140888214 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.34179258346557617, "learning_rate": 1.6814037181840546e-06, "loss": 0.1485, "step": 24508, "teacher_loss": 0.12703868746757507 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.41665416955947876, "learning_rate": 1.6803590411972263e-06, "loss": 0.1735, "step": 24509, "teacher_loss": 0.14642837643623352 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.6931857466697693, "learning_rate": 1.6793146695898332e-06, "loss": 0.2836, "step": 24510, "teacher_loss": 0.2381274700164795 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.7405045032501221, "learning_rate": 1.6782706033858191e-06, "loss": 0.2111, "step": 24511, "teacher_loss": 0.15233029425144196 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.6076223850250244, "learning_rate": 1.6772268426091153e-06, "loss": 0.2549, "step": 24512, "teacher_loss": 0.21572019159793854 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.44470375776290894, "learning_rate": 1.6761833872836575e-06, "loss": 0.291, "step": 24513, "teacher_loss": 0.2739550769329071 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.36093905568122864, "learning_rate": 1.6751402374333696e-06, "loss": 0.195, "step": 24514, "teacher_loss": 0.17654427886009216 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.21240416169166565, "learning_rate": 1.674097393082163e-06, "loss": 0.1731, "step": 24515, "teacher_loss": 0.16877998411655426 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.2884906530380249, "learning_rate": 1.6730548542539498e-06, "loss": 0.2116, "step": 24516, "teacher_loss": 0.2030259370803833 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5228714942932129, "learning_rate": 1.6720126209726362e-06, "loss": 0.2415, "step": 24517, "teacher_loss": 0.2101929634809494 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.658111572265625, "learning_rate": 1.670970693262111e-06, "loss": 0.1985, "step": 24518, "teacher_loss": 0.14747270941734314 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.44723668694496155, "learning_rate": 1.6699290711462656e-06, "loss": 0.2366, "step": 24519, "teacher_loss": 0.21324560046195984 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3051148056983948, "learning_rate": 1.6688877546489823e-06, "loss": 0.177, "step": 24520, "teacher_loss": 0.16281157732009888 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3116874694824219, "learning_rate": 1.667846743794132e-06, "loss": 0.1577, "step": 24521, "teacher_loss": 0.1405673623085022 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.37893199920654297, "learning_rate": 1.6668060386055855e-06, "loss": 0.2268, "step": 24522, "teacher_loss": 0.2099481225013733 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.2683839499950409, "learning_rate": 1.6657656391072024e-06, "loss": 0.1941, "step": 24523, "teacher_loss": 0.18583469092845917 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.32590773701667786, "learning_rate": 1.6647255453228366e-06, "loss": 0.2221, "step": 24524, "teacher_loss": 0.21059784293174744 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3735974133014679, "learning_rate": 1.6636857572763258e-06, "loss": 0.221, "step": 24525, "teacher_loss": 0.20400184392929077 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5440957546234131, "learning_rate": 1.6626462749915227e-06, "loss": 0.2715, "step": 24526, "teacher_loss": 0.24119020998477936 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.4373500645160675, "learning_rate": 1.6616070984922515e-06, "loss": 0.2301, "step": 24527, "teacher_loss": 0.2070498764514923 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.2646661400794983, "learning_rate": 1.6605682278023383e-06, "loss": 0.1948, "step": 24528, "teacher_loss": 0.18702928721904755 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.5522036552429199, "learning_rate": 1.6595296629456002e-06, "loss": 0.2706, "step": 24529, "teacher_loss": 0.23935039341449738 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.37035250663757324, "learning_rate": 1.6584914039458537e-06, "loss": 0.1956, "step": 24530, "teacher_loss": 0.17616891860961914 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.2043423056602478, "learning_rate": 1.6574534508268978e-06, "loss": 0.2439, "step": 24531, "teacher_loss": 0.24826355278491974 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.8247231245040894, "learning_rate": 1.6564158036125287e-06, "loss": 0.3939, "step": 24532, "teacher_loss": 0.3459968864917755 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.16633634269237518, "learning_rate": 1.655378462326544e-06, "loss": 0.2034, "step": 24533, "teacher_loss": 0.20750859379768372 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.44069015979766846, "learning_rate": 1.654341426992721e-06, "loss": 0.2179, "step": 24534, "teacher_loss": 0.19311542809009552 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.4300878047943115, "learning_rate": 1.653304697634831e-06, "loss": 0.1783, "step": 24535, "teacher_loss": 0.1502757966518402 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.4605398178100586, "learning_rate": 1.6522682742766549e-06, "loss": 0.2422, "step": 24536, "teacher_loss": 0.21799196302890778 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.24333976209163666, "learning_rate": 1.6512321569419486e-06, "loss": 0.1571, "step": 24537, "teacher_loss": 0.14752452075481415 }, { "compression_loss": 0.0, "epoch": 4.43, "label_loss": 0.3269619941711426, "learning_rate": 1.650196345654465e-06, "loss": 0.191, "step": 24538, "teacher_loss": 0.17590953409671783 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.5551846027374268, "learning_rate": 1.6491608404379532e-06, "loss": 0.2386, "step": 24539, "teacher_loss": 0.20343518257141113 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3133026361465454, "learning_rate": 1.6481256413161594e-06, "loss": 0.2031, "step": 24540, "teacher_loss": 0.19086697697639465 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3053411841392517, "learning_rate": 1.6470907483128095e-06, "loss": 0.1836, "step": 24541, "teacher_loss": 0.17003867030143738 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.34737879037857056, "learning_rate": 1.6460561614516362e-06, "loss": 0.196, "step": 24542, "teacher_loss": 0.1791771799325943 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.41136208176612854, "learning_rate": 1.6450218807563605e-06, "loss": 0.2294, "step": 24543, "teacher_loss": 0.20915460586547852 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.24792975187301636, "learning_rate": 1.6439879062506885e-06, "loss": 0.203, "step": 24544, "teacher_loss": 0.19802913069725037 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.8182623386383057, "learning_rate": 1.6429542379583313e-06, "loss": 0.3206, "step": 24545, "teacher_loss": 0.26530399918556213 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3134274482727051, "learning_rate": 1.6419208759029898e-06, "loss": 0.1618, "step": 24546, "teacher_loss": 0.14490006864070892 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.2610355019569397, "learning_rate": 1.64088782010835e-06, "loss": 0.1658, "step": 24547, "teacher_loss": 0.15524545311927795 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.48949819803237915, "learning_rate": 1.6398550705980997e-06, "loss": 0.2324, "step": 24548, "teacher_loss": 0.20381684601306915 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.24826902151107788, "learning_rate": 1.63882262739592e-06, "loss": 0.2574, "step": 24549, "teacher_loss": 0.2584322392940521 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.36715832352638245, "learning_rate": 1.6377904905254753e-06, "loss": 0.1971, "step": 24550, "teacher_loss": 0.17826001346111298 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3578345775604248, "learning_rate": 1.6367586600104366e-06, "loss": 0.1644, "step": 24551, "teacher_loss": 0.14294584095478058 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.2464931160211563, "learning_rate": 1.6357271358744534e-06, "loss": 0.1254, "step": 24552, "teacher_loss": 0.11192546039819717 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.34004467725753784, "learning_rate": 1.634695918141182e-06, "loss": 0.1652, "step": 24553, "teacher_loss": 0.14578023552894592 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.5536391735076904, "learning_rate": 1.6336650068342578e-06, "loss": 0.2831, "step": 24554, "teacher_loss": 0.252998024225235 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.2095588594675064, "learning_rate": 1.632634401977321e-06, "loss": 0.1703, "step": 24555, "teacher_loss": 0.1659424901008606 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3328903317451477, "learning_rate": 1.6316041035940038e-06, "loss": 0.1701, "step": 24556, "teacher_loss": 0.15202638506889343 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.48475855588912964, "learning_rate": 1.6305741117079193e-06, "loss": 0.1888, "step": 24557, "teacher_loss": 0.15589985251426697 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.40051794052124023, "learning_rate": 1.6295444263426884e-06, "loss": 0.1961, "step": 24558, "teacher_loss": 0.17336460947990417 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3068946897983551, "learning_rate": 1.628515047521919e-06, "loss": 0.1711, "step": 24559, "teacher_loss": 0.15605273842811584 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.9022079706192017, "learning_rate": 1.6274859752692073e-06, "loss": 0.331, "step": 24560, "teacher_loss": 0.2675066292285919 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3375336229801178, "learning_rate": 1.6264572096081526e-06, "loss": 0.236, "step": 24561, "teacher_loss": 0.2247694432735443 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.13949500024318695, "learning_rate": 1.625428750562336e-06, "loss": 0.1651, "step": 24562, "teacher_loss": 0.1679687201976776 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.24447891116142273, "learning_rate": 1.6244005981553373e-06, "loss": 0.1817, "step": 24563, "teacher_loss": 0.17474150657653809 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.5347205400466919, "learning_rate": 1.623372752410734e-06, "loss": 0.2485, "step": 24564, "teacher_loss": 0.2167244851589203 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.158926323056221, "learning_rate": 1.6223452133520855e-06, "loss": 0.1905, "step": 24565, "teacher_loss": 0.19403883814811707 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.6647493243217468, "learning_rate": 1.6213179810029533e-06, "loss": 0.2619, "step": 24566, "teacher_loss": 0.21714705228805542 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.17486582696437836, "learning_rate": 1.6202910553868916e-06, "loss": 0.1344, "step": 24567, "teacher_loss": 0.12994059920310974 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.08730772137641907, "learning_rate": 1.6192644365274384e-06, "loss": 0.1532, "step": 24568, "teacher_loss": 0.16054219007492065 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3011443614959717, "learning_rate": 1.6182381244481349e-06, "loss": 0.1786, "step": 24569, "teacher_loss": 0.165004163980484 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.6814702153205872, "learning_rate": 1.617212119172512e-06, "loss": 0.2327, "step": 24570, "teacher_loss": 0.18287155032157898 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.41953521966934204, "learning_rate": 1.6161864207240895e-06, "loss": 0.1774, "step": 24571, "teacher_loss": 0.15053123235702515 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.4130859971046448, "learning_rate": 1.6151610291263885e-06, "loss": 0.2179, "step": 24572, "teacher_loss": 0.1962481141090393 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 1.1413060426712036, "learning_rate": 1.6141359444029118e-06, "loss": 0.3217, "step": 24573, "teacher_loss": 0.23064029216766357 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.734916090965271, "learning_rate": 1.6131111665771692e-06, "loss": 0.3244, "step": 24574, "teacher_loss": 0.2787725329399109 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.2855764627456665, "learning_rate": 1.6120866956726466e-06, "loss": 0.223, "step": 24575, "teacher_loss": 0.2160138189792633 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.24173466861248016, "learning_rate": 1.6110625317128386e-06, "loss": 0.2365, "step": 24576, "teacher_loss": 0.2359049916267395 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.20645572245121002, "learning_rate": 1.6100386747212265e-06, "loss": 0.1775, "step": 24577, "teacher_loss": 0.17428681254386902 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.39578214287757874, "learning_rate": 1.6090151247212814e-06, "loss": 0.1795, "step": 24578, "teacher_loss": 0.15550529956817627 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.2625391483306885, "learning_rate": 1.6079918817364697e-06, "loss": 0.1881, "step": 24579, "teacher_loss": 0.17986449599266052 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.5581576228141785, "learning_rate": 1.6069689457902558e-06, "loss": 0.2521, "step": 24580, "teacher_loss": 0.21814373135566711 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.6056469678878784, "learning_rate": 1.6059463169060862e-06, "loss": 0.2243, "step": 24581, "teacher_loss": 0.181891530752182 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.22236371040344238, "learning_rate": 1.6049239951074118e-06, "loss": 0.1915, "step": 24582, "teacher_loss": 0.18808165192604065 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.13871464133262634, "learning_rate": 1.6039019804176709e-06, "loss": 0.1744, "step": 24583, "teacher_loss": 0.17835819721221924 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.22152800858020782, "learning_rate": 1.6028802728602943e-06, "loss": 0.189, "step": 24584, "teacher_loss": 0.1853894591331482 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.38740813732147217, "learning_rate": 1.601858872458702e-06, "loss": 0.1991, "step": 24585, "teacher_loss": 0.17819377779960632 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.7993651628494263, "learning_rate": 1.60083777923632e-06, "loss": 0.324, "step": 24586, "teacher_loss": 0.27118727564811707 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.6790557503700256, "learning_rate": 1.5998169932165563e-06, "loss": 0.2862, "step": 24587, "teacher_loss": 0.24253737926483154 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.3495616018772125, "learning_rate": 1.5987965144228089e-06, "loss": 0.2117, "step": 24588, "teacher_loss": 0.19633352756500244 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.26742318272590637, "learning_rate": 1.5977763428784792e-06, "loss": 0.192, "step": 24589, "teacher_loss": 0.18366271257400513 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.1663876473903656, "learning_rate": 1.5967564786069599e-06, "loss": 0.1593, "step": 24590, "teacher_loss": 0.15848945081233978 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.15313363075256348, "learning_rate": 1.5957369216316242e-06, "loss": 0.1424, "step": 24591, "teacher_loss": 0.14121964573860168 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.33252301812171936, "learning_rate": 1.594717671975855e-06, "loss": 0.2011, "step": 24592, "teacher_loss": 0.18647587299346924 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.17173290252685547, "learning_rate": 1.5936987296630202e-06, "loss": 0.1887, "step": 24593, "teacher_loss": 0.19054588675498962 }, { "compression_loss": 0.0, "epoch": 4.44, "label_loss": 0.0792551189661026, "learning_rate": 1.592680094716481e-06, "loss": 0.1636, "step": 24594, "teacher_loss": 0.17292210459709167 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.2631770372390747, "learning_rate": 1.5916617671595823e-06, "loss": 0.2519, "step": 24595, "teacher_loss": 0.25062841176986694 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.31205427646636963, "learning_rate": 1.5906437470156854e-06, "loss": 0.197, "step": 24596, "teacher_loss": 0.1841677725315094 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.37070631980895996, "learning_rate": 1.5896260343081247e-06, "loss": 0.2009, "step": 24597, "teacher_loss": 0.18208113312721252 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.7372523546218872, "learning_rate": 1.5886086290602253e-06, "loss": 0.3211, "step": 24598, "teacher_loss": 0.27482372522354126 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3188885450363159, "learning_rate": 1.5875915312953281e-06, "loss": 0.182, "step": 24599, "teacher_loss": 0.16679808497428894 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.5853077173233032, "learning_rate": 1.5865747410367449e-06, "loss": 0.263, "step": 24600, "teacher_loss": 0.22717157006263733 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.276511013507843, "learning_rate": 1.5855582583077833e-06, "loss": 0.166, "step": 24601, "teacher_loss": 0.153678297996521 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.2685980200767517, "learning_rate": 1.5845420831317514e-06, "loss": 0.2098, "step": 24602, "teacher_loss": 0.2032998502254486 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6641155481338501, "learning_rate": 1.5835262155319524e-06, "loss": 0.313, "step": 24603, "teacher_loss": 0.2739391624927521 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3032028079032898, "learning_rate": 1.5825106555316693e-06, "loss": 0.1883, "step": 24604, "teacher_loss": 0.17552849650382996 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.26810744404792786, "learning_rate": 1.5814954031541883e-06, "loss": 0.1458, "step": 24605, "teacher_loss": 0.13221679627895355 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.703420877456665, "learning_rate": 1.5804804584227912e-06, "loss": 0.2463, "step": 24606, "teacher_loss": 0.19551962614059448 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.08948704600334167, "learning_rate": 1.5794658213607387e-06, "loss": 0.1073, "step": 24607, "teacher_loss": 0.10924255102872849 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6370911002159119, "learning_rate": 1.5784514919912995e-06, "loss": 0.2512, "step": 24608, "teacher_loss": 0.20831122994422913 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.21382860839366913, "learning_rate": 1.5774374703377314e-06, "loss": 0.1413, "step": 24609, "teacher_loss": 0.13328108191490173 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.24738924205303192, "learning_rate": 1.5764237564232743e-06, "loss": 0.247, "step": 24610, "teacher_loss": 0.2469521015882492 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.4090577960014343, "learning_rate": 1.5754103502711791e-06, "loss": 0.2377, "step": 24611, "teacher_loss": 0.21862216293811798 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.420366108417511, "learning_rate": 1.5743972519046712e-06, "loss": 0.2265, "step": 24612, "teacher_loss": 0.20496167242527008 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.454008549451828, "learning_rate": 1.5733844613469833e-06, "loss": 0.3187, "step": 24613, "teacher_loss": 0.30361682176589966 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.14232882857322693, "learning_rate": 1.572371978621337e-06, "loss": 0.1243, "step": 24614, "teacher_loss": 0.12224339693784714 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3221674859523773, "learning_rate": 1.5713598037509403e-06, "loss": 0.2208, "step": 24615, "teacher_loss": 0.20957759022712708 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.5490390062332153, "learning_rate": 1.5703479367590045e-06, "loss": 0.2602, "step": 24616, "teacher_loss": 0.22805500030517578 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.5837002396583557, "learning_rate": 1.5693363776687248e-06, "loss": 0.2261, "step": 24617, "teacher_loss": 0.1863297075033188 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6253211498260498, "learning_rate": 1.5683251265032938e-06, "loss": 0.1997, "step": 24618, "teacher_loss": 0.15243801474571228 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.42183393239974976, "learning_rate": 1.5673141832859e-06, "loss": 0.2426, "step": 24619, "teacher_loss": 0.22263690829277039 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.822284996509552, "learning_rate": 1.5663035480397164e-06, "loss": 0.2775, "step": 24620, "teacher_loss": 0.2169930785894394 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3234124183654785, "learning_rate": 1.5652932207879162e-06, "loss": 0.2255, "step": 24621, "teacher_loss": 0.21458397805690765 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.4226740002632141, "learning_rate": 1.5642832015536656e-06, "loss": 0.1919, "step": 24622, "teacher_loss": 0.16626834869384766 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3952505588531494, "learning_rate": 1.5632734903601164e-06, "loss": 0.2201, "step": 24623, "teacher_loss": 0.20063111186027527 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.2535870373249054, "learning_rate": 1.5622640872304234e-06, "loss": 0.1601, "step": 24624, "teacher_loss": 0.14972186088562012 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.4565557837486267, "learning_rate": 1.5612549921877228e-06, "loss": 0.2402, "step": 24625, "teacher_loss": 0.21612709760665894 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3729138970375061, "learning_rate": 1.5602462052551548e-06, "loss": 0.1652, "step": 24626, "teacher_loss": 0.14213019609451294 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.31533122062683105, "learning_rate": 1.5592377264558489e-06, "loss": 0.1678, "step": 24627, "teacher_loss": 0.15138697624206543 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.26353466510772705, "learning_rate": 1.5582295558129218e-06, "loss": 0.2184, "step": 24628, "teacher_loss": 0.21338677406311035 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.5461602210998535, "learning_rate": 1.5572216933494914e-06, "loss": 0.1904, "step": 24629, "teacher_loss": 0.15084102749824524 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6247513890266418, "learning_rate": 1.5562141390886663e-06, "loss": 0.2491, "step": 24630, "teacher_loss": 0.20732802152633667 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.4554939866065979, "learning_rate": 1.5552068930535412e-06, "loss": 0.246, "step": 24631, "teacher_loss": 0.22275623679161072 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3492724597454071, "learning_rate": 1.5541999552672127e-06, "loss": 0.19, "step": 24632, "teacher_loss": 0.1723240166902542 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.5178931951522827, "learning_rate": 1.5531933257527686e-06, "loss": 0.1915, "step": 24633, "teacher_loss": 0.15527847409248352 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3822687268257141, "learning_rate": 1.5521870045332875e-06, "loss": 0.2464, "step": 24634, "teacher_loss": 0.2313191294670105 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.16244147717952728, "learning_rate": 1.551180991631836e-06, "loss": 0.2605, "step": 24635, "teacher_loss": 0.271393358707428 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.19074472784996033, "learning_rate": 1.5501752870714837e-06, "loss": 0.147, "step": 24636, "teacher_loss": 0.14218004047870636 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.2485916018486023, "learning_rate": 1.5491698908752889e-06, "loss": 0.2338, "step": 24637, "teacher_loss": 0.23218220472335815 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6456116437911987, "learning_rate": 1.548164803066298e-06, "loss": 0.2829, "step": 24638, "teacher_loss": 0.24261735379695892 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.2742326855659485, "learning_rate": 1.5471600236675582e-06, "loss": 0.1709, "step": 24639, "teacher_loss": 0.15940365195274353 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.6404263973236084, "learning_rate": 1.5461555527021088e-06, "loss": 0.222, "step": 24640, "teacher_loss": 0.17556026577949524 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.48365387320518494, "learning_rate": 1.5451513901929714e-06, "loss": 0.2302, "step": 24641, "teacher_loss": 0.20198820531368256 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.735905647277832, "learning_rate": 1.5441475361631746e-06, "loss": 0.2455, "step": 24642, "teacher_loss": 0.19096827507019043 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3681783080101013, "learning_rate": 1.5431439906357332e-06, "loss": 0.2491, "step": 24643, "teacher_loss": 0.2358437478542328 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.05697283148765564, "learning_rate": 1.5421407536336534e-06, "loss": 0.1422, "step": 24644, "teacher_loss": 0.15165507793426514 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.40386247634887695, "learning_rate": 1.5411378251799357e-06, "loss": 0.2246, "step": 24645, "teacher_loss": 0.2046658843755722 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.3578646183013916, "learning_rate": 1.5401352052975798e-06, "loss": 0.1987, "step": 24646, "teacher_loss": 0.18096241354942322 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.08000800013542175, "learning_rate": 1.5391328940095674e-06, "loss": 0.1294, "step": 24647, "teacher_loss": 0.13485166430473328 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.683329701423645, "learning_rate": 1.5381308913388764e-06, "loss": 0.2484, "step": 24648, "teacher_loss": 0.20010778307914734 }, { "compression_loss": 0.0, "epoch": 4.45, "label_loss": 0.7667165994644165, "learning_rate": 1.5371291973084873e-06, "loss": 0.2812, "step": 24649, "teacher_loss": 0.22724270820617676 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5416953563690186, "learning_rate": 1.536127811941363e-06, "loss": 0.284, "step": 24650, "teacher_loss": 0.25540316104888916 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.19146335124969482, "learning_rate": 1.535126735260457e-06, "loss": 0.2142, "step": 24651, "teacher_loss": 0.21671931445598602 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.12340216338634491, "learning_rate": 1.534125967288726e-06, "loss": 0.1254, "step": 24652, "teacher_loss": 0.12559425830841064 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4364277720451355, "learning_rate": 1.5331255080491162e-06, "loss": 0.21, "step": 24653, "teacher_loss": 0.18480491638183594 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4281620979309082, "learning_rate": 1.5321253575645615e-06, "loss": 0.164, "step": 24654, "teacher_loss": 0.13470236957073212 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5908030271530151, "learning_rate": 1.5311255158579918e-06, "loss": 0.4113, "step": 24655, "teacher_loss": 0.3914051353931427 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3711923360824585, "learning_rate": 1.530125982952335e-06, "loss": 0.204, "step": 24656, "teacher_loss": 0.18546688556671143 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.6110674142837524, "learning_rate": 1.5291267588705065e-06, "loss": 0.3238, "step": 24657, "teacher_loss": 0.29193222522735596 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3262002468109131, "learning_rate": 1.5281278436354063e-06, "loss": 0.2399, "step": 24658, "teacher_loss": 0.2302880436182022 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.2759372293949127, "learning_rate": 1.5271292372699507e-06, "loss": 0.172, "step": 24659, "teacher_loss": 0.1604917347431183 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3211386501789093, "learning_rate": 1.5261309397970269e-06, "loss": 0.2032, "step": 24660, "teacher_loss": 0.1901111602783203 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4968227744102478, "learning_rate": 1.5251329512395212e-06, "loss": 0.2178, "step": 24661, "teacher_loss": 0.18679603934288025 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3995521366596222, "learning_rate": 1.5241352716203173e-06, "loss": 0.1969, "step": 24662, "teacher_loss": 0.1743713617324829 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.1897321343421936, "learning_rate": 1.5231379009622914e-06, "loss": 0.1452, "step": 24663, "teacher_loss": 0.1402081549167633 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.585931658744812, "learning_rate": 1.5221408392883057e-06, "loss": 0.2181, "step": 24664, "teacher_loss": 0.17719818651676178 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.7102159857749939, "learning_rate": 1.5211440866212218e-06, "loss": 0.2759, "step": 24665, "teacher_loss": 0.22761595249176025 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.8968967795372009, "learning_rate": 1.5201476429838945e-06, "loss": 0.268, "step": 24666, "teacher_loss": 0.1980932503938675 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.8282178640365601, "learning_rate": 1.5191515083991641e-06, "loss": 0.4576, "step": 24667, "teacher_loss": 0.4164133071899414 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5654422044754028, "learning_rate": 1.5181556828898724e-06, "loss": 0.2073, "step": 24668, "teacher_loss": 0.16754689812660217 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.6311633586883545, "learning_rate": 1.5171601664788525e-06, "loss": 0.2931, "step": 24669, "teacher_loss": 0.255515456199646 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.8347383737564087, "learning_rate": 1.5161649591889248e-06, "loss": 0.3213, "step": 24670, "teacher_loss": 0.26424330472946167 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.30423033237457275, "learning_rate": 1.5151700610429076e-06, "loss": 0.384, "step": 24671, "teacher_loss": 0.39285749197006226 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.25690388679504395, "learning_rate": 1.5141754720636126e-06, "loss": 0.2137, "step": 24672, "teacher_loss": 0.2089177817106247 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.9678155779838562, "learning_rate": 1.5131811922738398e-06, "loss": 0.2912, "step": 24673, "teacher_loss": 0.2160579413175583 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.9530693292617798, "learning_rate": 1.5121872216963894e-06, "loss": 0.3314, "step": 24674, "teacher_loss": 0.2623268961906433 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4829097390174866, "learning_rate": 1.511193560354045e-06, "loss": 0.2306, "step": 24675, "teacher_loss": 0.2025410234928131 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3551364541053772, "learning_rate": 1.510200208269593e-06, "loss": 0.2342, "step": 24676, "teacher_loss": 0.22076798975467682 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.41642531752586365, "learning_rate": 1.5092071654658024e-06, "loss": 0.1354, "step": 24677, "teacher_loss": 0.1041313111782074 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.7869532108306885, "learning_rate": 1.5082144319654445e-06, "loss": 0.2753, "step": 24678, "teacher_loss": 0.2184121012687683 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5624879598617554, "learning_rate": 1.507222007791283e-06, "loss": 0.2677, "step": 24679, "teacher_loss": 0.2349700629711151 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.2554394602775574, "learning_rate": 1.5062298929660628e-06, "loss": 0.1441, "step": 24680, "teacher_loss": 0.13173530995845795 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3748375475406647, "learning_rate": 1.505238087512536e-06, "loss": 0.1815, "step": 24681, "teacher_loss": 0.16005949676036835 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4648219048976898, "learning_rate": 1.5042465914534426e-06, "loss": 0.1887, "step": 24682, "teacher_loss": 0.158060222864151 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.7790089249610901, "learning_rate": 1.503255404811511e-06, "loss": 0.2987, "step": 24683, "teacher_loss": 0.24531182646751404 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.2799026072025299, "learning_rate": 1.5022645276094681e-06, "loss": 0.1935, "step": 24684, "teacher_loss": 0.18392933905124664 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3267536759376526, "learning_rate": 1.501273959870031e-06, "loss": 0.2577, "step": 24685, "teacher_loss": 0.2500481605529785 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.2251317799091339, "learning_rate": 1.5002837016159093e-06, "loss": 0.164, "step": 24686, "teacher_loss": 0.15719449520111084 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.6285199522972107, "learning_rate": 1.4992937528698103e-06, "loss": 0.2394, "step": 24687, "teacher_loss": 0.19614554941654205 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.8788745999336243, "learning_rate": 1.4983041136544273e-06, "loss": 0.2853, "step": 24688, "teacher_loss": 0.2193090170621872 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.23762989044189453, "learning_rate": 1.497314783992449e-06, "loss": 0.1476, "step": 24689, "teacher_loss": 0.13756787776947021 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5801180005073547, "learning_rate": 1.4963257639065636e-06, "loss": 0.2635, "step": 24690, "teacher_loss": 0.22836823761463165 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.676182210445404, "learning_rate": 1.4953370534194382e-06, "loss": 0.2348, "step": 24691, "teacher_loss": 0.18579134345054626 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.42612701654434204, "learning_rate": 1.4943486525537464e-06, "loss": 0.1783, "step": 24692, "teacher_loss": 0.15074390172958374 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.8064684867858887, "learning_rate": 1.49336056133215e-06, "loss": 0.2335, "step": 24693, "teacher_loss": 0.1698903888463974 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.4468753933906555, "learning_rate": 1.492372779777299e-06, "loss": 0.1866, "step": 24694, "teacher_loss": 0.15769805014133453 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5548670291900635, "learning_rate": 1.4913853079118422e-06, "loss": 0.2115, "step": 24695, "teacher_loss": 0.17333796620368958 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3382173180580139, "learning_rate": 1.4903981457584215e-06, "loss": 0.2125, "step": 24696, "teacher_loss": 0.1985061764717102 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.2376953363418579, "learning_rate": 1.489411293339667e-06, "loss": 0.2183, "step": 24697, "teacher_loss": 0.21618568897247314 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.38035848736763, "learning_rate": 1.4884247506782023e-06, "loss": 0.2258, "step": 24698, "teacher_loss": 0.2085937261581421 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.9342123866081238, "learning_rate": 1.4874385177966493e-06, "loss": 0.2226, "step": 24699, "teacher_loss": 0.14357687532901764 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.6674273014068604, "learning_rate": 1.48645259471762e-06, "loss": 0.2196, "step": 24700, "teacher_loss": 0.16979068517684937 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3347325325012207, "learning_rate": 1.4854669814637145e-06, "loss": 0.2579, "step": 24701, "teacher_loss": 0.24932903051376343 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5112294554710388, "learning_rate": 1.4844816780575315e-06, "loss": 0.1881, "step": 24702, "teacher_loss": 0.15215738117694855 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.3892780542373657, "learning_rate": 1.4834966845216663e-06, "loss": 0.2246, "step": 24703, "teacher_loss": 0.20635256171226501 }, { "compression_loss": 0.0, "epoch": 4.46, "label_loss": 0.5779500007629395, "learning_rate": 1.482512000878694e-06, "loss": 0.2208, "step": 24704, "teacher_loss": 0.18107977509498596 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.507804274559021, "learning_rate": 1.4815276271511936e-06, "loss": 0.1904, "step": 24705, "teacher_loss": 0.15515880286693573 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.3817175328731537, "learning_rate": 1.4805435633617381e-06, "loss": 0.1584, "step": 24706, "teacher_loss": 0.13354583084583282 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.29282355308532715, "learning_rate": 1.479559809532885e-06, "loss": 0.1659, "step": 24707, "teacher_loss": 0.15183493494987488 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4344145655632019, "learning_rate": 1.4785763656871826e-06, "loss": 0.2414, "step": 24708, "teacher_loss": 0.21998082101345062 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.8593874573707581, "learning_rate": 1.4775932318471913e-06, "loss": 0.2505, "step": 24709, "teacher_loss": 0.1828332394361496 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.1697319597005844, "learning_rate": 1.4766104080354448e-06, "loss": 0.18, "step": 24710, "teacher_loss": 0.1811947226524353 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.19014552235603333, "learning_rate": 1.4756278942744733e-06, "loss": 0.1453, "step": 24711, "teacher_loss": 0.1402636468410492 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.42603686451911926, "learning_rate": 1.4746456905868055e-06, "loss": 0.27, "step": 24712, "teacher_loss": 0.2526553273200989 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.3187551498413086, "learning_rate": 1.4736637969949634e-06, "loss": 0.1662, "step": 24713, "teacher_loss": 0.14927524328231812 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.45264625549316406, "learning_rate": 1.4726822135214558e-06, "loss": 0.2053, "step": 24714, "teacher_loss": 0.17778238654136658 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4446519613265991, "learning_rate": 1.471700940188786e-06, "loss": 0.2067, "step": 24715, "teacher_loss": 0.18024948239326477 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.3290180563926697, "learning_rate": 1.470719977019458e-06, "loss": 0.1718, "step": 24716, "teacher_loss": 0.15432271361351013 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.434977650642395, "learning_rate": 1.4697393240359536e-06, "loss": 0.2098, "step": 24717, "teacher_loss": 0.18477891385555267 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.3501642048358917, "learning_rate": 1.4687589812607616e-06, "loss": 0.1579, "step": 24718, "teacher_loss": 0.13657647371292114 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.5264714956283569, "learning_rate": 1.4677789487163606e-06, "loss": 0.1831, "step": 24719, "teacher_loss": 0.1449185311794281 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.24844853579998016, "learning_rate": 1.466799226425216e-06, "loss": 0.1484, "step": 24720, "teacher_loss": 0.13733674585819244 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.9030625224113464, "learning_rate": 1.4658198144097851e-06, "loss": 0.2783, "step": 24721, "teacher_loss": 0.20885899662971497 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.648826003074646, "learning_rate": 1.4648407126925329e-06, "loss": 0.2168, "step": 24722, "teacher_loss": 0.16884389519691467 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 1.0891716480255127, "learning_rate": 1.4638619212959047e-06, "loss": 0.311, "step": 24723, "teacher_loss": 0.2245115041732788 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.39611464738845825, "learning_rate": 1.4628834402423347e-06, "loss": 0.19, "step": 24724, "teacher_loss": 0.1671123057603836 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.25927823781967163, "learning_rate": 1.4619052695542612e-06, "loss": 0.1868, "step": 24725, "teacher_loss": 0.17873573303222656 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.2830972671508789, "learning_rate": 1.4609274092541148e-06, "loss": 0.1664, "step": 24726, "teacher_loss": 0.15346573293209076 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.17762327194213867, "learning_rate": 1.4599498593643056e-06, "loss": 0.1601, "step": 24727, "teacher_loss": 0.15810611844062805 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.1138099730014801, "learning_rate": 1.4589726199072528e-06, "loss": 0.1993, "step": 24728, "teacher_loss": 0.20885249972343445 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.17823049426078796, "learning_rate": 1.4579956909053616e-06, "loss": 0.1974, "step": 24729, "teacher_loss": 0.1995641142129898 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.29373645782470703, "learning_rate": 1.4570190723810256e-06, "loss": 0.1385, "step": 24730, "teacher_loss": 0.12124369293451309 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.2298370599746704, "learning_rate": 1.4560427643566388e-06, "loss": 0.2069, "step": 24731, "teacher_loss": 0.20432695746421814 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.26759904623031616, "learning_rate": 1.4550667668545864e-06, "loss": 0.1385, "step": 24732, "teacher_loss": 0.12414171546697617 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.28581398725509644, "learning_rate": 1.4540910798972407e-06, "loss": 0.2237, "step": 24733, "teacher_loss": 0.21684041619300842 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.5487774610519409, "learning_rate": 1.453115703506977e-06, "loss": 0.2201, "step": 24734, "teacher_loss": 0.18352553248405457 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.9727622270584106, "learning_rate": 1.4521406377061525e-06, "loss": 0.2929, "step": 24735, "teacher_loss": 0.21734902262687683 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4192865490913391, "learning_rate": 1.4511658825171226e-06, "loss": 0.2818, "step": 24736, "teacher_loss": 0.2665690779685974 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.5896538496017456, "learning_rate": 1.4501914379622411e-06, "loss": 0.2047, "step": 24737, "teacher_loss": 0.16190290451049805 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.25755244493484497, "learning_rate": 1.4492173040638435e-06, "loss": 0.2007, "step": 24738, "teacher_loss": 0.1944286972284317 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4545484781265259, "learning_rate": 1.4482434808442686e-06, "loss": 0.2612, "step": 24739, "teacher_loss": 0.2397494912147522 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.42317995429039, "learning_rate": 1.4472699683258366e-06, "loss": 0.2486, "step": 24740, "teacher_loss": 0.22916650772094727 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 1.0320826768875122, "learning_rate": 1.446296766530872e-06, "loss": 0.4354, "step": 24741, "teacher_loss": 0.36911171674728394 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.26616430282592773, "learning_rate": 1.4453238754816878e-06, "loss": 0.176, "step": 24742, "teacher_loss": 0.1660006046295166 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4261089563369751, "learning_rate": 1.4443512952005866e-06, "loss": 0.2478, "step": 24743, "teacher_loss": 0.22793373465538025 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.2057255357503891, "learning_rate": 1.4433790257098672e-06, "loss": 0.1886, "step": 24744, "teacher_loss": 0.18664193153381348 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.7159783244132996, "learning_rate": 1.4424070670318252e-06, "loss": 0.2734, "step": 24745, "teacher_loss": 0.22421352565288544 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.6081286668777466, "learning_rate": 1.4414354191887392e-06, "loss": 0.2706, "step": 24746, "teacher_loss": 0.2331523895263672 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.40091952681541443, "learning_rate": 1.4404640822028914e-06, "loss": 0.1951, "step": 24747, "teacher_loss": 0.17221969366073608 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.13203710317611694, "learning_rate": 1.4394930560965441e-06, "loss": 0.1971, "step": 24748, "teacher_loss": 0.20431926846504211 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.22094330191612244, "learning_rate": 1.4385223408919662e-06, "loss": 0.1982, "step": 24749, "teacher_loss": 0.19565460085868835 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.37275946140289307, "learning_rate": 1.437551936611413e-06, "loss": 0.1911, "step": 24750, "teacher_loss": 0.17092156410217285 }, { "epoch": 4.47, "eval_exact_match": 80.47303689687796, "eval_f1": 87.72585406675309, "step": 24750 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.658419132232666, "learning_rate": 1.4365818432771289e-06, "loss": 0.2418, "step": 24751, "teacher_loss": 0.1954875886440277 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.8100974559783936, "learning_rate": 1.4356120609113587e-06, "loss": 0.2808, "step": 24752, "teacher_loss": 0.22197428345680237 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4429551064968109, "learning_rate": 1.4346425895363385e-06, "loss": 0.2405, "step": 24753, "teacher_loss": 0.21795004606246948 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.349784791469574, "learning_rate": 1.4336734291742904e-06, "loss": 0.2065, "step": 24754, "teacher_loss": 0.19053535163402557 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.315716028213501, "learning_rate": 1.4327045798474347e-06, "loss": 0.1715, "step": 24755, "teacher_loss": 0.15547645092010498 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 1.0179736614227295, "learning_rate": 1.4317360415779907e-06, "loss": 0.2393, "step": 24756, "teacher_loss": 0.15274512767791748 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4865512251853943, "learning_rate": 1.4307678143881586e-06, "loss": 0.1607, "step": 24757, "teacher_loss": 0.12445828318595886 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.2341880053281784, "learning_rate": 1.4297998983001327e-06, "loss": 0.1765, "step": 24758, "teacher_loss": 0.17013861238956451 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4739038348197937, "learning_rate": 1.4288322933361147e-06, "loss": 0.2063, "step": 24759, "teacher_loss": 0.17651841044425964 }, { "compression_loss": 0.0, "epoch": 4.47, "label_loss": 0.4122597277164459, "learning_rate": 1.4278649995182858e-06, "loss": 0.2249, "step": 24760, "teacher_loss": 0.20410922169685364 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.8624149560928345, "learning_rate": 1.4268980168688161e-06, "loss": 0.2762, "step": 24761, "teacher_loss": 0.21102187037467957 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4933362603187561, "learning_rate": 1.42593134540988e-06, "loss": 0.2163, "step": 24762, "teacher_loss": 0.18548986315727234 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.12792108952999115, "learning_rate": 1.424964985163646e-06, "loss": 0.1672, "step": 24763, "teacher_loss": 0.17160899937152863 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.38337233662605286, "learning_rate": 1.4239989361522599e-06, "loss": 0.2332, "step": 24764, "teacher_loss": 0.2164752334356308 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.17478561401367188, "learning_rate": 1.4230331983978773e-06, "loss": 0.1824, "step": 24765, "teacher_loss": 0.1832735240459442 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4481823444366455, "learning_rate": 1.4220677719226389e-06, "loss": 0.2335, "step": 24766, "teacher_loss": 0.20965327322483063 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4820939898490906, "learning_rate": 1.4211026567486752e-06, "loss": 0.1987, "step": 24767, "teacher_loss": 0.16721491515636444 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.2123582363128662, "learning_rate": 1.420137852898117e-06, "loss": 0.1179, "step": 24768, "teacher_loss": 0.10745567828416824 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4929640293121338, "learning_rate": 1.4191733603930845e-06, "loss": 0.1772, "step": 24769, "teacher_loss": 0.1421264111995697 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.23019075393676758, "learning_rate": 1.4182091792556906e-06, "loss": 0.1725, "step": 24770, "teacher_loss": 0.1660854071378708 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.36552345752716064, "learning_rate": 1.417245309508034e-06, "loss": 0.1818, "step": 24771, "teacher_loss": 0.16137701272964478 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.49269217252731323, "learning_rate": 1.4162817511722237e-06, "loss": 0.1861, "step": 24772, "teacher_loss": 0.15204837918281555 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5924199819564819, "learning_rate": 1.4153185042703488e-06, "loss": 0.1951, "step": 24773, "teacher_loss": 0.1509571373462677 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.49942952394485474, "learning_rate": 1.4143555688244863e-06, "loss": 0.2845, "step": 24774, "teacher_loss": 0.2606343626976013 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.3113252520561218, "learning_rate": 1.4133929448567206e-06, "loss": 0.1971, "step": 24775, "teacher_loss": 0.18445220589637756 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.3413528800010681, "learning_rate": 1.4124306323891222e-06, "loss": 0.1711, "step": 24776, "teacher_loss": 0.15218310058116913 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4467523694038391, "learning_rate": 1.4114686314437487e-06, "loss": 0.2471, "step": 24777, "teacher_loss": 0.22488830983638763 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4966054856777191, "learning_rate": 1.4105069420426603e-06, "loss": 0.235, "step": 24778, "teacher_loss": 0.20590201020240784 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.2515527606010437, "learning_rate": 1.4095455642079048e-06, "loss": 0.1677, "step": 24779, "teacher_loss": 0.15837281942367554 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.6548233032226562, "learning_rate": 1.408584497961526e-06, "loss": 0.2813, "step": 24780, "teacher_loss": 0.2397511899471283 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.31414830684661865, "learning_rate": 1.407623743325548e-06, "loss": 0.1766, "step": 24781, "teacher_loss": 0.16128921508789062 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.1872939169406891, "learning_rate": 1.4066633003220113e-06, "loss": 0.175, "step": 24782, "teacher_loss": 0.17358499765396118 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5155812501907349, "learning_rate": 1.4057031689729304e-06, "loss": 0.2798, "step": 24783, "teacher_loss": 0.2536531686782837 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5210274457931519, "learning_rate": 1.4047433493003141e-06, "loss": 0.2562, "step": 24784, "teacher_loss": 0.22682130336761475 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.49933522939682007, "learning_rate": 1.4037838413261733e-06, "loss": 0.2407, "step": 24785, "teacher_loss": 0.21200178563594818 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.46735522150993347, "learning_rate": 1.4028246450725084e-06, "loss": 0.1796, "step": 24786, "teacher_loss": 0.14761066436767578 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.14099842309951782, "learning_rate": 1.4018657605613056e-06, "loss": 0.1028, "step": 24787, "teacher_loss": 0.09852568805217743 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.6091364622116089, "learning_rate": 1.4009071878145502e-06, "loss": 0.2166, "step": 24788, "teacher_loss": 0.17299991846084595 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5375706553459167, "learning_rate": 1.3999489268542248e-06, "loss": 0.2859, "step": 24789, "teacher_loss": 0.2578897774219513 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.3684234619140625, "learning_rate": 1.3989909777022918e-06, "loss": 0.2074, "step": 24790, "teacher_loss": 0.1895555853843689 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5994600057601929, "learning_rate": 1.3980333403807189e-06, "loss": 0.3783, "step": 24791, "teacher_loss": 0.3536805510520935 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.577006459236145, "learning_rate": 1.3970760149114614e-06, "loss": 0.2635, "step": 24792, "teacher_loss": 0.22869713604450226 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.42220306396484375, "learning_rate": 1.3961190013164653e-06, "loss": 0.1982, "step": 24793, "teacher_loss": 0.1732625663280487 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.2720046043395996, "learning_rate": 1.395162299617675e-06, "loss": 0.2046, "step": 24794, "teacher_loss": 0.19710539281368256 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.18090149760246277, "learning_rate": 1.3942059098370258e-06, "loss": 0.1509, "step": 24795, "teacher_loss": 0.1475912183523178 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.7780902981758118, "learning_rate": 1.3932498319964403e-06, "loss": 0.2417, "step": 24796, "teacher_loss": 0.18214696645736694 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.39168781042099, "learning_rate": 1.3922940661178429e-06, "loss": 0.2042, "step": 24797, "teacher_loss": 0.18339765071868896 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.21832218766212463, "learning_rate": 1.3913386122231426e-06, "loss": 0.2034, "step": 24798, "teacher_loss": 0.2017078399658203 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4339350461959839, "learning_rate": 1.3903834703342466e-06, "loss": 0.1658, "step": 24799, "teacher_loss": 0.13602042198181152 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5206286311149597, "learning_rate": 1.3894286404730576e-06, "loss": 0.2713, "step": 24800, "teacher_loss": 0.24358314275741577 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4038732647895813, "learning_rate": 1.38847412266146e-06, "loss": 0.2993, "step": 24801, "teacher_loss": 0.28771454095840454 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.6999591588973999, "learning_rate": 1.3875199169213443e-06, "loss": 0.2736, "step": 24802, "teacher_loss": 0.22623848915100098 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.49118661880493164, "learning_rate": 1.3865660232745813e-06, "loss": 0.1868, "step": 24803, "teacher_loss": 0.1530255675315857 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.45903363823890686, "learning_rate": 1.3856124417430438e-06, "loss": 0.2109, "step": 24804, "teacher_loss": 0.18338388204574585 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.22812677919864655, "learning_rate": 1.3846591723485974e-06, "loss": 0.1256, "step": 24805, "teacher_loss": 0.11420183628797531 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5153632164001465, "learning_rate": 1.383706215113093e-06, "loss": 0.2431, "step": 24806, "teacher_loss": 0.21281574666500092 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.5113099813461304, "learning_rate": 1.382753570058385e-06, "loss": 0.2509, "step": 24807, "teacher_loss": 0.22199270129203796 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.4905911684036255, "learning_rate": 1.3818012372063071e-06, "loss": 0.2455, "step": 24808, "teacher_loss": 0.21822744607925415 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.2916296720504761, "learning_rate": 1.380849216578699e-06, "loss": 0.2346, "step": 24809, "teacher_loss": 0.2282804250717163 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.18334712088108063, "learning_rate": 1.3798975081973864e-06, "loss": 0.2082, "step": 24810, "teacher_loss": 0.21090614795684814 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.3228442966938019, "learning_rate": 1.3789461120841883e-06, "loss": 0.1673, "step": 24811, "teacher_loss": 0.15005150437355042 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.1938084065914154, "learning_rate": 1.3779950282609162e-06, "loss": 0.1712, "step": 24812, "teacher_loss": 0.16863678395748138 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.26241979002952576, "learning_rate": 1.3770442567493801e-06, "loss": 0.1847, "step": 24813, "teacher_loss": 0.17604553699493408 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.3147497773170471, "learning_rate": 1.3760937975713734e-06, "loss": 0.1489, "step": 24814, "teacher_loss": 0.13042427599430084 }, { "compression_loss": 0.0, "epoch": 4.48, "label_loss": 0.6530540585517883, "learning_rate": 1.3751436507486898e-06, "loss": 0.2279, "step": 24815, "teacher_loss": 0.18060529232025146 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.4609566330909729, "learning_rate": 1.3741938163031153e-06, "loss": 0.2471, "step": 24816, "teacher_loss": 0.22337350249290466 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 1.0050783157348633, "learning_rate": 1.373244294256421e-06, "loss": 0.2616, "step": 24817, "teacher_loss": 0.17904475331306458 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5441428422927856, "learning_rate": 1.3722950846303794e-06, "loss": 0.2544, "step": 24818, "teacher_loss": 0.2221723198890686 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.45430320501327515, "learning_rate": 1.3713461874467564e-06, "loss": 0.2498, "step": 24819, "teacher_loss": 0.227039635181427 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3739086389541626, "learning_rate": 1.3703976027273063e-06, "loss": 0.1684, "step": 24820, "teacher_loss": 0.14556419849395752 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.15170660614967346, "learning_rate": 1.3694493304937683e-06, "loss": 0.1916, "step": 24821, "teacher_loss": 0.19606655836105347 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5213661193847656, "learning_rate": 1.3685013707678968e-06, "loss": 0.2356, "step": 24822, "teacher_loss": 0.2038998156785965 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6473950147628784, "learning_rate": 1.3675537235714192e-06, "loss": 0.239, "step": 24823, "teacher_loss": 0.19361340999603271 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5452805161476135, "learning_rate": 1.36660638892606e-06, "loss": 0.2018, "step": 24824, "teacher_loss": 0.16364173591136932 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3174707889556885, "learning_rate": 1.36565936685354e-06, "loss": 0.1905, "step": 24825, "teacher_loss": 0.17644575238227844 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.33065760135650635, "learning_rate": 1.3647126573755768e-06, "loss": 0.1669, "step": 24826, "teacher_loss": 0.14869606494903564 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.48030608892440796, "learning_rate": 1.3637662605138684e-06, "loss": 0.2938, "step": 24827, "teacher_loss": 0.27303820848464966 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3332180082798004, "learning_rate": 1.362820176290117e-06, "loss": 0.2622, "step": 24828, "teacher_loss": 0.2542579174041748 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5059642791748047, "learning_rate": 1.3618744047260156e-06, "loss": 0.2169, "step": 24829, "teacher_loss": 0.1848001778125763 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.2178192138671875, "learning_rate": 1.3609289458432434e-06, "loss": 0.2063, "step": 24830, "teacher_loss": 0.20503322780132294 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.4879953861236572, "learning_rate": 1.3599837996634744e-06, "loss": 0.203, "step": 24831, "teacher_loss": 0.17132779955863953 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3109952211380005, "learning_rate": 1.3590389662083868e-06, "loss": 0.2373, "step": 24832, "teacher_loss": 0.22914129495620728 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3827393651008606, "learning_rate": 1.3580944454996364e-06, "loss": 0.2531, "step": 24833, "teacher_loss": 0.23864270746707916 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.4307803511619568, "learning_rate": 1.357150237558879e-06, "loss": 0.2066, "step": 24834, "teacher_loss": 0.18172679841518402 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.49109745025634766, "learning_rate": 1.3562063424077625e-06, "loss": 0.1991, "step": 24835, "teacher_loss": 0.16665557026863098 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.4011456072330475, "learning_rate": 1.3552627600679312e-06, "loss": 0.2229, "step": 24836, "teacher_loss": 0.20311488211154938 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3925430178642273, "learning_rate": 1.3543194905610146e-06, "loss": 0.2708, "step": 24837, "teacher_loss": 0.2573012113571167 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.698509693145752, "learning_rate": 1.3533765339086384e-06, "loss": 0.2165, "step": 24838, "teacher_loss": 0.16299782693386078 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5401134490966797, "learning_rate": 1.3524338901324273e-06, "loss": 0.2181, "step": 24839, "teacher_loss": 0.1822671890258789 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 1.1441680192947388, "learning_rate": 1.3514915592539857e-06, "loss": 0.3092, "step": 24840, "teacher_loss": 0.21641850471496582 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6434491872787476, "learning_rate": 1.3505495412949225e-06, "loss": 0.2842, "step": 24841, "teacher_loss": 0.24430745840072632 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.15564462542533875, "learning_rate": 1.3496078362768394e-06, "loss": 0.1826, "step": 24842, "teacher_loss": 0.18561133742332458 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3927781879901886, "learning_rate": 1.3486664442213203e-06, "loss": 0.1327, "step": 24843, "teacher_loss": 0.10376574099063873 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5456156730651855, "learning_rate": 1.3477253651499467e-06, "loss": 0.2467, "step": 24844, "teacher_loss": 0.21350297331809998 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.24306191504001617, "learning_rate": 1.3467845990843042e-06, "loss": 0.1786, "step": 24845, "teacher_loss": 0.17145463824272156 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5092607736587524, "learning_rate": 1.345844146045956e-06, "loss": 0.2402, "step": 24846, "teacher_loss": 0.21031442284584045 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.47005465626716614, "learning_rate": 1.3449040060564627e-06, "loss": 0.2265, "step": 24847, "teacher_loss": 0.19944171607494354 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.875042200088501, "learning_rate": 1.3439641791373792e-06, "loss": 0.2611, "step": 24848, "teacher_loss": 0.192830890417099 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.45506036281585693, "learning_rate": 1.343024665310258e-06, "loss": 0.1899, "step": 24849, "teacher_loss": 0.16043657064437866 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.21290555596351624, "learning_rate": 1.3420854645966318e-06, "loss": 0.1965, "step": 24850, "teacher_loss": 0.1946442425251007 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.2770000100135803, "learning_rate": 1.341146577018037e-06, "loss": 0.1902, "step": 24851, "teacher_loss": 0.18053388595581055 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.447892427444458, "learning_rate": 1.3402080025960028e-06, "loss": 0.2024, "step": 24852, "teacher_loss": 0.17512789368629456 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6519142389297485, "learning_rate": 1.3392697413520422e-06, "loss": 0.2176, "step": 24853, "teacher_loss": 0.16930551826953888 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6063581705093384, "learning_rate": 1.3383317933076712e-06, "loss": 0.2065, "step": 24854, "teacher_loss": 0.16211152076721191 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.29877063632011414, "learning_rate": 1.3373941584843924e-06, "loss": 0.1936, "step": 24855, "teacher_loss": 0.1819210648536682 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.37757647037506104, "learning_rate": 1.3364568369037022e-06, "loss": 0.2557, "step": 24856, "teacher_loss": 0.24213998019695282 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5091791152954102, "learning_rate": 1.3355198285870935e-06, "loss": 0.1999, "step": 24857, "teacher_loss": 0.16555072367191315 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3189675211906433, "learning_rate": 1.3345831335560437e-06, "loss": 0.2367, "step": 24858, "teacher_loss": 0.22760102152824402 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.41672682762145996, "learning_rate": 1.333646751832031e-06, "loss": 0.2566, "step": 24859, "teacher_loss": 0.23877234756946564 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3014252781867981, "learning_rate": 1.3327106834365282e-06, "loss": 0.3953, "step": 24860, "teacher_loss": 0.4056766629219055 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5225478410720825, "learning_rate": 1.3317749283909898e-06, "loss": 0.2122, "step": 24861, "teacher_loss": 0.17771360278129578 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3503858745098114, "learning_rate": 1.3308394867168733e-06, "loss": 0.1649, "step": 24862, "teacher_loss": 0.14430014789104462 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.398532509803772, "learning_rate": 1.3299043584356268e-06, "loss": 0.2391, "step": 24863, "teacher_loss": 0.22142288088798523 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6095664501190186, "learning_rate": 1.3289695435686865e-06, "loss": 0.1849, "step": 24864, "teacher_loss": 0.13770407438278198 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.7627614140510559, "learning_rate": 1.3280350421374888e-06, "loss": 0.2199, "step": 24865, "teacher_loss": 0.15954527258872986 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.20806953310966492, "learning_rate": 1.3271008541634544e-06, "loss": 0.1833, "step": 24866, "teacher_loss": 0.1805018186569214 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.3728608787059784, "learning_rate": 1.3261669796680048e-06, "loss": 0.2144, "step": 24867, "teacher_loss": 0.1968323439359665 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.6760705709457397, "learning_rate": 1.3252334186725513e-06, "loss": 0.231, "step": 24868, "teacher_loss": 0.18158340454101562 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.5103421807289124, "learning_rate": 1.3243001711984948e-06, "loss": 0.2324, "step": 24869, "teacher_loss": 0.20153653621673584 }, { "compression_loss": 0.0, "epoch": 4.49, "label_loss": 0.22068721055984497, "learning_rate": 1.3233672372672367e-06, "loss": 0.1804, "step": 24870, "teacher_loss": 0.1759127378463745 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.34370845556259155, "learning_rate": 1.3224346169001583e-06, "loss": 0.2411, "step": 24871, "teacher_loss": 0.2297222912311554 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4138121008872986, "learning_rate": 1.321502310118649e-06, "loss": 0.1752, "step": 24872, "teacher_loss": 0.14870071411132812 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5852549076080322, "learning_rate": 1.3205703169440837e-06, "loss": 0.2719, "step": 24873, "teacher_loss": 0.23711101710796356 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.3902353346347809, "learning_rate": 1.3196386373978231e-06, "loss": 0.1753, "step": 24874, "teacher_loss": 0.15142151713371277 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4449976086616516, "learning_rate": 1.3187072715012355e-06, "loss": 0.2439, "step": 24875, "teacher_loss": 0.22152970731258392 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.42878836393356323, "learning_rate": 1.317776219275672e-06, "loss": 0.2109, "step": 24876, "teacher_loss": 0.18670785427093506 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4768916964530945, "learning_rate": 1.3168454807424774e-06, "loss": 0.1832, "step": 24877, "teacher_loss": 0.15056294202804565 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.35752326250076294, "learning_rate": 1.3159150559229909e-06, "loss": 0.2025, "step": 24878, "teacher_loss": 0.18526506423950195 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.2957937717437744, "learning_rate": 1.3149849448385475e-06, "loss": 0.158, "step": 24879, "teacher_loss": 0.1427101343870163 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.9467372894287109, "learning_rate": 1.31405514751047e-06, "loss": 0.3351, "step": 24880, "teacher_loss": 0.26711851358413696 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.18497636914253235, "learning_rate": 1.3131256639600697e-06, "loss": 0.1632, "step": 24881, "teacher_loss": 0.1607877016067505 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.17727389931678772, "learning_rate": 1.3121964942086694e-06, "loss": 0.1584, "step": 24882, "teacher_loss": 0.15632781386375427 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.22995662689208984, "learning_rate": 1.311267638277564e-06, "loss": 0.1425, "step": 24883, "teacher_loss": 0.13276252150535583 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.30090975761413574, "learning_rate": 1.3103390961880446e-06, "loss": 0.1888, "step": 24884, "teacher_loss": 0.1763882040977478 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5656433701515198, "learning_rate": 1.3094108679614125e-06, "loss": 0.2518, "step": 24885, "teacher_loss": 0.2169492244720459 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.2606929838657379, "learning_rate": 1.3084829536189424e-06, "loss": 0.1892, "step": 24886, "teacher_loss": 0.18120211362838745 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.46706366539001465, "learning_rate": 1.3075553531819057e-06, "loss": 0.2047, "step": 24887, "teacher_loss": 0.17554797232151031 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.23889076709747314, "learning_rate": 1.3066280666715735e-06, "loss": 0.1705, "step": 24888, "teacher_loss": 0.16284796595573425 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.44100844860076904, "learning_rate": 1.3057010941092073e-06, "loss": 0.2585, "step": 24889, "teacher_loss": 0.23820900917053223 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 1.2169859409332275, "learning_rate": 1.3047744355160551e-06, "loss": 0.3799, "step": 24890, "teacher_loss": 0.28690385818481445 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.44141945242881775, "learning_rate": 1.3038480909133631e-06, "loss": 0.2531, "step": 24891, "teacher_loss": 0.23221619427204132 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4876900911331177, "learning_rate": 1.3029220603223746e-06, "loss": 0.2642, "step": 24892, "teacher_loss": 0.23939740657806396 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.6729294061660767, "learning_rate": 1.301996343764319e-06, "loss": 0.2655, "step": 24893, "teacher_loss": 0.22024670243263245 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4354380667209625, "learning_rate": 1.301070941260411e-06, "loss": 0.2378, "step": 24894, "teacher_loss": 0.21582409739494324 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5990495681762695, "learning_rate": 1.3001458528318805e-06, "loss": 0.2551, "step": 24895, "teacher_loss": 0.21685002744197845 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.3487049341201782, "learning_rate": 1.299221078499932e-06, "loss": 0.2, "step": 24896, "teacher_loss": 0.18346790969371796 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.3687381148338318, "learning_rate": 1.2982966182857637e-06, "loss": 0.2004, "step": 24897, "teacher_loss": 0.18166957795619965 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.36865270137786865, "learning_rate": 1.297372472210575e-06, "loss": 0.2961, "step": 24898, "teacher_loss": 0.2880134582519531 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.477919340133667, "learning_rate": 1.296448640295556e-06, "loss": 0.2065, "step": 24899, "teacher_loss": 0.17631492018699646 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.34093210101127625, "learning_rate": 1.2955251225618797e-06, "loss": 0.295, "step": 24900, "teacher_loss": 0.28993409872055054 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.1556183099746704, "learning_rate": 1.2946019190307256e-06, "loss": 0.1589, "step": 24901, "teacher_loss": 0.15929779410362244 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5449413061141968, "learning_rate": 1.2936790297232603e-06, "loss": 0.2136, "step": 24902, "teacher_loss": 0.1768096536397934 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.32774674892425537, "learning_rate": 1.2927564546606401e-06, "loss": 0.1628, "step": 24903, "teacher_loss": 0.144521102309227 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5019647479057312, "learning_rate": 1.2918341938640165e-06, "loss": 0.3192, "step": 24904, "teacher_loss": 0.29887545108795166 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4542728066444397, "learning_rate": 1.2909122473545393e-06, "loss": 0.1526, "step": 24905, "teacher_loss": 0.11904914677143097 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.27091312408447266, "learning_rate": 1.2899906151533414e-06, "loss": 0.2097, "step": 24906, "teacher_loss": 0.20289915800094604 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.26909172534942627, "learning_rate": 1.289069297281551e-06, "loss": 0.1614, "step": 24907, "teacher_loss": 0.14943546056747437 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.1845596730709076, "learning_rate": 1.2881482937602946e-06, "loss": 0.1376, "step": 24908, "teacher_loss": 0.13232731819152832 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.3280317187309265, "learning_rate": 1.2872276046106901e-06, "loss": 0.1846, "step": 24909, "teacher_loss": 0.16865724325180054 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4422951638698578, "learning_rate": 1.2863072298538426e-06, "loss": 0.1618, "step": 24910, "teacher_loss": 0.13066960871219635 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.2813696265220642, "learning_rate": 1.2853871695108531e-06, "loss": 0.162, "step": 24911, "teacher_loss": 0.14877942204475403 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.503632664680481, "learning_rate": 1.2844674236028186e-06, "loss": 0.2056, "step": 24912, "teacher_loss": 0.1724781095981598 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.44062715768814087, "learning_rate": 1.2835479921508236e-06, "loss": 0.2093, "step": 24913, "teacher_loss": 0.18355225026607513 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.314726859331131, "learning_rate": 1.2826288751759495e-06, "loss": 0.1768, "step": 24914, "teacher_loss": 0.16152897477149963 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.6788257360458374, "learning_rate": 1.2817100726992698e-06, "loss": 0.1978, "step": 24915, "teacher_loss": 0.14438213407993317 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5926549434661865, "learning_rate": 1.2807915847418472e-06, "loss": 0.2405, "step": 24916, "teacher_loss": 0.20140224695205688 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.46895840764045715, "learning_rate": 1.2798734113247419e-06, "loss": 0.2339, "step": 24917, "teacher_loss": 0.20773360133171082 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.23749813437461853, "learning_rate": 1.278955552469005e-06, "loss": 0.2077, "step": 24918, "teacher_loss": 0.20435330271720886 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.6081233024597168, "learning_rate": 1.2780380081956766e-06, "loss": 0.2486, "step": 24919, "teacher_loss": 0.20870795845985413 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.68998783826828, "learning_rate": 1.2771207785258e-06, "loss": 0.2235, "step": 24920, "teacher_loss": 0.1716526299715042 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4073273241519928, "learning_rate": 1.276203863480398e-06, "loss": 0.1585, "step": 24921, "teacher_loss": 0.13088031113147736 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.5293726325035095, "learning_rate": 1.2752872630804956e-06, "loss": 0.2109, "step": 24922, "teacher_loss": 0.1755514144897461 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.2349901795387268, "learning_rate": 1.2743709773471078e-06, "loss": 0.1809, "step": 24923, "teacher_loss": 0.17485250532627106 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.19986101984977722, "learning_rate": 1.2734550063012411e-06, "loss": 0.1508, "step": 24924, "teacher_loss": 0.14536993205547333 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4453723430633545, "learning_rate": 1.272539349963897e-06, "loss": 0.1774, "step": 24925, "teacher_loss": 0.1476157009601593 }, { "compression_loss": 0.0, "epoch": 4.5, "label_loss": 0.4535841941833496, "learning_rate": 1.2716240083560705e-06, "loss": 0.2065, "step": 24926, "teacher_loss": 0.17906233668327332 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.20025601983070374, "learning_rate": 1.2707089814987445e-06, "loss": 0.1659, "step": 24927, "teacher_loss": 0.16205769777297974 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3287839889526367, "learning_rate": 1.2697942694129006e-06, "loss": 0.179, "step": 24928, "teacher_loss": 0.16240474581718445 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.41799116134643555, "learning_rate": 1.2688798721195054e-06, "loss": 0.1954, "step": 24929, "teacher_loss": 0.17066457867622375 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.7418502569198608, "learning_rate": 1.2679657896395292e-06, "loss": 0.7169, "step": 24930, "teacher_loss": 0.7141244411468506 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.2920127511024475, "learning_rate": 1.2670520219939263e-06, "loss": 0.1929, "step": 24931, "teacher_loss": 0.1819000244140625 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.324868381023407, "learning_rate": 1.2661385692036454e-06, "loss": 0.1746, "step": 24932, "teacher_loss": 0.15793661773204803 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.19607430696487427, "learning_rate": 1.2652254312896328e-06, "loss": 0.1598, "step": 24933, "teacher_loss": 0.1557374894618988 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.7852168679237366, "learning_rate": 1.2643126082728201e-06, "loss": 0.2512, "step": 24934, "teacher_loss": 0.19183233380317688 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5920917391777039, "learning_rate": 1.2634001001741375e-06, "loss": 0.2604, "step": 24935, "teacher_loss": 0.2235337793827057 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5382420420646667, "learning_rate": 1.2624879070145095e-06, "loss": 0.2248, "step": 24936, "teacher_loss": 0.18995723128318787 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5238633751869202, "learning_rate": 1.2615760288148431e-06, "loss": 0.2416, "step": 24937, "teacher_loss": 0.21029233932495117 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6047776937484741, "learning_rate": 1.2606644655960497e-06, "loss": 0.2728, "step": 24938, "teacher_loss": 0.23592683672904968 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.39686086773872375, "learning_rate": 1.2597532173790295e-06, "loss": 0.1896, "step": 24939, "teacher_loss": 0.16662435233592987 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.41148820519447327, "learning_rate": 1.2588422841846686e-06, "loss": 0.2069, "step": 24940, "teacher_loss": 0.18413375318050385 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.4481838643550873, "learning_rate": 1.2579316660338575e-06, "loss": 0.3155, "step": 24941, "teacher_loss": 0.3007173240184784 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.478749543428421, "learning_rate": 1.257021362947474e-06, "loss": 0.2246, "step": 24942, "teacher_loss": 0.19634415209293365 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.4562843441963196, "learning_rate": 1.2561113749463887e-06, "loss": 0.1653, "step": 24943, "teacher_loss": 0.13296276330947876 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6366191506385803, "learning_rate": 1.255201702051456e-06, "loss": 0.2365, "step": 24944, "teacher_loss": 0.19207364320755005 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.20650248229503632, "learning_rate": 1.2542923442835464e-06, "loss": 0.2001, "step": 24945, "teacher_loss": 0.1993858516216278 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6839329600334167, "learning_rate": 1.253383301663501e-06, "loss": 0.2531, "step": 24946, "teacher_loss": 0.2051989585161209 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.34950923919677734, "learning_rate": 1.2524745742121584e-06, "loss": 0.1938, "step": 24947, "teacher_loss": 0.17648985981941223 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3094252049922943, "learning_rate": 1.2515661619503572e-06, "loss": 0.1858, "step": 24948, "teacher_loss": 0.17206791043281555 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.30745187401771545, "learning_rate": 1.250658064898927e-06, "loss": 0.1808, "step": 24949, "teacher_loss": 0.1667398363351822 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.4365466237068176, "learning_rate": 1.2497502830786812e-06, "loss": 0.2565, "step": 24950, "teacher_loss": 0.23646265268325806 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.295702189207077, "learning_rate": 1.2488428165104366e-06, "loss": 0.1614, "step": 24951, "teacher_loss": 0.14642928540706635 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6158324480056763, "learning_rate": 1.2479356652149999e-06, "loss": 0.2632, "step": 24952, "teacher_loss": 0.22404597699642181 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.8396400213241577, "learning_rate": 1.2470288292131693e-06, "loss": 0.265, "step": 24953, "teacher_loss": 0.20110076665878296 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.34785571694374084, "learning_rate": 1.2461223085257267e-06, "loss": 0.2189, "step": 24954, "teacher_loss": 0.2045457512140274 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.36714956164360046, "learning_rate": 1.2452161031734705e-06, "loss": 0.2139, "step": 24955, "teacher_loss": 0.1968824714422226 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6675052642822266, "learning_rate": 1.2443102131771688e-06, "loss": 0.254, "step": 24956, "teacher_loss": 0.2080499678850174 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.41223758459091187, "learning_rate": 1.2434046385575903e-06, "loss": 0.1784, "step": 24957, "teacher_loss": 0.15245135128498077 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.29313555359840393, "learning_rate": 1.2424993793354982e-06, "loss": 0.2539, "step": 24958, "teacher_loss": 0.24952684342861176 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.21495641767978668, "learning_rate": 1.2415944355316527e-06, "loss": 0.1533, "step": 24959, "teacher_loss": 0.14642101526260376 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.35847973823547363, "learning_rate": 1.2406898071667922e-06, "loss": 0.2064, "step": 24960, "teacher_loss": 0.18953177332878113 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5915001630783081, "learning_rate": 1.2397854942616632e-06, "loss": 0.2106, "step": 24961, "teacher_loss": 0.16825184226036072 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.48203861713409424, "learning_rate": 1.2388814968369993e-06, "loss": 0.2108, "step": 24962, "teacher_loss": 0.1806488186120987 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.2799552083015442, "learning_rate": 1.2379778149135223e-06, "loss": 0.1989, "step": 24963, "teacher_loss": 0.18991994857788086 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3912736177444458, "learning_rate": 1.237074448511954e-06, "loss": 0.1498, "step": 24964, "teacher_loss": 0.12299604713916779 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5617449879646301, "learning_rate": 1.2361713976530076e-06, "loss": 0.2435, "step": 24965, "teacher_loss": 0.2081899344921112 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.2986752986907959, "learning_rate": 1.2352686623573817e-06, "loss": 0.1756, "step": 24966, "teacher_loss": 0.16195544600486755 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.2902662754058838, "learning_rate": 1.2343662426457764e-06, "loss": 0.3264, "step": 24967, "teacher_loss": 0.3303978443145752 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.8083678483963013, "learning_rate": 1.233464138538885e-06, "loss": 0.3214, "step": 24968, "teacher_loss": 0.2672524154186249 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.632462739944458, "learning_rate": 1.2325623500573863e-06, "loss": 0.2542, "step": 24969, "teacher_loss": 0.21219700574874878 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.34110569953918457, "learning_rate": 1.231660877221955e-06, "loss": 0.1982, "step": 24970, "teacher_loss": 0.1823311597108841 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.7903971672058105, "learning_rate": 1.2307597200532584e-06, "loss": 0.3031, "step": 24971, "teacher_loss": 0.24891966581344604 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.24154360592365265, "learning_rate": 1.229858878571961e-06, "loss": 0.2124, "step": 24972, "teacher_loss": 0.2092091590166092 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3956694006919861, "learning_rate": 1.2289583527987137e-06, "loss": 0.2042, "step": 24973, "teacher_loss": 0.18287552893161774 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.25109967589378357, "learning_rate": 1.2280581427541627e-06, "loss": 0.1755, "step": 24974, "teacher_loss": 0.16714423894882202 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.6288987994194031, "learning_rate": 1.2271582484589517e-06, "loss": 0.2506, "step": 24975, "teacher_loss": 0.2086101770401001 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.5662125945091248, "learning_rate": 1.2262586699337042e-06, "loss": 0.235, "step": 24976, "teacher_loss": 0.19816681742668152 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.2918793559074402, "learning_rate": 1.225359407199052e-06, "loss": 0.1854, "step": 24977, "teacher_loss": 0.17358943819999695 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.4843177795410156, "learning_rate": 1.2244604602756104e-06, "loss": 0.2072, "step": 24978, "teacher_loss": 0.17643311619758606 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.28618890047073364, "learning_rate": 1.2235618291839878e-06, "loss": 0.1511, "step": 24979, "teacher_loss": 0.1361401528120041 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3873986303806305, "learning_rate": 1.2226635139447912e-06, "loss": 0.2166, "step": 24980, "teacher_loss": 0.19762305915355682 }, { "compression_loss": 0.0, "epoch": 4.51, "label_loss": 0.3318076729774475, "learning_rate": 1.2217655145786122e-06, "loss": 0.1848, "step": 24981, "teacher_loss": 0.1684718132019043 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 1.028064250946045, "learning_rate": 1.2208678311060379e-06, "loss": 0.3238, "step": 24982, "teacher_loss": 0.24552811682224274 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.4785216152667999, "learning_rate": 1.2199704635476566e-06, "loss": 0.2309, "step": 24983, "teacher_loss": 0.20336297154426575 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.1786719262599945, "learning_rate": 1.2190734119240353e-06, "loss": 0.1757, "step": 24984, "teacher_loss": 0.17534658312797546 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5340508222579956, "learning_rate": 1.2181766762557429e-06, "loss": 0.2151, "step": 24985, "teacher_loss": 0.17970529198646545 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5029054880142212, "learning_rate": 1.2172802565633423e-06, "loss": 0.3032, "step": 24986, "teacher_loss": 0.28099173307418823 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5457120537757874, "learning_rate": 1.2163841528673808e-06, "loss": 0.2324, "step": 24987, "teacher_loss": 0.19758740067481995 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.27344438433647156, "learning_rate": 1.2154883651884035e-06, "loss": 0.211, "step": 24988, "teacher_loss": 0.2040776163339615 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5257419347763062, "learning_rate": 1.2145928935469524e-06, "loss": 0.2108, "step": 24989, "teacher_loss": 0.17575670778751373 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.2967720329761505, "learning_rate": 1.2136977379635545e-06, "loss": 0.2105, "step": 24990, "teacher_loss": 0.20087894797325134 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.34486064314842224, "learning_rate": 1.2128028984587363e-06, "loss": 0.2686, "step": 24991, "teacher_loss": 0.2601253390312195 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5057714581489563, "learning_rate": 1.2119083750530086e-06, "loss": 0.2994, "step": 24992, "teacher_loss": 0.27651315927505493 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.2792707681655884, "learning_rate": 1.2110141677668845e-06, "loss": 0.1828, "step": 24993, "teacher_loss": 0.1720803678035736 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.4043523371219635, "learning_rate": 1.210120276620863e-06, "loss": 0.2457, "step": 24994, "teacher_loss": 0.22809180617332458 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.524276614189148, "learning_rate": 1.2092267016354375e-06, "loss": 0.2026, "step": 24995, "teacher_loss": 0.16690057516098022 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.24703338742256165, "learning_rate": 1.2083334428311016e-06, "loss": 0.1558, "step": 24996, "teacher_loss": 0.14562749862670898 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3075560927391052, "learning_rate": 1.2074405002283256e-06, "loss": 0.1464, "step": 24997, "teacher_loss": 0.12851177155971527 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.7544401288032532, "learning_rate": 1.2065478738475883e-06, "loss": 0.3023, "step": 24998, "teacher_loss": 0.2520369291305542 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.42518627643585205, "learning_rate": 1.2056555637093564e-06, "loss": 0.205, "step": 24999, "teacher_loss": 0.1805545687675476 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.48664218187332153, "learning_rate": 1.2047635698340803e-06, "loss": 0.3763, "step": 25000, "teacher_loss": 0.364020437002182 }, { "epoch": 4.52, "eval_exact_match": 80.34058656575213, "eval_f1": 87.71330130857804, "step": 25000 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.4412286877632141, "learning_rate": 1.203871892242217e-06, "loss": 0.2291, "step": 25001, "teacher_loss": 0.20557951927185059 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.22568213939666748, "learning_rate": 1.2029805309542102e-06, "loss": 0.1726, "step": 25002, "teacher_loss": 0.1666538566350937 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.6624314785003662, "learning_rate": 1.2020894859904934e-06, "loss": 0.2063, "step": 25003, "teacher_loss": 0.1556379199028015 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.2870168089866638, "learning_rate": 1.2011987573714922e-06, "loss": 0.1785, "step": 25004, "teacher_loss": 0.16645720601081848 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3117113411426544, "learning_rate": 1.2003083451176366e-06, "loss": 0.2123, "step": 25005, "teacher_loss": 0.201265349984169 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.22180937230587006, "learning_rate": 1.199418249249339e-06, "loss": 0.2, "step": 25006, "teacher_loss": 0.19752441346645355 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.399008572101593, "learning_rate": 1.1985284697869975e-06, "loss": 0.2327, "step": 25007, "teacher_loss": 0.214262455701828 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5661318302154541, "learning_rate": 1.1976390067510262e-06, "loss": 0.2471, "step": 25008, "teacher_loss": 0.21164795756340027 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3104586601257324, "learning_rate": 1.1967498601618089e-06, "loss": 0.1788, "step": 25009, "teacher_loss": 0.16413713991641998 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.20756185054779053, "learning_rate": 1.1958610300397321e-06, "loss": 0.2193, "step": 25010, "teacher_loss": 0.22057145833969116 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.21955274045467377, "learning_rate": 1.194972516405175e-06, "loss": 0.1711, "step": 25011, "teacher_loss": 0.16573864221572876 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.34229278564453125, "learning_rate": 1.1940843192785094e-06, "loss": 0.1837, "step": 25012, "teacher_loss": 0.1660410314798355 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5969566106796265, "learning_rate": 1.1931964386800991e-06, "loss": 0.3656, "step": 25013, "teacher_loss": 0.3399437665939331 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.42115676403045654, "learning_rate": 1.1923088746302946e-06, "loss": 0.2115, "step": 25014, "teacher_loss": 0.18816983699798584 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.34598255157470703, "learning_rate": 1.1914216271494544e-06, "loss": 0.2146, "step": 25015, "teacher_loss": 0.20001763105392456 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.47102367877960205, "learning_rate": 1.1905346962579172e-06, "loss": 0.2344, "step": 25016, "teacher_loss": 0.20816031098365784 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.18410956859588623, "learning_rate": 1.1896480819760103e-06, "loss": 0.223, "step": 25017, "teacher_loss": 0.22734716534614563 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.42701083421707153, "learning_rate": 1.1887617843240723e-06, "loss": 0.1956, "step": 25018, "teacher_loss": 0.16984519362449646 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3775167465209961, "learning_rate": 1.1878758033224185e-06, "loss": 0.2078, "step": 25019, "teacher_loss": 0.18896019458770752 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.1675519198179245, "learning_rate": 1.1869901389913596e-06, "loss": 0.1541, "step": 25020, "teacher_loss": 0.15263840556144714 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.36119040846824646, "learning_rate": 1.186104791351204e-06, "loss": 0.234, "step": 25021, "teacher_loss": 0.2198522388935089 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5766996145248413, "learning_rate": 1.1852197604222508e-06, "loss": 0.3491, "step": 25022, "teacher_loss": 0.32385721802711487 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3426487445831299, "learning_rate": 1.184335046224787e-06, "loss": 0.2941, "step": 25023, "teacher_loss": 0.2886542081832886 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.33055639266967773, "learning_rate": 1.1834506487790997e-06, "loss": 0.2151, "step": 25024, "teacher_loss": 0.20221924781799316 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.28464221954345703, "learning_rate": 1.182566568105466e-06, "loss": 0.1728, "step": 25025, "teacher_loss": 0.1603776514530182 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3991052210330963, "learning_rate": 1.1816828042241528e-06, "loss": 0.191, "step": 25026, "teacher_loss": 0.16782169044017792 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3157411217689514, "learning_rate": 1.1807993571554226e-06, "loss": 0.1656, "step": 25027, "teacher_loss": 0.14889967441558838 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3775656223297119, "learning_rate": 1.179916226919534e-06, "loss": 0.1853, "step": 25028, "teacher_loss": 0.16395828127861023 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.31451690196990967, "learning_rate": 1.1790334135367292e-06, "loss": 0.1458, "step": 25029, "teacher_loss": 0.12704439461231232 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.7381253242492676, "learning_rate": 1.1781509170272536e-06, "loss": 0.2901, "step": 25030, "teacher_loss": 0.2403007447719574 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3375489413738251, "learning_rate": 1.1772687374113344e-06, "loss": 0.2574, "step": 25031, "teacher_loss": 0.24850626289844513 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.5446094274520874, "learning_rate": 1.1763868747092039e-06, "loss": 0.2348, "step": 25032, "teacher_loss": 0.20035159587860107 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.3683244287967682, "learning_rate": 1.1755053289410738e-06, "loss": 0.1744, "step": 25033, "teacher_loss": 0.15283891558647156 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.4383699297904968, "learning_rate": 1.1746241001271584e-06, "loss": 0.1674, "step": 25034, "teacher_loss": 0.13734087347984314 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.6469532251358032, "learning_rate": 1.1737431882876632e-06, "loss": 0.2621, "step": 25035, "teacher_loss": 0.21930164098739624 }, { "compression_loss": 0.0, "epoch": 4.52, "label_loss": 0.9872376918792725, "learning_rate": 1.1728625934427818e-06, "loss": 0.2689, "step": 25036, "teacher_loss": 0.1890956461429596 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.8092731237411499, "learning_rate": 1.1719823156127062e-06, "loss": 0.3189, "step": 25037, "teacher_loss": 0.26438552141189575 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4695800542831421, "learning_rate": 1.1711023548176193e-06, "loss": 0.246, "step": 25038, "teacher_loss": 0.2211138904094696 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7202006578445435, "learning_rate": 1.1702227110776909e-06, "loss": 0.3699, "step": 25039, "teacher_loss": 0.33100342750549316 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2920592129230499, "learning_rate": 1.1693433844130935e-06, "loss": 0.1784, "step": 25040, "teacher_loss": 0.16577807068824768 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7296040058135986, "learning_rate": 1.1684643748439877e-06, "loss": 0.3363, "step": 25041, "teacher_loss": 0.2926402688026428 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2695853114128113, "learning_rate": 1.1675856823905224e-06, "loss": 0.2339, "step": 25042, "teacher_loss": 0.22995613515377045 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.3277488648891449, "learning_rate": 1.1667073070728462e-06, "loss": 0.211, "step": 25043, "teacher_loss": 0.19802260398864746 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.46200937032699585, "learning_rate": 1.1658292489110967e-06, "loss": 0.1798, "step": 25044, "teacher_loss": 0.14841556549072266 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.19668267667293549, "learning_rate": 1.1649515079254058e-06, "loss": 0.1735, "step": 25045, "teacher_loss": 0.17093007266521454 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.3090388774871826, "learning_rate": 1.1640740841358978e-06, "loss": 0.1904, "step": 25046, "teacher_loss": 0.17718321084976196 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5538351535797119, "learning_rate": 1.1631969775626877e-06, "loss": 0.2283, "step": 25047, "teacher_loss": 0.192184716463089 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.3783976435661316, "learning_rate": 1.1623201882258866e-06, "loss": 0.1613, "step": 25048, "teacher_loss": 0.13714444637298584 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2524876594543457, "learning_rate": 1.1614437161455965e-06, "loss": 0.2038, "step": 25049, "teacher_loss": 0.19834274053573608 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7641234993934631, "learning_rate": 1.1605675613419115e-06, "loss": 0.329, "step": 25050, "teacher_loss": 0.2806835472583771 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.32838648557662964, "learning_rate": 1.1596917238349202e-06, "loss": 0.1898, "step": 25051, "teacher_loss": 0.17444922029972076 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.37490326166152954, "learning_rate": 1.1588162036447003e-06, "loss": 0.189, "step": 25052, "teacher_loss": 0.16836583614349365 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.12214823812246323, "learning_rate": 1.1579410007913304e-06, "loss": 0.126, "step": 25053, "teacher_loss": 0.12637659907341003 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.33858349919319153, "learning_rate": 1.1570661152948697e-06, "loss": 0.2241, "step": 25054, "teacher_loss": 0.21137921512126923 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4753701090812683, "learning_rate": 1.1561915471753786e-06, "loss": 0.2254, "step": 25055, "teacher_loss": 0.1975809931755066 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.18832360208034515, "learning_rate": 1.155317296452913e-06, "loss": 0.1843, "step": 25056, "teacher_loss": 0.18381932377815247 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.3490745723247528, "learning_rate": 1.1544433631475116e-06, "loss": 0.2244, "step": 25057, "teacher_loss": 0.21055419743061066 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4071692228317261, "learning_rate": 1.1535697472792118e-06, "loss": 0.1658, "step": 25058, "teacher_loss": 0.13894259929656982 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.3694760203361511, "learning_rate": 1.1526964488680457e-06, "loss": 0.2253, "step": 25059, "teacher_loss": 0.20922821760177612 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4758175015449524, "learning_rate": 1.151823467934031e-06, "loss": 0.1901, "step": 25060, "teacher_loss": 0.15833674371242523 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5689733028411865, "learning_rate": 1.1509508044971867e-06, "loss": 0.2055, "step": 25061, "teacher_loss": 0.16513517498970032 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.17930416762828827, "learning_rate": 1.1500784585775215e-06, "loss": 0.1716, "step": 25062, "teacher_loss": 0.1707848459482193 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2228250503540039, "learning_rate": 1.1492064301950295e-06, "loss": 0.2576, "step": 25063, "teacher_loss": 0.2614786624908447 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.28050899505615234, "learning_rate": 1.1483347193697063e-06, "loss": 0.2504, "step": 25064, "teacher_loss": 0.24701324105262756 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.21907195448875427, "learning_rate": 1.1474633261215428e-06, "loss": 0.1548, "step": 25065, "teacher_loss": 0.14768186211585999 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5917607545852661, "learning_rate": 1.1465922504705128e-06, "loss": 0.2096, "step": 25066, "teacher_loss": 0.16710935533046722 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.23097911477088928, "learning_rate": 1.145721492436582e-06, "loss": 0.1686, "step": 25067, "teacher_loss": 0.16168928146362305 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5737055540084839, "learning_rate": 1.1448510520397264e-06, "loss": 0.3113, "step": 25068, "teacher_loss": 0.28211894631385803 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.23001497983932495, "learning_rate": 1.143980929299895e-06, "loss": 0.1667, "step": 25069, "teacher_loss": 0.1596429944038391 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5855672359466553, "learning_rate": 1.1431111242370363e-06, "loss": 0.2144, "step": 25070, "teacher_loss": 0.17311370372772217 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.44978660345077515, "learning_rate": 1.1422416368710947e-06, "loss": 0.1688, "step": 25071, "teacher_loss": 0.13755042850971222 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7153927683830261, "learning_rate": 1.1413724672220077e-06, "loss": 0.264, "step": 25072, "teacher_loss": 0.2138305902481079 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5571683645248413, "learning_rate": 1.140503615309696e-06, "loss": 0.2274, "step": 25073, "teacher_loss": 0.19073250889778137 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.41072770953178406, "learning_rate": 1.1396350811540852e-06, "loss": 0.1971, "step": 25074, "teacher_loss": 0.17338716983795166 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4944639205932617, "learning_rate": 1.1387668647750875e-06, "loss": 0.2496, "step": 25075, "teacher_loss": 0.22236031293869019 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.32629427313804626, "learning_rate": 1.1378989661926077e-06, "loss": 0.1456, "step": 25076, "teacher_loss": 0.1255749762058258 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.6234003305435181, "learning_rate": 1.137031385426539e-06, "loss": 0.2217, "step": 25077, "teacher_loss": 0.17709439992904663 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7568278312683105, "learning_rate": 1.1361641224967811e-06, "loss": 0.2334, "step": 25078, "teacher_loss": 0.17519566416740417 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2586619257926941, "learning_rate": 1.1352971774232162e-06, "loss": 0.1707, "step": 25079, "teacher_loss": 0.16094785928726196 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.42158079147338867, "learning_rate": 1.1344305502257135e-06, "loss": 0.2289, "step": 25080, "teacher_loss": 0.20748449862003326 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.47270193696022034, "learning_rate": 1.1335642409241486e-06, "loss": 0.2528, "step": 25081, "teacher_loss": 0.2283683717250824 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.32642054557800293, "learning_rate": 1.1326982495383842e-06, "loss": 0.1876, "step": 25082, "teacher_loss": 0.1722283959388733 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.31233739852905273, "learning_rate": 1.131832576088271e-06, "loss": 0.2188, "step": 25083, "teacher_loss": 0.2083582580089569 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.267640084028244, "learning_rate": 1.1309672205936561e-06, "loss": 0.1698, "step": 25084, "teacher_loss": 0.1588984578847885 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7057147026062012, "learning_rate": 1.1301021830743858e-06, "loss": 0.2451, "step": 25085, "teacher_loss": 0.19390638172626495 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4710800051689148, "learning_rate": 1.1292374635502838e-06, "loss": 0.1613, "step": 25086, "teacher_loss": 0.12689335644245148 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.347565621137619, "learning_rate": 1.1283730620411814e-06, "loss": 0.1565, "step": 25087, "teacher_loss": 0.13525059819221497 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5915053486824036, "learning_rate": 1.127508978566899e-06, "loss": 0.2188, "step": 25088, "teacher_loss": 0.17741575837135315 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.4275597929954529, "learning_rate": 1.1266452131472393e-06, "loss": 0.1728, "step": 25089, "teacher_loss": 0.14450258016586304 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.7749910354614258, "learning_rate": 1.1257817658020114e-06, "loss": 0.2719, "step": 25090, "teacher_loss": 0.21605314314365387 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.5343226790428162, "learning_rate": 1.1249186365510128e-06, "loss": 0.2768, "step": 25091, "teacher_loss": 0.2481517195701599 }, { "compression_loss": 0.0, "epoch": 4.53, "label_loss": 0.2786710560321808, "learning_rate": 1.1240558254140277e-06, "loss": 0.1726, "step": 25092, "teacher_loss": 0.16082261502742767 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3009941875934601, "learning_rate": 1.1231933324108417e-06, "loss": 0.1556, "step": 25093, "teacher_loss": 0.13947902619838715 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.16383624076843262, "learning_rate": 1.1223311575612261e-06, "loss": 0.1959, "step": 25094, "teacher_loss": 0.19949132204055786 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 1.050405502319336, "learning_rate": 1.1214693008849531e-06, "loss": 0.3097, "step": 25095, "teacher_loss": 0.22742529213428497 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.44941550493240356, "learning_rate": 1.1206077624017752e-06, "loss": 0.2111, "step": 25096, "teacher_loss": 0.18467360734939575 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.27394360303878784, "learning_rate": 1.11974654213145e-06, "loss": 0.2215, "step": 25097, "teacher_loss": 0.21564587950706482 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3398643732070923, "learning_rate": 1.1188856400937219e-06, "loss": 0.1948, "step": 25098, "teacher_loss": 0.17869465053081512 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 1.252565860748291, "learning_rate": 1.1180250563083282e-06, "loss": 0.2831, "step": 25099, "teacher_loss": 0.1753694713115692 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.18907268345355988, "learning_rate": 1.1171647907949983e-06, "loss": 0.1785, "step": 25100, "teacher_loss": 0.17733994126319885 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.36895787715911865, "learning_rate": 1.1163048435734596e-06, "loss": 0.252, "step": 25101, "teacher_loss": 0.23903557658195496 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4971223473548889, "learning_rate": 1.1154452146634232e-06, "loss": 0.2092, "step": 25102, "teacher_loss": 0.177154541015625 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.659134030342102, "learning_rate": 1.1145859040846012e-06, "loss": 0.2489, "step": 25103, "teacher_loss": 0.2033492624759674 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6527400016784668, "learning_rate": 1.1137269118566935e-06, "loss": 0.2317, "step": 25104, "teacher_loss": 0.18491825461387634 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.48554086685180664, "learning_rate": 1.112868237999392e-06, "loss": 0.2122, "step": 25105, "teacher_loss": 0.18184050917625427 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5579530000686646, "learning_rate": 1.1120098825323898e-06, "loss": 0.2251, "step": 25106, "teacher_loss": 0.18811756372451782 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.7567838430404663, "learning_rate": 1.111151845475361e-06, "loss": 0.2668, "step": 25107, "teacher_loss": 0.2123786360025406 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.36765024065971375, "learning_rate": 1.1102941268479783e-06, "loss": 0.1497, "step": 25108, "teacher_loss": 0.1255100965499878 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.999617874622345, "learning_rate": 1.1094367266699107e-06, "loss": 0.3132, "step": 25109, "teacher_loss": 0.23692744970321655 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.642194390296936, "learning_rate": 1.1085796449608093e-06, "loss": 0.2476, "step": 25110, "teacher_loss": 0.20377424359321594 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5008236765861511, "learning_rate": 1.1077228817403302e-06, "loss": 0.2679, "step": 25111, "teacher_loss": 0.2420269250869751 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3790559470653534, "learning_rate": 1.106866437028114e-06, "loss": 0.2438, "step": 25112, "teacher_loss": 0.22879081964492798 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.2707124650478363, "learning_rate": 1.1060103108437969e-06, "loss": 0.1901, "step": 25113, "teacher_loss": 0.18117444217205048 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.2220154106616974, "learning_rate": 1.1051545032070081e-06, "loss": 0.1842, "step": 25114, "teacher_loss": 0.1799495667219162 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4881695508956909, "learning_rate": 1.1042990141373654e-06, "loss": 0.1943, "step": 25115, "teacher_loss": 0.1616876721382141 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5080932378768921, "learning_rate": 1.1034438436544863e-06, "loss": 0.1968, "step": 25116, "teacher_loss": 0.16225430369377136 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4047453999519348, "learning_rate": 1.1025889917779735e-06, "loss": 0.2475, "step": 25117, "teacher_loss": 0.2300504446029663 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.38214755058288574, "learning_rate": 1.1017344585274297e-06, "loss": 0.1934, "step": 25118, "teacher_loss": 0.17240500450134277 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.16782429814338684, "learning_rate": 1.1008802439224474e-06, "loss": 0.1559, "step": 25119, "teacher_loss": 0.1545776128768921 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3021412789821625, "learning_rate": 1.1000263479826078e-06, "loss": 0.1491, "step": 25120, "teacher_loss": 0.13211965560913086 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4685192108154297, "learning_rate": 1.0991727707274885e-06, "loss": 0.209, "step": 25121, "teacher_loss": 0.18015766143798828 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6349761486053467, "learning_rate": 1.0983195121766637e-06, "loss": 0.1997, "step": 25122, "teacher_loss": 0.15133631229400635 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5472557544708252, "learning_rate": 1.0974665723496914e-06, "loss": 0.2031, "step": 25123, "teacher_loss": 0.16482828557491302 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.1830618679523468, "learning_rate": 1.0966139512661273e-06, "loss": 0.1837, "step": 25124, "teacher_loss": 0.18378564715385437 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.42108988761901855, "learning_rate": 1.0957616489455242e-06, "loss": 0.2916, "step": 25125, "teacher_loss": 0.2772218585014343 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.14607976377010345, "learning_rate": 1.0949096654074198e-06, "loss": 0.1428, "step": 25126, "teacher_loss": 0.14244094491004944 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 1.0830754041671753, "learning_rate": 1.0940580006713403e-06, "loss": 0.3867, "step": 25127, "teacher_loss": 0.30932193994522095 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4186926484107971, "learning_rate": 1.0932066547568248e-06, "loss": 0.2469, "step": 25128, "teacher_loss": 0.2277604341506958 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3768858015537262, "learning_rate": 1.0923556276833862e-06, "loss": 0.1853, "step": 25129, "teacher_loss": 0.16398711502552032 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4666866958141327, "learning_rate": 1.0915049194705306e-06, "loss": 0.2756, "step": 25130, "teacher_loss": 0.25437361001968384 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.4775325059890747, "learning_rate": 1.0906545301377725e-06, "loss": 0.2133, "step": 25131, "teacher_loss": 0.18394434452056885 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.24803099036216736, "learning_rate": 1.0898044597046043e-06, "loss": 0.1852, "step": 25132, "teacher_loss": 0.17826637625694275 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6650961637496948, "learning_rate": 1.0889547081905105e-06, "loss": 0.2346, "step": 25133, "teacher_loss": 0.18672078847885132 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.11538488417863846, "learning_rate": 1.088105275614979e-06, "loss": 0.1212, "step": 25134, "teacher_loss": 0.12179238349199295 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.30577942728996277, "learning_rate": 1.0872561619974858e-06, "loss": 0.2014, "step": 25135, "teacher_loss": 0.18981948494911194 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5678088665008545, "learning_rate": 1.086407367357497e-06, "loss": 0.3802, "step": 25136, "teacher_loss": 0.3593955338001251 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5712772607803345, "learning_rate": 1.085558891714467e-06, "loss": 0.2317, "step": 25137, "teacher_loss": 0.19399836659431458 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6207983493804932, "learning_rate": 1.084710735087857e-06, "loss": 0.1846, "step": 25138, "teacher_loss": 0.13608869910240173 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.3965565860271454, "learning_rate": 1.0838628974971115e-06, "loss": 0.243, "step": 25139, "teacher_loss": 0.22591203451156616 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.38403910398483276, "learning_rate": 1.0830153789616614e-06, "loss": 0.2249, "step": 25140, "teacher_loss": 0.20726221799850464 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.5643201470375061, "learning_rate": 1.0821681795009498e-06, "loss": 0.2601, "step": 25141, "teacher_loss": 0.22627192735671997 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6626375913619995, "learning_rate": 1.0813212991343924e-06, "loss": 0.2416, "step": 25142, "teacher_loss": 0.19484224915504456 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.9159227013587952, "learning_rate": 1.0804747378814039e-06, "loss": 0.442, "step": 25143, "teacher_loss": 0.38931912183761597 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.09402231127023697, "learning_rate": 1.079628495761399e-06, "loss": 0.1935, "step": 25144, "teacher_loss": 0.2045225203037262 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6535812616348267, "learning_rate": 1.0787825727937783e-06, "loss": 0.2397, "step": 25145, "teacher_loss": 0.19369667768478394 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6446030139923096, "learning_rate": 1.0779369689979335e-06, "loss": 0.2019, "step": 25146, "teacher_loss": 0.1527140736579895 }, { "compression_loss": 0.0, "epoch": 4.54, "label_loss": 0.6177886128425598, "learning_rate": 1.0770916843932538e-06, "loss": 0.3612, "step": 25147, "teacher_loss": 0.3326902389526367 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.24591708183288574, "learning_rate": 1.0762467189991205e-06, "loss": 0.1347, "step": 25148, "teacher_loss": 0.12238724529743195 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.39661920070648193, "learning_rate": 1.0754020728349011e-06, "loss": 0.1979, "step": 25149, "teacher_loss": 0.17583858966827393 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5616382956504822, "learning_rate": 1.0745577459199656e-06, "loss": 0.248, "step": 25150, "teacher_loss": 0.21316689252853394 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4333067238330841, "learning_rate": 1.0737137382736717e-06, "loss": 0.2972, "step": 25151, "teacher_loss": 0.2820393741130829 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2262304425239563, "learning_rate": 1.0728700499153671e-06, "loss": 0.1616, "step": 25152, "teacher_loss": 0.15446409583091736 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4539685845375061, "learning_rate": 1.072026680864398e-06, "loss": 0.2018, "step": 25153, "teacher_loss": 0.17375105619430542 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4044560492038727, "learning_rate": 1.0711836311400974e-06, "loss": 0.1953, "step": 25154, "teacher_loss": 0.1720491349697113 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3996172547340393, "learning_rate": 1.0703409007617965e-06, "loss": 0.3368, "step": 25155, "teacher_loss": 0.32979950308799744 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2790040373802185, "learning_rate": 1.0694984897488146e-06, "loss": 0.2074, "step": 25156, "teacher_loss": 0.1994135081768036 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.45737403631210327, "learning_rate": 1.068656398120465e-06, "loss": 0.254, "step": 25157, "teacher_loss": 0.23138466477394104 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.9622973203659058, "learning_rate": 1.0678146258960602e-06, "loss": 0.2801, "step": 25158, "teacher_loss": 0.204318106174469 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4137294590473175, "learning_rate": 1.06697317309489e-06, "loss": 0.2289, "step": 25159, "teacher_loss": 0.20839133858680725 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.558612585067749, "learning_rate": 1.0661320397362539e-06, "loss": 0.2709, "step": 25160, "teacher_loss": 0.23889321088790894 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4955894947052002, "learning_rate": 1.0652912258394366e-06, "loss": 0.2128, "step": 25161, "teacher_loss": 0.18142831325531006 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.671654462814331, "learning_rate": 1.064450731423709e-06, "loss": 0.2141, "step": 25162, "teacher_loss": 0.16328689455986023 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.21502867341041565, "learning_rate": 1.063610556508346e-06, "loss": 0.1517, "step": 25163, "teacher_loss": 0.14466698467731476 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3167605698108673, "learning_rate": 1.062770701112612e-06, "loss": 0.1566, "step": 25164, "teacher_loss": 0.1387665867805481 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.8184818029403687, "learning_rate": 1.0619311652557584e-06, "loss": 0.2098, "step": 25165, "teacher_loss": 0.14219698309898376 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3001149296760559, "learning_rate": 1.0610919489570364e-06, "loss": 0.1871, "step": 25166, "teacher_loss": 0.1745389848947525 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.27030065655708313, "learning_rate": 1.0602530522356825e-06, "loss": 0.2793, "step": 25167, "teacher_loss": 0.280300498008728 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2223224937915802, "learning_rate": 1.0594144751109324e-06, "loss": 0.1328, "step": 25168, "teacher_loss": 0.12287843227386475 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.7528339624404907, "learning_rate": 1.0585762176020148e-06, "loss": 0.2405, "step": 25169, "teacher_loss": 0.1835213005542755 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.30148136615753174, "learning_rate": 1.0577382797281437e-06, "loss": 0.1946, "step": 25170, "teacher_loss": 0.18274180591106415 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 1.0508067607879639, "learning_rate": 1.0569006615085325e-06, "loss": 0.3364, "step": 25171, "teacher_loss": 0.25701552629470825 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.44432538747787476, "learning_rate": 1.0560633629623872e-06, "loss": 0.2628, "step": 25172, "teacher_loss": 0.24266645312309265 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2835889458656311, "learning_rate": 1.055226384108901e-06, "loss": 0.2546, "step": 25173, "teacher_loss": 0.25141146779060364 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4015403389930725, "learning_rate": 1.0543897249672667e-06, "loss": 0.2117, "step": 25174, "teacher_loss": 0.1905989944934845 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.7630246877670288, "learning_rate": 1.053553385556666e-06, "loss": 0.2705, "step": 25175, "teacher_loss": 0.215753972530365 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5852357149124146, "learning_rate": 1.052717365896273e-06, "loss": 0.226, "step": 25176, "teacher_loss": 0.18610183894634247 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.19653409719467163, "learning_rate": 1.051881666005251e-06, "loss": 0.185, "step": 25177, "teacher_loss": 0.18367037177085876 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.883455753326416, "learning_rate": 1.0510462859027648e-06, "loss": 0.2655, "step": 25178, "teacher_loss": 0.19685232639312744 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2505333423614502, "learning_rate": 1.0502112256079688e-06, "loss": 0.1574, "step": 25179, "teacher_loss": 0.1470758616924286 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5195713043212891, "learning_rate": 1.049376485140003e-06, "loss": 0.1961, "step": 25180, "teacher_loss": 0.1601724624633789 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3929944932460785, "learning_rate": 1.0485420645180082e-06, "loss": 0.1848, "step": 25181, "teacher_loss": 0.16162265837192535 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5909929275512695, "learning_rate": 1.047707963761118e-06, "loss": 0.3129, "step": 25182, "teacher_loss": 0.2819991707801819 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.0995166227221489, "learning_rate": 1.0468741828884503e-06, "loss": 0.1044, "step": 25183, "teacher_loss": 0.10490189492702484 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.44798195362091064, "learning_rate": 1.0460407219191248e-06, "loss": 0.1991, "step": 25184, "teacher_loss": 0.17145895957946777 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.279528945684433, "learning_rate": 1.0452075808722512e-06, "loss": 0.256, "step": 25185, "teacher_loss": 0.25340765714645386 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.21935133635997772, "learning_rate": 1.0443747597669256e-06, "loss": 0.1607, "step": 25186, "teacher_loss": 0.15415659546852112 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.23632782697677612, "learning_rate": 1.0435422586222465e-06, "loss": 0.164, "step": 25187, "teacher_loss": 0.15593472123146057 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.19084838032722473, "learning_rate": 1.0427100774573034e-06, "loss": 0.1279, "step": 25188, "teacher_loss": 0.12093309313058853 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5360528826713562, "learning_rate": 1.041878216291171e-06, "loss": 0.2632, "step": 25189, "teacher_loss": 0.23288151621818542 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5119979381561279, "learning_rate": 1.0410466751429176e-06, "loss": 0.2711, "step": 25190, "teacher_loss": 0.2443457841873169 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.27491700649261475, "learning_rate": 1.0402154540316178e-06, "loss": 0.3808, "step": 25191, "teacher_loss": 0.3925568759441376 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.23057198524475098, "learning_rate": 1.039384552976323e-06, "loss": 0.1879, "step": 25192, "teacher_loss": 0.1831241101026535 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.34249866008758545, "learning_rate": 1.038553971996083e-06, "loss": 0.1674, "step": 25193, "teacher_loss": 0.14799395203590393 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3817611336708069, "learning_rate": 1.0377237111099425e-06, "loss": 0.2242, "step": 25194, "teacher_loss": 0.20665612816810608 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.8364651203155518, "learning_rate": 1.036893770336938e-06, "loss": 0.3327, "step": 25195, "teacher_loss": 0.2767726182937622 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.5810595154762268, "learning_rate": 1.0360641496960927e-06, "loss": 0.4188, "step": 25196, "teacher_loss": 0.4007996916770935 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.3370826840400696, "learning_rate": 1.0352348492064312e-06, "loss": 0.2211, "step": 25197, "teacher_loss": 0.2081608772277832 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.20390617847442627, "learning_rate": 1.0344058688869683e-06, "loss": 0.1645, "step": 25198, "teacher_loss": 0.16011005640029907 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2785056531429291, "learning_rate": 1.0335772087567075e-06, "loss": 0.1424, "step": 25199, "teacher_loss": 0.1272660493850708 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2310413122177124, "learning_rate": 1.032748868834643e-06, "loss": 0.2409, "step": 25200, "teacher_loss": 0.24204185605049133 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.2546086609363556, "learning_rate": 1.0319208491397752e-06, "loss": 0.1736, "step": 25201, "teacher_loss": 0.16459433734416962 }, { "compression_loss": 0.0, "epoch": 4.55, "label_loss": 0.4630472660064697, "learning_rate": 1.0310931496910853e-06, "loss": 0.2595, "step": 25202, "teacher_loss": 0.23683932423591614 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.9697644710540771, "learning_rate": 1.0302657705075447e-06, "loss": 0.2937, "step": 25203, "teacher_loss": 0.21855421364307404 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.17329362034797668, "learning_rate": 1.0294387116081283e-06, "loss": 0.1571, "step": 25204, "teacher_loss": 0.15532107651233673 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 1.0363295078277588, "learning_rate": 1.0286119730117976e-06, "loss": 0.2622, "step": 25205, "teacher_loss": 0.1761317253112793 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.7478715777397156, "learning_rate": 1.0277855547375025e-06, "loss": 0.2511, "step": 25206, "teacher_loss": 0.19586652517318726 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.4772930443286896, "learning_rate": 1.0269594568041961e-06, "loss": 0.1979, "step": 25207, "teacher_loss": 0.16687631607055664 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.662340521812439, "learning_rate": 1.0261336792308168e-06, "loss": 0.2659, "step": 25208, "teacher_loss": 0.2218572199344635 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.6691567897796631, "learning_rate": 1.0253082220362942e-06, "loss": 0.2393, "step": 25209, "teacher_loss": 0.1915469765663147 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.340048611164093, "learning_rate": 1.0244830852395564e-06, "loss": 0.2408, "step": 25210, "teacher_loss": 0.22972318530082703 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.6415939331054688, "learning_rate": 1.0236582688595219e-06, "loss": 0.2259, "step": 25211, "teacher_loss": 0.179696187376976 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.27981430292129517, "learning_rate": 1.022833772915099e-06, "loss": 0.2036, "step": 25212, "teacher_loss": 0.19510118663311005 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.36495888233184814, "learning_rate": 1.022009597425192e-06, "loss": 0.1686, "step": 25213, "teacher_loss": 0.14675463736057281 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.45230725407600403, "learning_rate": 1.0211857424086979e-06, "loss": 0.2597, "step": 25214, "teacher_loss": 0.23835399746894836 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5013591051101685, "learning_rate": 1.0203622078845032e-06, "loss": 0.2377, "step": 25215, "teacher_loss": 0.2083880603313446 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.31093183159828186, "learning_rate": 1.019538993871491e-06, "loss": 0.1468, "step": 25216, "teacher_loss": 0.12855631113052368 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5743951797485352, "learning_rate": 1.0187161003885315e-06, "loss": 0.2661, "step": 25217, "teacher_loss": 0.2318832278251648 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.40434110164642334, "learning_rate": 1.0178935274544975e-06, "loss": 0.1918, "step": 25218, "teacher_loss": 0.1682206392288208 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.46418869495391846, "learning_rate": 1.017071275088241e-06, "loss": 0.266, "step": 25219, "teacher_loss": 0.24398571252822876 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.33546945452690125, "learning_rate": 1.0162493433086167e-06, "loss": 0.205, "step": 25220, "teacher_loss": 0.19055090844631195 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.6674797534942627, "learning_rate": 1.015427732134473e-06, "loss": 0.2149, "step": 25221, "teacher_loss": 0.16464710235595703 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.2732957899570465, "learning_rate": 1.0146064415846412e-06, "loss": 0.2061, "step": 25222, "teacher_loss": 0.19866423308849335 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.35575833916664124, "learning_rate": 1.0137854716779532e-06, "loss": 0.2383, "step": 25223, "teacher_loss": 0.22522330284118652 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 1.0614213943481445, "learning_rate": 1.0129648224332321e-06, "loss": 0.3783, "step": 25224, "teacher_loss": 0.30237245559692383 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5566887855529785, "learning_rate": 1.0121444938692914e-06, "loss": 0.2445, "step": 25225, "teacher_loss": 0.2097589522600174 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.3513622283935547, "learning_rate": 1.0113244860049425e-06, "loss": 0.1721, "step": 25226, "teacher_loss": 0.15221622586250305 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5891441106796265, "learning_rate": 1.0105047988589805e-06, "loss": 0.3517, "step": 25227, "teacher_loss": 0.32531094551086426 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.27161553502082825, "learning_rate": 1.0096854324502003e-06, "loss": 0.1852, "step": 25228, "teacher_loss": 0.17560729384422302 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.6531555652618408, "learning_rate": 1.0088663867973903e-06, "loss": 0.2342, "step": 25229, "teacher_loss": 0.1876084953546524 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.3394317626953125, "learning_rate": 1.0080476619193235e-06, "loss": 0.1726, "step": 25230, "teacher_loss": 0.15411494672298431 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.26204758882522583, "learning_rate": 1.0072292578347753e-06, "loss": 0.1699, "step": 25231, "teacher_loss": 0.1596796065568924 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.939587414264679, "learning_rate": 1.0064111745625088e-06, "loss": 0.3059, "step": 25232, "teacher_loss": 0.23550009727478027 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.326665461063385, "learning_rate": 1.0055934121212773e-06, "loss": 0.1912, "step": 25233, "teacher_loss": 0.17610543966293335 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5514337420463562, "learning_rate": 1.004775970529831e-06, "loss": 0.2334, "step": 25234, "teacher_loss": 0.1980901062488556 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.4697257876396179, "learning_rate": 1.0039588498069147e-06, "loss": 0.1951, "step": 25235, "teacher_loss": 0.16458944976329803 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.48229724168777466, "learning_rate": 1.0031420499712568e-06, "loss": 0.2314, "step": 25236, "teacher_loss": 0.20357276499271393 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.1518930196762085, "learning_rate": 1.0023255710415874e-06, "loss": 0.137, "step": 25237, "teacher_loss": 0.1352979987859726 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.25666019320487976, "learning_rate": 1.001509413036628e-06, "loss": 0.2107, "step": 25238, "teacher_loss": 0.2056483030319214 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.7127546072006226, "learning_rate": 1.0006935759750873e-06, "loss": 0.1945, "step": 25239, "teacher_loss": 0.13690584897994995 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.601432740688324, "learning_rate": 9.998780598756685e-07, "loss": 0.2148, "step": 25240, "teacher_loss": 0.17184464633464813 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.4649273753166199, "learning_rate": 9.990628647570732e-07, "loss": 0.2223, "step": 25241, "teacher_loss": 0.1953580379486084 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 1.0118002891540527, "learning_rate": 9.9824799063799e-07, "loss": 0.3141, "step": 25242, "teacher_loss": 0.23654355108737946 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.4516458511352539, "learning_rate": 9.974334375370985e-07, "loss": 0.1994, "step": 25243, "teacher_loss": 0.17137081921100616 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.3629573881626129, "learning_rate": 9.96619205473076e-07, "loss": 0.2502, "step": 25244, "teacher_loss": 0.23767578601837158 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.6243861317634583, "learning_rate": 9.95805294464594e-07, "loss": 0.2635, "step": 25245, "teacher_loss": 0.223362997174263 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.2881556749343872, "learning_rate": 9.949917045303076e-07, "loss": 0.1382, "step": 25246, "teacher_loss": 0.12151715159416199 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5381479263305664, "learning_rate": 9.941784356888717e-07, "loss": 0.2254, "step": 25247, "teacher_loss": 0.19066426157951355 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.41888338327407837, "learning_rate": 9.933654879589365e-07, "loss": 0.2088, "step": 25248, "teacher_loss": 0.18550518155097961 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.8377820253372192, "learning_rate": 9.925528613591356e-07, "loss": 0.2733, "step": 25249, "teacher_loss": 0.21060319244861603 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.2254316806793213, "learning_rate": 9.917405559080956e-07, "loss": 0.1866, "step": 25250, "teacher_loss": 0.18230530619621277 }, { "epoch": 4.56, "eval_exact_match": 80.58656575212866, "eval_f1": 87.72573280071668, "step": 25250 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.4477003216743469, "learning_rate": 9.909285716244514e-07, "loss": 0.1773, "step": 25251, "teacher_loss": 0.1472342163324356 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.3066934645175934, "learning_rate": 9.90116908526812e-07, "loss": 0.1576, "step": 25252, "teacher_loss": 0.14101171493530273 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.7134931087493896, "learning_rate": 9.89305566633787e-07, "loss": 0.2304, "step": 25253, "teacher_loss": 0.17670965194702148 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.25202542543411255, "learning_rate": 9.8849454596398e-07, "loss": 0.1588, "step": 25254, "teacher_loss": 0.14839942753314972 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5501688122749329, "learning_rate": 9.87683846535986e-07, "loss": 0.2076, "step": 25255, "teacher_loss": 0.16949230432510376 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.5021957159042358, "learning_rate": 9.868734683683873e-07, "loss": 0.2761, "step": 25256, "teacher_loss": 0.25101611018180847 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.46302330493927, "learning_rate": 9.860634114797668e-07, "loss": 0.2776, "step": 25257, "teacher_loss": 0.25701189041137695 }, { "compression_loss": 0.0, "epoch": 4.56, "label_loss": 0.8627578616142273, "learning_rate": 9.852536758886999e-07, "loss": 0.3173, "step": 25258, "teacher_loss": 0.25668299198150635 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.20451778173446655, "learning_rate": 9.844442616137467e-07, "loss": 0.2113, "step": 25259, "teacher_loss": 0.21207863092422485 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.2381524294614792, "learning_rate": 9.836351686734607e-07, "loss": 0.2133, "step": 25260, "teacher_loss": 0.21058307588100433 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.2902122735977173, "learning_rate": 9.82826397086402e-07, "loss": 0.1839, "step": 25261, "teacher_loss": 0.17207857966423035 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.16998478770256042, "learning_rate": 9.820179468711077e-07, "loss": 0.134, "step": 25262, "teacher_loss": 0.13000991940498352 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6748837232589722, "learning_rate": 9.81209818046111e-07, "loss": 0.263, "step": 25263, "teacher_loss": 0.2172715663909912 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3503924608230591, "learning_rate": 9.804020106299454e-07, "loss": 0.1903, "step": 25264, "teacher_loss": 0.17248988151550293 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7435210943222046, "learning_rate": 9.795945246411298e-07, "loss": 0.3259, "step": 25265, "teacher_loss": 0.279512882232666 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.8554823994636536, "learning_rate": 9.78787360098174e-07, "loss": 0.2707, "step": 25266, "teacher_loss": 0.20569708943367004 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.4237726330757141, "learning_rate": 9.779805170195865e-07, "loss": 0.2399, "step": 25267, "teacher_loss": 0.21951819956302643 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.5574874877929688, "learning_rate": 9.771739954238662e-07, "loss": 0.241, "step": 25268, "teacher_loss": 0.2058856338262558 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.5127108693122864, "learning_rate": 9.76367795329503e-07, "loss": 0.1674, "step": 25269, "teacher_loss": 0.12907275557518005 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.8656055927276611, "learning_rate": 9.755619167549805e-07, "loss": 0.319, "step": 25270, "teacher_loss": 0.2583044171333313 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.1763477325439453, "learning_rate": 9.747563597187792e-07, "loss": 0.1515, "step": 25271, "teacher_loss": 0.14871622622013092 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.4178406000137329, "learning_rate": 9.739511242393606e-07, "loss": 0.201, "step": 25272, "teacher_loss": 0.17692402005195618 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3171330690383911, "learning_rate": 9.731462103351919e-07, "loss": 0.2162, "step": 25273, "teacher_loss": 0.2050376534461975 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.449001669883728, "learning_rate": 9.72341618024728e-07, "loss": 0.2119, "step": 25274, "teacher_loss": 0.1855117380619049 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3039000928401947, "learning_rate": 9.715373473264111e-07, "loss": 0.1805, "step": 25275, "teacher_loss": 0.16683441400527954 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.2751407027244568, "learning_rate": 9.707333982586863e-07, "loss": 0.2364, "step": 25276, "teacher_loss": 0.23205478489398956 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6350975036621094, "learning_rate": 9.69929770839979e-07, "loss": 0.2434, "step": 25277, "teacher_loss": 0.19985038042068481 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.5562041997909546, "learning_rate": 9.691264650887194e-07, "loss": 0.2277, "step": 25278, "teacher_loss": 0.19123660027980804 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.36706995964050293, "learning_rate": 9.683234810233244e-07, "loss": 0.2036, "step": 25279, "teacher_loss": 0.18545326590538025 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.2762756943702698, "learning_rate": 9.675208186622008e-07, "loss": 0.1833, "step": 25280, "teacher_loss": 0.17295041680335999 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.48622292280197144, "learning_rate": 9.667184780237543e-07, "loss": 0.2228, "step": 25281, "teacher_loss": 0.1934972107410431 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7641993761062622, "learning_rate": 9.65916459126378e-07, "loss": 0.3321, "step": 25282, "teacher_loss": 0.2840563654899597 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.5566275119781494, "learning_rate": 9.651147619884626e-07, "loss": 0.2624, "step": 25283, "teacher_loss": 0.22965875267982483 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6244545578956604, "learning_rate": 9.643133866283865e-07, "loss": 0.2302, "step": 25284, "teacher_loss": 0.18639524281024933 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7283577919006348, "learning_rate": 9.635123330645218e-07, "loss": 0.2526, "step": 25285, "teacher_loss": 0.19968871772289276 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7025970220565796, "learning_rate": 9.62711601315237e-07, "loss": 0.2522, "step": 25286, "teacher_loss": 0.2022044062614441 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3132360279560089, "learning_rate": 9.61911191398891e-07, "loss": 0.2037, "step": 25287, "teacher_loss": 0.19150398671627045 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.36863166093826294, "learning_rate": 9.611111033338287e-07, "loss": 0.1989, "step": 25288, "teacher_loss": 0.18000632524490356 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.23424896597862244, "learning_rate": 9.603113371384025e-07, "loss": 0.1411, "step": 25289, "teacher_loss": 0.13072270154953003 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.763077974319458, "learning_rate": 9.59511892830941e-07, "loss": 0.1757, "step": 25290, "teacher_loss": 0.11044125258922577 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6429189443588257, "learning_rate": 9.587127704297777e-07, "loss": 0.2867, "step": 25291, "teacher_loss": 0.24712969362735748 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.5208631753921509, "learning_rate": 9.57913969953233e-07, "loss": 0.2748, "step": 25292, "teacher_loss": 0.24749226868152618 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7731572389602661, "learning_rate": 9.57115491419619e-07, "loss": 0.2917, "step": 25293, "teacher_loss": 0.2382373809814453 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.48019927740097046, "learning_rate": 9.563173348472443e-07, "loss": 0.2631, "step": 25294, "teacher_loss": 0.23893752694129944 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6336334347724915, "learning_rate": 9.555195002544093e-07, "loss": 0.1813, "step": 25295, "teacher_loss": 0.13100528717041016 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.49347177147865295, "learning_rate": 9.547219876594043e-07, "loss": 0.2019, "step": 25296, "teacher_loss": 0.1694604456424713 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.7972241640090942, "learning_rate": 9.539247970805115e-07, "loss": 0.2011, "step": 25297, "teacher_loss": 0.13484182953834534 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.30358487367630005, "learning_rate": 9.531279285360145e-07, "loss": 0.2615, "step": 25298, "teacher_loss": 0.2568710744380951 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.49980559945106506, "learning_rate": 9.523313820441804e-07, "loss": 0.183, "step": 25299, "teacher_loss": 0.14778810739517212 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.4036256670951843, "learning_rate": 9.515351576232645e-07, "loss": 0.2776, "step": 25300, "teacher_loss": 0.2635723948478699 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.42805999517440796, "learning_rate": 9.50739255291534e-07, "loss": 0.1989, "step": 25301, "teacher_loss": 0.17340236902236938 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.42422056198120117, "learning_rate": 9.499436750672291e-07, "loss": 0.1837, "step": 25302, "teacher_loss": 0.1570090353488922 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.27697575092315674, "learning_rate": 9.491484169685888e-07, "loss": 0.1608, "step": 25303, "teacher_loss": 0.14786545932292938 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.361478328704834, "learning_rate": 9.4835348101385e-07, "loss": 0.1695, "step": 25304, "teacher_loss": 0.14815561473369598 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.44979333877563477, "learning_rate": 9.475588672212381e-07, "loss": 0.1626, "step": 25305, "teacher_loss": 0.13063758611679077 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3927963972091675, "learning_rate": 9.467645756089687e-07, "loss": 0.2498, "step": 25306, "teacher_loss": 0.23389843106269836 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.141736701130867, "learning_rate": 9.459706061952539e-07, "loss": 0.1342, "step": 25307, "teacher_loss": 0.13338759541511536 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.19363108277320862, "learning_rate": 9.451769589982989e-07, "loss": 0.1589, "step": 25308, "teacher_loss": 0.1550879180431366 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.4960261583328247, "learning_rate": 9.443836340362943e-07, "loss": 0.1793, "step": 25309, "teacher_loss": 0.14409592747688293 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.6943809986114502, "learning_rate": 9.43590631327434e-07, "loss": 0.2452, "step": 25310, "teacher_loss": 0.19528554379940033 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.3955976963043213, "learning_rate": 9.427979508898982e-07, "loss": 0.189, "step": 25311, "teacher_loss": 0.16609206795692444 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.43434083461761475, "learning_rate": 9.420055927418592e-07, "loss": 0.2621, "step": 25312, "teacher_loss": 0.24300166964530945 }, { "compression_loss": 0.0, "epoch": 4.57, "label_loss": 0.15725907683372498, "learning_rate": 9.412135569014807e-07, "loss": 0.1492, "step": 25313, "teacher_loss": 0.14826278388500214 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.6298251152038574, "learning_rate": 9.404218433869283e-07, "loss": 0.1738, "step": 25314, "teacher_loss": 0.12315648049116135 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.16814552247524261, "learning_rate": 9.396304522163506e-07, "loss": 0.2239, "step": 25315, "teacher_loss": 0.23012548685073853 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.24124358594417572, "learning_rate": 9.388393834078901e-07, "loss": 0.197, "step": 25316, "teacher_loss": 0.1921166479587555 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.35338687896728516, "learning_rate": 9.380486369796837e-07, "loss": 0.2266, "step": 25317, "teacher_loss": 0.21254099905490875 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5619615316390991, "learning_rate": 9.372582129498652e-07, "loss": 0.2827, "step": 25318, "teacher_loss": 0.25167083740234375 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.31642550230026245, "learning_rate": 9.364681113365519e-07, "loss": 0.2147, "step": 25319, "teacher_loss": 0.2033807635307312 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.4680515229701996, "learning_rate": 9.356783321578594e-07, "loss": 0.1904, "step": 25320, "teacher_loss": 0.15952345728874207 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.3304011821746826, "learning_rate": 9.348888754318979e-07, "loss": 0.1768, "step": 25321, "teacher_loss": 0.15976056456565857 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.4386477470397949, "learning_rate": 9.340997411767649e-07, "loss": 0.2116, "step": 25322, "teacher_loss": 0.1863684207201004 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5711950063705444, "learning_rate": 9.333109294105491e-07, "loss": 0.2171, "step": 25323, "teacher_loss": 0.17780913412570953 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5650689601898193, "learning_rate": 9.325224401513427e-07, "loss": 0.2189, "step": 25324, "teacher_loss": 0.18039898574352264 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.36873894929885864, "learning_rate": 9.317342734172213e-07, "loss": 0.2346, "step": 25325, "teacher_loss": 0.2196635603904724 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.4458620548248291, "learning_rate": 9.309464292262521e-07, "loss": 0.2114, "step": 25326, "teacher_loss": 0.1853460669517517 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.3334934115409851, "learning_rate": 9.301589075965005e-07, "loss": 0.1802, "step": 25327, "teacher_loss": 0.16317932307720184 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5920121669769287, "learning_rate": 9.293717085460223e-07, "loss": 0.2296, "step": 25328, "teacher_loss": 0.18938563764095306 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.292266309261322, "learning_rate": 9.285848320928647e-07, "loss": 0.186, "step": 25329, "teacher_loss": 0.1741389036178589 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.19360080361366272, "learning_rate": 9.277982782550664e-07, "loss": 0.1393, "step": 25330, "teacher_loss": 0.13323110342025757 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.29439884424209595, "learning_rate": 9.270120470506666e-07, "loss": 0.1675, "step": 25331, "teacher_loss": 0.15343180298805237 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2746466100215912, "learning_rate": 9.262261384976855e-07, "loss": 0.1783, "step": 25332, "teacher_loss": 0.1676362156867981 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.6531040668487549, "learning_rate": 9.254405526141424e-07, "loss": 0.2108, "step": 25333, "teacher_loss": 0.16169428825378418 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2605685889720917, "learning_rate": 9.246552894180526e-07, "loss": 0.1281, "step": 25334, "teacher_loss": 0.11335714906454086 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.4879167973995209, "learning_rate": 9.238703489274153e-07, "loss": 0.2511, "step": 25335, "teacher_loss": 0.22475658357143402 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.3654537796974182, "learning_rate": 9.230857311602275e-07, "loss": 0.2087, "step": 25336, "teacher_loss": 0.19128525257110596 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.14657816290855408, "learning_rate": 9.223014361344817e-07, "loss": 0.1383, "step": 25337, "teacher_loss": 0.13733136653900146 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.30210134387016296, "learning_rate": 9.21517463868155e-07, "loss": 0.1497, "step": 25338, "teacher_loss": 0.13279327750205994 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.440632700920105, "learning_rate": 9.207338143792266e-07, "loss": 0.2114, "step": 25339, "teacher_loss": 0.18590989708900452 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5739099383354187, "learning_rate": 9.199504876856551e-07, "loss": 0.187, "step": 25340, "teacher_loss": 0.14398100972175598 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.41731682419776917, "learning_rate": 9.191674838054065e-07, "loss": 0.1994, "step": 25341, "teacher_loss": 0.17516663670539856 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.598284900188446, "learning_rate": 9.183848027564329e-07, "loss": 0.2804, "step": 25342, "teacher_loss": 0.24504326283931732 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2813422679901123, "learning_rate": 9.17602444556675e-07, "loss": 0.2267, "step": 25343, "teacher_loss": 0.22058749198913574 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2508302330970764, "learning_rate": 9.168204092240734e-07, "loss": 0.1999, "step": 25344, "teacher_loss": 0.19422651827335358 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5090104341506958, "learning_rate": 9.160386967765538e-07, "loss": 0.2434, "step": 25345, "teacher_loss": 0.21392107009887695 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.29601308703422546, "learning_rate": 9.152573072320419e-07, "loss": 0.1808, "step": 25346, "teacher_loss": 0.16801732778549194 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2985702157020569, "learning_rate": 9.144762406084534e-07, "loss": 0.1842, "step": 25347, "teacher_loss": 0.17152288556098938 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5517145395278931, "learning_rate": 9.136954969236922e-07, "loss": 0.2096, "step": 25348, "teacher_loss": 0.17156654596328735 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2555495798587799, "learning_rate": 9.129150761956623e-07, "loss": 0.2071, "step": 25349, "teacher_loss": 0.2017345130443573 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.3936951756477356, "learning_rate": 9.121349784422528e-07, "loss": 0.2553, "step": 25350, "teacher_loss": 0.23988866806030273 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.43336057662963867, "learning_rate": 9.113552036813494e-07, "loss": 0.1636, "step": 25351, "teacher_loss": 0.13367554545402527 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.22284942865371704, "learning_rate": 9.105757519308344e-07, "loss": 0.1548, "step": 25352, "teacher_loss": 0.1472913920879364 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.3418649733066559, "learning_rate": 9.097966232085736e-07, "loss": 0.1777, "step": 25353, "teacher_loss": 0.1594507098197937 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.9070267081260681, "learning_rate": 9.090178175324293e-07, "loss": 0.5334, "step": 25354, "teacher_loss": 0.49193209409713745 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2651379108428955, "learning_rate": 9.082393349202639e-07, "loss": 0.1697, "step": 25355, "teacher_loss": 0.15907517075538635 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.21438665688037872, "learning_rate": 9.074611753899181e-07, "loss": 0.1543, "step": 25356, "teacher_loss": 0.14762771129608154 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.33189982175827026, "learning_rate": 9.066833389592361e-07, "loss": 0.2081, "step": 25357, "teacher_loss": 0.19438773393630981 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.6817593574523926, "learning_rate": 9.059058256460533e-07, "loss": 0.3055, "step": 25358, "teacher_loss": 0.263639897108078 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5586743354797363, "learning_rate": 9.051286354681909e-07, "loss": 0.2286, "step": 25359, "teacher_loss": 0.1919099986553192 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.1953689455986023, "learning_rate": 9.04351768443471e-07, "loss": 0.1554, "step": 25360, "teacher_loss": 0.15094473958015442 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.2034609019756317, "learning_rate": 9.035752245897061e-07, "loss": 0.169, "step": 25361, "teacher_loss": 0.16513386368751526 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5530673265457153, "learning_rate": 9.027990039246986e-07, "loss": 0.2197, "step": 25362, "teacher_loss": 0.18267199397087097 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5022286772727966, "learning_rate": 9.020231064662393e-07, "loss": 0.1652, "step": 25363, "teacher_loss": 0.12779076397418976 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.34399715065956116, "learning_rate": 9.012475322321273e-07, "loss": 0.2383, "step": 25364, "teacher_loss": 0.2266007959842682 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.24834898114204407, "learning_rate": 9.004722812401383e-07, "loss": 0.162, "step": 25365, "teacher_loss": 0.1523614227771759 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.37374264001846313, "learning_rate": 8.996973535080449e-07, "loss": 0.2024, "step": 25366, "teacher_loss": 0.1833898425102234 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.5713999271392822, "learning_rate": 8.989227490536162e-07, "loss": 0.2242, "step": 25367, "teacher_loss": 0.1855934113264084 }, { "compression_loss": 0.0, "epoch": 4.58, "label_loss": 0.7535400986671448, "learning_rate": 8.981484678946145e-07, "loss": 0.2671, "step": 25368, "teacher_loss": 0.2130289077758789 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.9359387159347534, "learning_rate": 8.973745100487873e-07, "loss": 0.2699, "step": 25369, "teacher_loss": 0.19590634107589722 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.6134337186813354, "learning_rate": 8.96600875533879e-07, "loss": 0.3052, "step": 25370, "teacher_loss": 0.27093273401260376 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2725353538990021, "learning_rate": 8.958275643676317e-07, "loss": 0.1799, "step": 25371, "teacher_loss": 0.16961748898029327 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.9865927696228027, "learning_rate": 8.950545765677732e-07, "loss": 0.3561, "step": 25372, "teacher_loss": 0.28599682450294495 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2887454032897949, "learning_rate": 8.942819121520174e-07, "loss": 0.2021, "step": 25373, "teacher_loss": 0.19245608150959015 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3581439256668091, "learning_rate": 8.93509571138092e-07, "loss": 0.2798, "step": 25374, "teacher_loss": 0.271075963973999 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.592839777469635, "learning_rate": 8.927375535436993e-07, "loss": 0.2574, "step": 25375, "teacher_loss": 0.2201356589794159 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.28875532746315, "learning_rate": 8.919658593865354e-07, "loss": 0.197, "step": 25376, "teacher_loss": 0.18677294254302979 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2306651473045349, "learning_rate": 8.91194488684296e-07, "loss": 0.1379, "step": 25377, "teacher_loss": 0.12754932045936584 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3293890953063965, "learning_rate": 8.904234414546702e-07, "loss": 0.1687, "step": 25378, "teacher_loss": 0.15081578493118286 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.9479986429214478, "learning_rate": 8.896527177153291e-07, "loss": 0.3974, "step": 25379, "teacher_loss": 0.33618634939193726 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.232370525598526, "learning_rate": 8.888823174839467e-07, "loss": 0.144, "step": 25380, "teacher_loss": 0.13421492278575897 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.31608468294143677, "learning_rate": 8.881122407781872e-07, "loss": 0.1818, "step": 25381, "teacher_loss": 0.16687358915805817 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.5733442306518555, "learning_rate": 8.873424876157016e-07, "loss": 0.2315, "step": 25382, "teacher_loss": 0.19351038336753845 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.565104603767395, "learning_rate": 8.865730580141424e-07, "loss": 0.2192, "step": 25383, "teacher_loss": 0.18073466420173645 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.4101666808128357, "learning_rate": 8.858039519911487e-07, "loss": 0.2256, "step": 25384, "teacher_loss": 0.20506048202514648 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.35804229974746704, "learning_rate": 8.85035169564355e-07, "loss": 0.1892, "step": 25385, "teacher_loss": 0.17042624950408936 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.34658539295196533, "learning_rate": 8.842667107513819e-07, "loss": 0.1539, "step": 25386, "teacher_loss": 0.13248321413993835 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.16672475636005402, "learning_rate": 8.834985755698571e-07, "loss": 0.1227, "step": 25387, "teacher_loss": 0.11782631278038025 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3592228889465332, "learning_rate": 8.82730764037385e-07, "loss": 0.1992, "step": 25388, "teacher_loss": 0.18137149512767792 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.28662294149398804, "learning_rate": 8.819632761715696e-07, "loss": 0.2121, "step": 25389, "teacher_loss": 0.2037946730852127 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.31958743929862976, "learning_rate": 8.811961119900069e-07, "loss": 0.2149, "step": 25390, "teacher_loss": 0.20322871208190918 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.29854342341423035, "learning_rate": 8.804292715102897e-07, "loss": 0.2527, "step": 25391, "teacher_loss": 0.24761945009231567 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.22436173260211945, "learning_rate": 8.796627547499953e-07, "loss": 0.2289, "step": 25392, "teacher_loss": 0.2293996512889862 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.6443582773208618, "learning_rate": 8.78896561726698e-07, "loss": 0.2623, "step": 25393, "teacher_loss": 0.21979567408561707 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.36847367882728577, "learning_rate": 8.781306924579674e-07, "loss": 0.283, "step": 25394, "teacher_loss": 0.27347368001937866 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.46845197677612305, "learning_rate": 8.773651469613592e-07, "loss": 0.1951, "step": 25395, "teacher_loss": 0.16477200388908386 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.38287344574928284, "learning_rate": 8.76599925254426e-07, "loss": 0.2299, "step": 25396, "teacher_loss": 0.2128680944442749 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.6750470399856567, "learning_rate": 8.758350273547138e-07, "loss": 0.287, "step": 25397, "teacher_loss": 0.24384649097919464 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.5757938623428345, "learning_rate": 8.75070453279757e-07, "loss": 0.2686, "step": 25398, "teacher_loss": 0.23442384600639343 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.17242567241191864, "learning_rate": 8.743062030470867e-07, "loss": 0.2021, "step": 25399, "teacher_loss": 0.20536476373672485 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.4470711052417755, "learning_rate": 8.735422766742235e-07, "loss": 0.2164, "step": 25400, "teacher_loss": 0.19080102443695068 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3072819113731384, "learning_rate": 8.72778674178682e-07, "loss": 0.1986, "step": 25401, "teacher_loss": 0.18655487895011902 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3940732181072235, "learning_rate": 8.720153955779714e-07, "loss": 0.1761, "step": 25402, "teacher_loss": 0.15186211466789246 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.42614316940307617, "learning_rate": 8.712524408895878e-07, "loss": 0.2166, "step": 25403, "teacher_loss": 0.19327794015407562 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2905365824699402, "learning_rate": 8.704898101310272e-07, "loss": 0.1825, "step": 25404, "teacher_loss": 0.17052319645881653 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 1.0438623428344727, "learning_rate": 8.697275033197738e-07, "loss": 0.3083, "step": 25405, "teacher_loss": 0.2266244888305664 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.4183996319770813, "learning_rate": 8.689655204733004e-07, "loss": 0.2045, "step": 25406, "teacher_loss": 0.18072031438350677 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2403639853000641, "learning_rate": 8.682038616090848e-07, "loss": 0.1643, "step": 25407, "teacher_loss": 0.15587864816188812 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.5101862549781799, "learning_rate": 8.674425267445829e-07, "loss": 0.163, "step": 25408, "teacher_loss": 0.12441083043813705 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.5326485633850098, "learning_rate": 8.666815158972507e-07, "loss": 0.2274, "step": 25409, "teacher_loss": 0.19348041713237762 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.4743402600288391, "learning_rate": 8.659208290845411e-07, "loss": 0.1399, "step": 25410, "teacher_loss": 0.10278697311878204 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3294079303741455, "learning_rate": 8.651604663238882e-07, "loss": 0.2003, "step": 25411, "teacher_loss": 0.18599094450473785 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.37071382999420166, "learning_rate": 8.6440042763273e-07, "loss": 0.2079, "step": 25412, "teacher_loss": 0.18985579907894135 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.26673635840415955, "learning_rate": 8.636407130284857e-07, "loss": 0.163, "step": 25413, "teacher_loss": 0.1514323651790619 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.3823966383934021, "learning_rate": 8.628813225285781e-07, "loss": 0.1923, "step": 25414, "teacher_loss": 0.17113789916038513 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2793274521827698, "learning_rate": 8.621222561504183e-07, "loss": 0.1336, "step": 25415, "teacher_loss": 0.11739656329154968 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.46780461072921753, "learning_rate": 8.613635139114057e-07, "loss": 0.2251, "step": 25416, "teacher_loss": 0.1981765627861023 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.4738069772720337, "learning_rate": 8.60605095828938e-07, "loss": 0.1998, "step": 25417, "teacher_loss": 0.16933059692382812 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2628645598888397, "learning_rate": 8.598470019204047e-07, "loss": 0.2004, "step": 25418, "teacher_loss": 0.19349327683448792 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.5679659247398376, "learning_rate": 8.590892322031835e-07, "loss": 0.2242, "step": 25419, "teacher_loss": 0.18597060441970825 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.2972109317779541, "learning_rate": 8.583317866946506e-07, "loss": 0.1537, "step": 25420, "teacher_loss": 0.13780678808689117 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.7761539816856384, "learning_rate": 8.575746654121719e-07, "loss": 0.2729, "step": 25421, "teacher_loss": 0.21698924899101257 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.29120928049087524, "learning_rate": 8.568178683731054e-07, "loss": 0.2246, "step": 25422, "teacher_loss": 0.21719610691070557 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.32808685302734375, "learning_rate": 8.560613955947971e-07, "loss": 0.2747, "step": 25423, "teacher_loss": 0.2687840163707733 }, { "compression_loss": 0.0, "epoch": 4.59, "label_loss": 0.544015109539032, "learning_rate": 8.553052470945999e-07, "loss": 0.2382, "step": 25424, "teacher_loss": 0.20420145988464355 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.7357940673828125, "learning_rate": 8.545494228898448e-07, "loss": 0.198, "step": 25425, "teacher_loss": 0.13827836513519287 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.34699589014053345, "learning_rate": 8.537939229978597e-07, "loss": 0.1623, "step": 25426, "teacher_loss": 0.14179080724716187 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4853142499923706, "learning_rate": 8.530387474359658e-07, "loss": 0.1806, "step": 25427, "teacher_loss": 0.14677976071834564 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.40237534046173096, "learning_rate": 8.522838962214824e-07, "loss": 0.1852, "step": 25428, "teacher_loss": 0.1610666811466217 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.43966007232666016, "learning_rate": 8.515293693717091e-07, "loss": 0.2458, "step": 25429, "teacher_loss": 0.22429212927818298 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.31295913457870483, "learning_rate": 8.507751669039488e-07, "loss": 0.2676, "step": 25430, "teacher_loss": 0.26260995864868164 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5544017553329468, "learning_rate": 8.500212888354958e-07, "loss": 0.2419, "step": 25431, "teacher_loss": 0.20718058943748474 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.36435991525650024, "learning_rate": 8.492677351836264e-07, "loss": 0.1754, "step": 25432, "teacher_loss": 0.15440791845321655 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.24574732780456543, "learning_rate": 8.485145059656219e-07, "loss": 0.1964, "step": 25433, "teacher_loss": 0.19092130661010742 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5613336563110352, "learning_rate": 8.477616011987549e-07, "loss": 0.2638, "step": 25434, "teacher_loss": 0.23075279593467712 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.40825074911117554, "learning_rate": 8.470090209002835e-07, "loss": 0.2068, "step": 25435, "teacher_loss": 0.18436145782470703 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.6100513339042664, "learning_rate": 8.46256765087457e-07, "loss": 0.264, "step": 25436, "teacher_loss": 0.22552308440208435 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.30441269278526306, "learning_rate": 8.455048337775334e-07, "loss": 0.2179, "step": 25437, "teacher_loss": 0.20833390951156616 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.16952374577522278, "learning_rate": 8.447532269877455e-07, "loss": 0.1545, "step": 25438, "teacher_loss": 0.15283679962158203 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.2949760854244232, "learning_rate": 8.440019447353248e-07, "loss": 0.2235, "step": 25439, "teacher_loss": 0.2156073898077011 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.6164042353630066, "learning_rate": 8.432509870374971e-07, "loss": 0.381, "step": 25440, "teacher_loss": 0.35486334562301636 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4431574046611786, "learning_rate": 8.42500353911484e-07, "loss": 0.1823, "step": 25441, "teacher_loss": 0.1533556878566742 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3262184262275696, "learning_rate": 8.417500453744864e-07, "loss": 0.1657, "step": 25442, "teacher_loss": 0.14784082770347595 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3469354510307312, "learning_rate": 8.410000614437141e-07, "loss": 0.1779, "step": 25443, "teacher_loss": 0.1590634137392044 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4059832692146301, "learning_rate": 8.402504021363599e-07, "loss": 0.2614, "step": 25444, "teacher_loss": 0.24531367421150208 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.15559996664524078, "learning_rate": 8.395010674696102e-07, "loss": 0.1235, "step": 25445, "teacher_loss": 0.11989938467741013 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.26305776834487915, "learning_rate": 8.387520574606444e-07, "loss": 0.2509, "step": 25446, "teacher_loss": 0.2495480477809906 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.7707770466804504, "learning_rate": 8.380033721266405e-07, "loss": 0.2783, "step": 25447, "teacher_loss": 0.22362074255943298 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 1.3704841136932373, "learning_rate": 8.372550114847566e-07, "loss": 0.4846, "step": 25448, "teacher_loss": 0.38620078563690186 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.47335827350616455, "learning_rate": 8.365069755521537e-07, "loss": 0.2122, "step": 25449, "teacher_loss": 0.18320009112358093 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.6355802416801453, "learning_rate": 8.3575926434598e-07, "loss": 0.2603, "step": 25450, "teacher_loss": 0.21854686737060547 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5063444375991821, "learning_rate": 8.350118778833815e-07, "loss": 0.193, "step": 25451, "teacher_loss": 0.15817567706108093 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3749358355998993, "learning_rate": 8.342648161814898e-07, "loss": 0.2237, "step": 25452, "teacher_loss": 0.20687636733055115 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4512612819671631, "learning_rate": 8.33518079257436e-07, "loss": 0.2675, "step": 25453, "teacher_loss": 0.247114896774292 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5963268280029297, "learning_rate": 8.327716671283414e-07, "loss": 0.2191, "step": 25454, "teacher_loss": 0.17713937163352966 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3764534294605255, "learning_rate": 8.320255798113141e-07, "loss": 0.2034, "step": 25455, "teacher_loss": 0.1842138022184372 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.6379756331443787, "learning_rate": 8.31279817323462e-07, "loss": 0.2181, "step": 25456, "teacher_loss": 0.17144066095352173 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.2761960029602051, "learning_rate": 8.305343796818865e-07, "loss": 0.152, "step": 25457, "teacher_loss": 0.1382140964269638 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.23853370547294617, "learning_rate": 8.297892669036721e-07, "loss": 0.1809, "step": 25458, "teacher_loss": 0.17454522848129272 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.30613529682159424, "learning_rate": 8.290444790059071e-07, "loss": 0.1841, "step": 25459, "teacher_loss": 0.17050202190876007 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.42957621812820435, "learning_rate": 8.283000160056658e-07, "loss": 0.2089, "step": 25460, "teacher_loss": 0.18433448672294617 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.48454004526138306, "learning_rate": 8.275558779200148e-07, "loss": 0.1935, "step": 25461, "teacher_loss": 0.16119526326656342 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.2686091661453247, "learning_rate": 8.268120647660188e-07, "loss": 0.1596, "step": 25462, "teacher_loss": 0.14749515056610107 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.47633621096611023, "learning_rate": 8.260685765607273e-07, "loss": 0.1991, "step": 25463, "teacher_loss": 0.16829344630241394 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.7268596887588501, "learning_rate": 8.253254133211869e-07, "loss": 0.2901, "step": 25464, "teacher_loss": 0.2415502965450287 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.6045467853546143, "learning_rate": 8.245825750644387e-07, "loss": 0.2015, "step": 25465, "teacher_loss": 0.15673410892486572 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3046342730522156, "learning_rate": 8.238400618075109e-07, "loss": 0.1791, "step": 25466, "teacher_loss": 0.1651729792356491 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5204113125801086, "learning_rate": 8.230978735674266e-07, "loss": 0.2551, "step": 25467, "teacher_loss": 0.22562071681022644 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.9977778792381287, "learning_rate": 8.223560103612071e-07, "loss": 0.2611, "step": 25468, "teacher_loss": 0.17923936247825623 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.14617115259170532, "learning_rate": 8.216144722058539e-07, "loss": 0.1661, "step": 25469, "teacher_loss": 0.16832619905471802 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5440880060195923, "learning_rate": 8.208732591183749e-07, "loss": 0.2379, "step": 25470, "teacher_loss": 0.2038961946964264 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.8916402459144592, "learning_rate": 8.201323711157583e-07, "loss": 0.3007, "step": 25471, "teacher_loss": 0.2350865602493286 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4210309386253357, "learning_rate": 8.193918082149954e-07, "loss": 0.2552, "step": 25472, "teacher_loss": 0.23679476976394653 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3036521077156067, "learning_rate": 8.18651570433061e-07, "loss": 0.1956, "step": 25473, "teacher_loss": 0.1836080402135849 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5067384243011475, "learning_rate": 8.179116577869283e-07, "loss": 0.2125, "step": 25474, "teacher_loss": 0.17984187602996826 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.5642558336257935, "learning_rate": 8.17172070293562e-07, "loss": 0.2493, "step": 25475, "teacher_loss": 0.2143518030643463 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.2145422250032425, "learning_rate": 8.164328079699168e-07, "loss": 0.1587, "step": 25476, "teacher_loss": 0.15252180397510529 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.4642295837402344, "learning_rate": 8.156938708329425e-07, "loss": 0.2059, "step": 25477, "teacher_loss": 0.1772378534078598 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.8076434135437012, "learning_rate": 8.149552588995824e-07, "loss": 0.2553, "step": 25478, "teacher_loss": 0.1938788741827011 }, { "compression_loss": 0.0, "epoch": 4.6, "label_loss": 0.3025141656398773, "learning_rate": 8.142169721867676e-07, "loss": 0.2193, "step": 25479, "teacher_loss": 0.21002745628356934 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3230145573616028, "learning_rate": 8.134790107114265e-07, "loss": 0.204, "step": 25480, "teacher_loss": 0.19083133339881897 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6536225080490112, "learning_rate": 8.127413744904805e-07, "loss": 0.2325, "step": 25481, "teacher_loss": 0.18567653000354767 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.7507622241973877, "learning_rate": 8.120040635408377e-07, "loss": 0.2449, "step": 25482, "teacher_loss": 0.1886594593524933 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.854945719242096, "learning_rate": 8.112670778794029e-07, "loss": 0.2485, "step": 25483, "teacher_loss": 0.18110023438930511 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.2710120975971222, "learning_rate": 8.10530417523076e-07, "loss": 0.1533, "step": 25484, "teacher_loss": 0.14023497700691223 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6301982402801514, "learning_rate": 8.097940824887451e-07, "loss": 0.2576, "step": 25485, "teacher_loss": 0.21624766290187836 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.11365041881799698, "learning_rate": 8.090580727932867e-07, "loss": 0.1281, "step": 25486, "teacher_loss": 0.12970973551273346 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.525229811668396, "learning_rate": 8.08322388453584e-07, "loss": 0.1991, "step": 25487, "teacher_loss": 0.16285625100135803 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3677288293838501, "learning_rate": 8.075870294865018e-07, "loss": 0.2031, "step": 25488, "teacher_loss": 0.18481279909610748 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3909282088279724, "learning_rate": 8.068519959088932e-07, "loss": 0.2642, "step": 25489, "teacher_loss": 0.2501263916492462 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3609829843044281, "learning_rate": 8.061172877376166e-07, "loss": 0.1573, "step": 25490, "teacher_loss": 0.13462717831134796 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.49601975083351135, "learning_rate": 8.053829049895184e-07, "loss": 0.2005, "step": 25491, "teacher_loss": 0.1676623672246933 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3902442455291748, "learning_rate": 8.046488476814284e-07, "loss": 0.236, "step": 25492, "teacher_loss": 0.21890190243721008 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 1.0367602109909058, "learning_rate": 8.039151158301816e-07, "loss": 0.3502, "step": 25493, "teacher_loss": 0.27386409044265747 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.39509326219558716, "learning_rate": 8.031817094526012e-07, "loss": 0.1843, "step": 25494, "teacher_loss": 0.16091583669185638 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6413173675537109, "learning_rate": 8.024486285654986e-07, "loss": 0.2566, "step": 25495, "teacher_loss": 0.2138597071170807 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.9523268342018127, "learning_rate": 8.017158731856788e-07, "loss": 0.301, "step": 25496, "teacher_loss": 0.22865082323551178 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 1.1157842874526978, "learning_rate": 8.009834433299502e-07, "loss": 0.2772, "step": 25497, "teacher_loss": 0.18404605984687805 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.28363293409347534, "learning_rate": 8.002513390151007e-07, "loss": 0.2075, "step": 25498, "teacher_loss": 0.19903671741485596 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6049840450286865, "learning_rate": 7.995195602579119e-07, "loss": 0.1992, "step": 25499, "teacher_loss": 0.15406033396720886 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.37473469972610474, "learning_rate": 7.98788107075164e-07, "loss": 0.2156, "step": 25500, "teacher_loss": 0.19788554310798645 }, { "epoch": 4.61, "eval_exact_match": 80.3027436140019, "eval_f1": 87.60761573294316, "step": 25500 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.5121930837631226, "learning_rate": 7.980569794836318e-07, "loss": 0.2816, "step": 25501, "teacher_loss": 0.25595369935035706 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6750133037567139, "learning_rate": 7.973261775000684e-07, "loss": 0.2871, "step": 25502, "teacher_loss": 0.24401862919330597 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.749721348285675, "learning_rate": 7.965957011412355e-07, "loss": 0.249, "step": 25503, "teacher_loss": 0.19331002235412598 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.2263377606868744, "learning_rate": 7.958655504238815e-07, "loss": 0.1633, "step": 25504, "teacher_loss": 0.15634959936141968 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.20941588282585144, "learning_rate": 7.951357253647412e-07, "loss": 0.1195, "step": 25505, "teacher_loss": 0.10953295230865479 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.41801372170448303, "learning_rate": 7.944062259805513e-07, "loss": 0.214, "step": 25506, "teacher_loss": 0.19129467010498047 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.306363046169281, "learning_rate": 7.936770522880365e-07, "loss": 0.1807, "step": 25507, "teacher_loss": 0.16670529544353485 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.24499621987342834, "learning_rate": 7.929482043039137e-07, "loss": 0.1745, "step": 25508, "teacher_loss": 0.16666947305202484 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.295045405626297, "learning_rate": 7.922196820448929e-07, "loss": 0.1857, "step": 25509, "teacher_loss": 0.17360520362854004 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.39220762252807617, "learning_rate": 7.914914855276806e-07, "loss": 0.1692, "step": 25510, "teacher_loss": 0.14439141750335693 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.25716090202331543, "learning_rate": 7.907636147689684e-07, "loss": 0.1795, "step": 25511, "teacher_loss": 0.17090007662773132 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.4754082262516022, "learning_rate": 7.900360697854431e-07, "loss": 0.2108, "step": 25512, "teacher_loss": 0.18141329288482666 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.24112233519554138, "learning_rate": 7.893088505937862e-07, "loss": 0.1868, "step": 25513, "teacher_loss": 0.18073318898677826 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3661770224571228, "learning_rate": 7.885819572106745e-07, "loss": 0.2132, "step": 25514, "teacher_loss": 0.19624172151088715 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.7823975682258606, "learning_rate": 7.878553896527696e-07, "loss": 0.2745, "step": 25515, "teacher_loss": 0.2180669605731964 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.40380364656448364, "learning_rate": 7.871291479367281e-07, "loss": 0.1888, "step": 25516, "teacher_loss": 0.164909228682518 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 1.0214440822601318, "learning_rate": 7.86403232079207e-07, "loss": 0.3167, "step": 25517, "teacher_loss": 0.23839518427848816 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.210858553647995, "learning_rate": 7.856776420968409e-07, "loss": 0.1678, "step": 25518, "teacher_loss": 0.1630110740661621 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.24769769608974457, "learning_rate": 7.849523780062717e-07, "loss": 0.202, "step": 25519, "teacher_loss": 0.19696620106697083 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.4556310176849365, "learning_rate": 7.842274398241262e-07, "loss": 0.2185, "step": 25520, "teacher_loss": 0.19211503863334656 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.4361347556114197, "learning_rate": 7.835028275670225e-07, "loss": 0.2345, "step": 25521, "teacher_loss": 0.212068572640419 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3823814392089844, "learning_rate": 7.827785412515792e-07, "loss": 0.1816, "step": 25522, "teacher_loss": 0.15929976105690002 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.7110509872436523, "learning_rate": 7.820545808943947e-07, "loss": 0.4805, "step": 25523, "teacher_loss": 0.4548507630825043 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 1.0242714881896973, "learning_rate": 7.813309465120721e-07, "loss": 0.2745, "step": 25524, "teacher_loss": 0.1912326216697693 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.424579381942749, "learning_rate": 7.806076381212018e-07, "loss": 0.1867, "step": 25525, "teacher_loss": 0.16026383638381958 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.10036291182041168, "learning_rate": 7.798846557383655e-07, "loss": 0.1271, "step": 25526, "teacher_loss": 0.13009056448936462 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3729804754257202, "learning_rate": 7.791619993801413e-07, "loss": 0.1984, "step": 25527, "teacher_loss": 0.17896929383277893 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.5194668769836426, "learning_rate": 7.784396690630963e-07, "loss": 0.2131, "step": 25528, "teacher_loss": 0.17905762791633606 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.6632795333862305, "learning_rate": 7.777176648037887e-07, "loss": 0.2371, "step": 25529, "teacher_loss": 0.18977192044258118 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.18019890785217285, "learning_rate": 7.769959866187787e-07, "loss": 0.1243, "step": 25530, "teacher_loss": 0.11805213987827301 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3497493267059326, "learning_rate": 7.762746345246046e-07, "loss": 0.2343, "step": 25531, "teacher_loss": 0.22145143151283264 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.1685025691986084, "learning_rate": 7.755536085378067e-07, "loss": 0.1656, "step": 25532, "teacher_loss": 0.16525262594223022 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.3634466528892517, "learning_rate": 7.748329086749217e-07, "loss": 0.1728, "step": 25533, "teacher_loss": 0.151571124792099 }, { "compression_loss": 0.0, "epoch": 4.61, "label_loss": 0.7089089155197144, "learning_rate": 7.741125349524664e-07, "loss": 0.2548, "step": 25534, "teacher_loss": 0.2043761909008026 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.712272047996521, "learning_rate": 7.733924873869608e-07, "loss": 0.2565, "step": 25535, "teacher_loss": 0.2058541178703308 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.19297215342521667, "learning_rate": 7.726727659949101e-07, "loss": 0.2351, "step": 25536, "teacher_loss": 0.23980659246444702 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.9660158157348633, "learning_rate": 7.719533707928178e-07, "loss": 0.3255, "step": 25537, "teacher_loss": 0.2543077766895294 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.342582106590271, "learning_rate": 7.712343017971774e-07, "loss": 0.2185, "step": 25538, "teacher_loss": 0.20470136404037476 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.43560075759887695, "learning_rate": 7.705155590244739e-07, "loss": 0.1936, "step": 25539, "teacher_loss": 0.1667361706495285 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5639803409576416, "learning_rate": 7.697971424911843e-07, "loss": 0.3377, "step": 25540, "teacher_loss": 0.31259313225746155 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.27974072098731995, "learning_rate": 7.690790522137853e-07, "loss": 0.1763, "step": 25541, "teacher_loss": 0.16482123732566833 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.1829862892627716, "learning_rate": 7.683612882087354e-07, "loss": 0.1623, "step": 25542, "teacher_loss": 0.1599699854850769 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.31607168912887573, "learning_rate": 7.676438504924915e-07, "loss": 0.1649, "step": 25543, "teacher_loss": 0.14807146787643433 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.21336233615875244, "learning_rate": 7.669267390815072e-07, "loss": 0.1692, "step": 25544, "teacher_loss": 0.1643315851688385 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.28381022810935974, "learning_rate": 7.662099539922174e-07, "loss": 0.2104, "step": 25545, "teacher_loss": 0.20228806138038635 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.44479912519454956, "learning_rate": 7.654934952410559e-07, "loss": 0.2231, "step": 25546, "teacher_loss": 0.19846567511558533 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.6885885000228882, "learning_rate": 7.647773628444543e-07, "loss": 0.3207, "step": 25547, "teacher_loss": 0.2797755002975464 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.42295897006988525, "learning_rate": 7.640615568188297e-07, "loss": 0.1948, "step": 25548, "teacher_loss": 0.16939936578273773 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.45511189103126526, "learning_rate": 7.633460771805872e-07, "loss": 0.2829, "step": 25549, "teacher_loss": 0.26379692554473877 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.4520115256309509, "learning_rate": 7.626309239461387e-07, "loss": 0.1953, "step": 25550, "teacher_loss": 0.16677230596542358 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3982808589935303, "learning_rate": 7.619160971318779e-07, "loss": 0.1543, "step": 25551, "teacher_loss": 0.12723052501678467 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.45673221349716187, "learning_rate": 7.612015967541913e-07, "loss": 0.1848, "step": 25552, "teacher_loss": 0.15456461906433105 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5517880916595459, "learning_rate": 7.604874228294611e-07, "loss": 0.2521, "step": 25553, "teacher_loss": 0.21880221366882324 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.29940539598464966, "learning_rate": 7.597735753740659e-07, "loss": 0.1522, "step": 25554, "teacher_loss": 0.13589292764663696 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.7003461122512817, "learning_rate": 7.590600544043641e-07, "loss": 0.2888, "step": 25555, "teacher_loss": 0.24308457970619202 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.7185056209564209, "learning_rate": 7.583468599367194e-07, "loss": 0.2463, "step": 25556, "teacher_loss": 0.19388815760612488 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5118451714515686, "learning_rate": 7.576339919874853e-07, "loss": 0.1889, "step": 25557, "teacher_loss": 0.15300381183624268 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5901716947555542, "learning_rate": 7.569214505730021e-07, "loss": 0.2599, "step": 25558, "teacher_loss": 0.22320988774299622 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.23936668038368225, "learning_rate": 7.562092357096034e-07, "loss": 0.1534, "step": 25559, "teacher_loss": 0.14387652277946472 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.35094988346099854, "learning_rate": 7.554973474136245e-07, "loss": 0.2182, "step": 25560, "teacher_loss": 0.20348691940307617 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3597275912761688, "learning_rate": 7.547857857013857e-07, "loss": 0.2287, "step": 25561, "teacher_loss": 0.21412979066371918 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.2918512225151062, "learning_rate": 7.540745505891972e-07, "loss": 0.2444, "step": 25562, "teacher_loss": 0.2391512393951416 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.21225619316101074, "learning_rate": 7.533636420933676e-07, "loss": 0.1826, "step": 25563, "teacher_loss": 0.17935171723365784 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.33985981345176697, "learning_rate": 7.52653060230199e-07, "loss": 0.2089, "step": 25564, "teacher_loss": 0.19436706602573395 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3982829749584198, "learning_rate": 7.519428050159765e-07, "loss": 0.2305, "step": 25565, "teacher_loss": 0.21181833744049072 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.38846033811569214, "learning_rate": 7.51232876466989e-07, "loss": 0.189, "step": 25566, "teacher_loss": 0.1667933613061905 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.41075244545936584, "learning_rate": 7.505232745995116e-07, "loss": 0.1615, "step": 25567, "teacher_loss": 0.133827343583107 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.2745397984981537, "learning_rate": 7.498139994298131e-07, "loss": 0.1802, "step": 25568, "teacher_loss": 0.1696842908859253 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.30793362855911255, "learning_rate": 7.491050509741554e-07, "loss": 0.2666, "step": 25569, "teacher_loss": 0.2619755268096924 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.25527530908584595, "learning_rate": 7.483964292487938e-07, "loss": 0.1715, "step": 25570, "teacher_loss": 0.16220590472221375 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5103049278259277, "learning_rate": 7.476881342699721e-07, "loss": 0.2718, "step": 25571, "teacher_loss": 0.24528378248214722 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.2960830628871918, "learning_rate": 7.469801660539321e-07, "loss": 0.1921, "step": 25572, "teacher_loss": 0.18057605624198914 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3303460478782654, "learning_rate": 7.462725246169028e-07, "loss": 0.175, "step": 25573, "teacher_loss": 0.1577080339193344 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3989889323711395, "learning_rate": 7.455652099751109e-07, "loss": 0.2498, "step": 25574, "teacher_loss": 0.23317056894302368 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3871164321899414, "learning_rate": 7.448582221447702e-07, "loss": 0.218, "step": 25575, "teacher_loss": 0.1991792768239975 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.45671385526657104, "learning_rate": 7.441515611420913e-07, "loss": 0.1974, "step": 25576, "teacher_loss": 0.1685422658920288 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5563538670539856, "learning_rate": 7.434452269832776e-07, "loss": 0.189, "step": 25577, "teacher_loss": 0.14819224178791046 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.2985634505748749, "learning_rate": 7.427392196845195e-07, "loss": 0.1743, "step": 25578, "teacher_loss": 0.16052848100662231 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.9515265226364136, "learning_rate": 7.420335392620059e-07, "loss": 0.2893, "step": 25579, "teacher_loss": 0.21571172773838043 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3071005642414093, "learning_rate": 7.413281857319171e-07, "loss": 0.189, "step": 25580, "teacher_loss": 0.17588868737220764 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.4065256416797638, "learning_rate": 7.406231591104218e-07, "loss": 0.1607, "step": 25581, "teacher_loss": 0.13335296511650085 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.35869109630584717, "learning_rate": 7.399184594136854e-07, "loss": 0.2627, "step": 25582, "teacher_loss": 0.2520214915275574 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.2841533124446869, "learning_rate": 7.392140866578667e-07, "loss": 0.2265, "step": 25583, "teacher_loss": 0.22011739015579224 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.17395257949829102, "learning_rate": 7.385100408591111e-07, "loss": 0.1695, "step": 25584, "teacher_loss": 0.16905122995376587 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.4164174795150757, "learning_rate": 7.37806322033564e-07, "loss": 0.2321, "step": 25585, "teacher_loss": 0.21157222986221313 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.21585646271705627, "learning_rate": 7.371029301973559e-07, "loss": 0.1595, "step": 25586, "teacher_loss": 0.15321585536003113 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5045528411865234, "learning_rate": 7.363998653666154e-07, "loss": 0.2399, "step": 25587, "teacher_loss": 0.21044325828552246 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.500564694404602, "learning_rate": 7.356971275574632e-07, "loss": 0.2539, "step": 25588, "teacher_loss": 0.22650116682052612 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.5118403434753418, "learning_rate": 7.349947167860077e-07, "loss": 0.2323, "step": 25589, "teacher_loss": 0.20127728581428528 }, { "compression_loss": 0.0, "epoch": 4.62, "label_loss": 0.3456307351589203, "learning_rate": 7.342926330683531e-07, "loss": 0.1836, "step": 25590, "teacher_loss": 0.16555237770080566 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5445907115936279, "learning_rate": 7.335908764206012e-07, "loss": 0.2796, "step": 25591, "teacher_loss": 0.25020328164100647 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.34492504596710205, "learning_rate": 7.328894468588343e-07, "loss": 0.1381, "step": 25592, "teacher_loss": 0.11506979912519455 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5372344255447388, "learning_rate": 7.321883443991412e-07, "loss": 0.1994, "step": 25593, "teacher_loss": 0.16191115975379944 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.30108100175857544, "learning_rate": 7.314875690575889e-07, "loss": 0.2287, "step": 25594, "teacher_loss": 0.22062557935714722 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5125848650932312, "learning_rate": 7.307871208502498e-07, "loss": 0.2031, "step": 25595, "teacher_loss": 0.16869372129440308 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.38981691002845764, "learning_rate": 7.300869997931792e-07, "loss": 0.2319, "step": 25596, "teacher_loss": 0.2143700271844864 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.6277039051055908, "learning_rate": 7.293872059024292e-07, "loss": 0.2517, "step": 25597, "teacher_loss": 0.209959015250206 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4528503715991974, "learning_rate": 7.286877391940472e-07, "loss": 0.3272, "step": 25598, "teacher_loss": 0.3132913112640381 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.19746926426887512, "learning_rate": 7.279885996840669e-07, "loss": 0.1739, "step": 25599, "teacher_loss": 0.17132464051246643 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.1433059573173523, "learning_rate": 7.27289787388517e-07, "loss": 0.1802, "step": 25600, "teacher_loss": 0.1842639148235321 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.21477825939655304, "learning_rate": 7.265913023234233e-07, "loss": 0.1647, "step": 25601, "teacher_loss": 0.15916195511817932 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.18871772289276123, "learning_rate": 7.258931445047945e-07, "loss": 0.1656, "step": 25602, "teacher_loss": 0.16308562457561493 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.2316170036792755, "learning_rate": 7.251953139486394e-07, "loss": 0.1533, "step": 25603, "teacher_loss": 0.14454349875450134 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.43806397914886475, "learning_rate": 7.244978106709621e-07, "loss": 0.2314, "step": 25604, "teacher_loss": 0.20841875672340393 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.3230438828468323, "learning_rate": 7.238006346877463e-07, "loss": 0.1923, "step": 25605, "teacher_loss": 0.17780175805091858 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.15291255712509155, "learning_rate": 7.231037860149791e-07, "loss": 0.1477, "step": 25606, "teacher_loss": 0.14714054763317108 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.29740434885025024, "learning_rate": 7.224072646686397e-07, "loss": 0.1349, "step": 25607, "teacher_loss": 0.11679884046316147 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4729596972465515, "learning_rate": 7.217110706646968e-07, "loss": 0.2186, "step": 25608, "teacher_loss": 0.19033998250961304 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5154913663864136, "learning_rate": 7.210152040191042e-07, "loss": 0.1992, "step": 25609, "teacher_loss": 0.1640281081199646 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5146692991256714, "learning_rate": 7.203196647478277e-07, "loss": 0.1948, "step": 25610, "teacher_loss": 0.15926247835159302 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.6050976514816284, "learning_rate": 7.196244528668094e-07, "loss": 0.2294, "step": 25611, "teacher_loss": 0.1876990795135498 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.25553473830223083, "learning_rate": 7.189295683919849e-07, "loss": 0.2326, "step": 25612, "teacher_loss": 0.23007425665855408 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.43125730752944946, "learning_rate": 7.182350113392883e-07, "loss": 0.2004, "step": 25613, "teacher_loss": 0.17477869987487793 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.5232822299003601, "learning_rate": 7.175407817246466e-07, "loss": 0.2006, "step": 25614, "teacher_loss": 0.16470903158187866 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.31230634450912476, "learning_rate": 7.168468795639704e-07, "loss": 0.235, "step": 25615, "teacher_loss": 0.22640183568000793 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.37346571683883667, "learning_rate": 7.161533048731722e-07, "loss": 0.183, "step": 25616, "teacher_loss": 0.161783367395401 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.9874559044837952, "learning_rate": 7.154600576681575e-07, "loss": 0.4559, "step": 25617, "teacher_loss": 0.396862655878067 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.2866028845310211, "learning_rate": 7.147671379648152e-07, "loss": 0.202, "step": 25618, "teacher_loss": 0.19260576367378235 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.31290319561958313, "learning_rate": 7.140745457790276e-07, "loss": 0.1381, "step": 25619, "teacher_loss": 0.11868340522050858 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.8316222429275513, "learning_rate": 7.133822811266854e-07, "loss": 0.3716, "step": 25620, "teacher_loss": 0.32044750452041626 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.585129976272583, "learning_rate": 7.126903440236526e-07, "loss": 0.2385, "step": 25621, "teacher_loss": 0.20002171397209167 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.39797505736351013, "learning_rate": 7.119987344857948e-07, "loss": 0.2714, "step": 25622, "teacher_loss": 0.25735002756118774 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.2894400954246521, "learning_rate": 7.113074525289659e-07, "loss": 0.2532, "step": 25623, "teacher_loss": 0.2491888403892517 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.9540418386459351, "learning_rate": 7.1061649816902e-07, "loss": 0.3049, "step": 25624, "teacher_loss": 0.23282021284103394 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.3704535663127899, "learning_rate": 7.099258714217944e-07, "loss": 0.2433, "step": 25625, "teacher_loss": 0.22912028431892395 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.6917981505393982, "learning_rate": 7.092355723031246e-07, "loss": 0.5024, "step": 25626, "teacher_loss": 0.4813328683376312 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.35530149936676025, "learning_rate": 7.0854560082884e-07, "loss": 0.2394, "step": 25627, "teacher_loss": 0.22655722498893738 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.7794751524925232, "learning_rate": 7.078559570147542e-07, "loss": 0.3024, "step": 25628, "teacher_loss": 0.24943819642066956 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4597625732421875, "learning_rate": 7.071666408766814e-07, "loss": 0.2521, "step": 25629, "teacher_loss": 0.22905993461608887 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.21277078986167908, "learning_rate": 7.064776524304256e-07, "loss": 0.1723, "step": 25630, "teacher_loss": 0.1677531898021698 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.40412086248397827, "learning_rate": 7.057889916917826e-07, "loss": 0.2839, "step": 25631, "teacher_loss": 0.27058184146881104 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.588576078414917, "learning_rate": 7.05100658676543e-07, "loss": 0.2404, "step": 25632, "teacher_loss": 0.20175254344940186 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4729962944984436, "learning_rate": 7.044126534004875e-07, "loss": 0.2411, "step": 25633, "teacher_loss": 0.21537786722183228 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.2468685805797577, "learning_rate": 7.037249758793884e-07, "loss": 0.1605, "step": 25634, "teacher_loss": 0.15085354447364807 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4662829339504242, "learning_rate": 7.030376261290134e-07, "loss": 0.2232, "step": 25635, "teacher_loss": 0.19621434807777405 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.3375188410282135, "learning_rate": 7.023506041651196e-07, "loss": 0.1931, "step": 25636, "teacher_loss": 0.17704719305038452 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.3260479271411896, "learning_rate": 7.016639100034627e-07, "loss": 0.1929, "step": 25637, "teacher_loss": 0.1780531257390976 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.09531199932098389, "learning_rate": 7.009775436597804e-07, "loss": 0.1397, "step": 25638, "teacher_loss": 0.14464889466762543 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.49762487411499023, "learning_rate": 7.002915051498132e-07, "loss": 0.1918, "step": 25639, "teacher_loss": 0.15777619183063507 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.8030431866645813, "learning_rate": 6.99605794489292e-07, "loss": 0.2045, "step": 25640, "teacher_loss": 0.1380133032798767 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.8867901563644409, "learning_rate": 6.989204116939324e-07, "loss": 0.2593, "step": 25641, "teacher_loss": 0.18958324193954468 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.4173845648765564, "learning_rate": 6.982353567794503e-07, "loss": 0.2097, "step": 25642, "teacher_loss": 0.18664461374282837 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.3869072496891022, "learning_rate": 6.975506297615547e-07, "loss": 0.2059, "step": 25643, "teacher_loss": 0.18580420315265656 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.31851133704185486, "learning_rate": 6.968662306559398e-07, "loss": 0.2094, "step": 25644, "teacher_loss": 0.19726628065109253 }, { "compression_loss": 0.0, "epoch": 4.63, "label_loss": 0.11432275176048279, "learning_rate": 6.96182159478303e-07, "loss": 0.1876, "step": 25645, "teacher_loss": 0.19571228325366974 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7669801712036133, "learning_rate": 6.954984162443201e-07, "loss": 0.2468, "step": 25646, "teacher_loss": 0.18897007405757904 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.2976863384246826, "learning_rate": 6.948150009696736e-07, "loss": 0.1836, "step": 25647, "teacher_loss": 0.17088714241981506 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3376414179801941, "learning_rate": 6.941319136700292e-07, "loss": 0.1776, "step": 25648, "teacher_loss": 0.15978124737739563 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.30033180117607117, "learning_rate": 6.934491543610494e-07, "loss": 0.1745, "step": 25649, "teacher_loss": 0.16054022312164307 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6786854267120361, "learning_rate": 6.927667230583851e-07, "loss": 0.3449, "step": 25650, "teacher_loss": 0.3078462481498718 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.15645639598369598, "learning_rate": 6.920846197776887e-07, "loss": 0.204, "step": 25651, "teacher_loss": 0.20927633345127106 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5199425220489502, "learning_rate": 6.91402844534591e-07, "loss": 0.2192, "step": 25652, "teacher_loss": 0.1858280599117279 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.46262991428375244, "learning_rate": 6.907213973447279e-07, "loss": 0.2432, "step": 25653, "teacher_loss": 0.2187812477350235 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.41090065240859985, "learning_rate": 6.90040278223722e-07, "loss": 0.1812, "step": 25654, "teacher_loss": 0.155717134475708 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.2709306478500366, "learning_rate": 6.893594871871889e-07, "loss": 0.1455, "step": 25655, "teacher_loss": 0.131536602973938 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3858177661895752, "learning_rate": 6.88679024250738e-07, "loss": 0.1804, "step": 25656, "teacher_loss": 0.15756326913833618 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.23808319866657257, "learning_rate": 6.879988894299682e-07, "loss": 0.1945, "step": 25657, "teacher_loss": 0.18960733711719513 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3815857172012329, "learning_rate": 6.873190827404757e-07, "loss": 0.1871, "step": 25658, "teacher_loss": 0.1655081808567047 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.43557536602020264, "learning_rate": 6.866396041978429e-07, "loss": 0.2346, "step": 25659, "teacher_loss": 0.21221569180488586 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5912463665008545, "learning_rate": 6.859604538176506e-07, "loss": 0.2836, "step": 25660, "teacher_loss": 0.2494516223669052 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.47608721256256104, "learning_rate": 6.852816316154714e-07, "loss": 0.1997, "step": 25661, "teacher_loss": 0.1689501702785492 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3079376518726349, "learning_rate": 6.846031376068645e-07, "loss": 0.1617, "step": 25662, "teacher_loss": 0.1454661339521408 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.35969436168670654, "learning_rate": 6.839249718073875e-07, "loss": 0.1552, "step": 25663, "teacher_loss": 0.13246308267116547 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.40328487753868103, "learning_rate": 6.83247134232593e-07, "loss": 0.2761, "step": 25664, "teacher_loss": 0.2619214653968811 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.2900179624557495, "learning_rate": 6.825696248980135e-07, "loss": 0.1598, "step": 25665, "teacher_loss": 0.14527863264083862 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.4816819429397583, "learning_rate": 6.818924438191881e-07, "loss": 0.2588, "step": 25666, "teacher_loss": 0.23401620984077454 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.16003361344337463, "learning_rate": 6.812155910116429e-07, "loss": 0.174, "step": 25667, "teacher_loss": 0.17555996775627136 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.24029219150543213, "learning_rate": 6.80539066490894e-07, "loss": 0.2021, "step": 25668, "teacher_loss": 0.19783999025821686 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7034240961074829, "learning_rate": 6.798628702724469e-07, "loss": 0.2927, "step": 25669, "teacher_loss": 0.24701057374477386 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.16648727655410767, "learning_rate": 6.791870023718161e-07, "loss": 0.1135, "step": 25670, "teacher_loss": 0.10763737559318542 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6138750314712524, "learning_rate": 6.785114628044908e-07, "loss": 0.2036, "step": 25671, "teacher_loss": 0.15796923637390137 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.06280869990587234, "learning_rate": 6.778362515859554e-07, "loss": 0.1131, "step": 25672, "teacher_loss": 0.1187053918838501 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5124033689498901, "learning_rate": 6.771613687316991e-07, "loss": 0.2114, "step": 25673, "teacher_loss": 0.17797735333442688 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.23839521408081055, "learning_rate": 6.764868142571895e-07, "loss": 0.2266, "step": 25674, "teacher_loss": 0.2252405732870102 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6884697675704956, "learning_rate": 6.758125881778926e-07, "loss": 0.3043, "step": 25675, "teacher_loss": 0.2615863084793091 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.4559873640537262, "learning_rate": 6.75138690509266e-07, "loss": 0.2664, "step": 25676, "teacher_loss": 0.24529647827148438 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.19168272614479065, "learning_rate": 6.744651212667624e-07, "loss": 0.1591, "step": 25677, "teacher_loss": 0.15546078979969025 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5121338367462158, "learning_rate": 6.737918804658227e-07, "loss": 0.2452, "step": 25678, "teacher_loss": 0.2155478298664093 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6976661682128906, "learning_rate": 6.73118968121883e-07, "loss": 0.2138, "step": 25679, "teacher_loss": 0.1600068062543869 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6152940988540649, "learning_rate": 6.724463842503725e-07, "loss": 0.2473, "step": 25680, "teacher_loss": 0.2064152956008911 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3929380774497986, "learning_rate": 6.717741288667106e-07, "loss": 0.2205, "step": 25681, "teacher_loss": 0.20134258270263672 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.9830864667892456, "learning_rate": 6.71102201986305e-07, "loss": 0.3438, "step": 25682, "teacher_loss": 0.2727489471435547 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7680139541625977, "learning_rate": 6.7043060362457e-07, "loss": 0.204, "step": 25683, "teacher_loss": 0.14131270349025726 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5719360113143921, "learning_rate": 6.697593337968982e-07, "loss": 0.3337, "step": 25684, "teacher_loss": 0.3072816729545593 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7995215058326721, "learning_rate": 6.690883925186792e-07, "loss": 0.2476, "step": 25685, "teacher_loss": 0.1862201690673828 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5603985786437988, "learning_rate": 6.68417779805297e-07, "loss": 0.2464, "step": 25686, "teacher_loss": 0.2115139365196228 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.42832452058792114, "learning_rate": 6.677474956721296e-07, "loss": 0.2659, "step": 25687, "teacher_loss": 0.24789221584796906 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3751183748245239, "learning_rate": 6.670775401345397e-07, "loss": 0.1863, "step": 25688, "teacher_loss": 0.16534754633903503 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.4386843740940094, "learning_rate": 6.664079132078881e-07, "loss": 0.2543, "step": 25689, "teacher_loss": 0.2337934672832489 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.27081501483917236, "learning_rate": 6.657386149075328e-07, "loss": 0.1694, "step": 25690, "teacher_loss": 0.1581096351146698 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.6064146757125854, "learning_rate": 6.650696452488114e-07, "loss": 0.368, "step": 25691, "teacher_loss": 0.3415566682815552 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.5103209018707275, "learning_rate": 6.64401004247065e-07, "loss": 0.2843, "step": 25692, "teacher_loss": 0.25915688276290894 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.29974669218063354, "learning_rate": 6.637326919176246e-07, "loss": 0.137, "step": 25693, "teacher_loss": 0.11896763741970062 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7630175352096558, "learning_rate": 6.630647082758095e-07, "loss": 0.2738, "step": 25694, "teacher_loss": 0.21943911910057068 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.17620250582695007, "learning_rate": 6.623970533369377e-07, "loss": 0.1645, "step": 25695, "teacher_loss": 0.1632494032382965 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.26038858294487, "learning_rate": 6.617297271163153e-07, "loss": 0.203, "step": 25696, "teacher_loss": 0.19657838344573975 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.7561020851135254, "learning_rate": 6.610627296292415e-07, "loss": 0.3138, "step": 25697, "teacher_loss": 0.26467224955558777 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.34898027777671814, "learning_rate": 6.603960608910076e-07, "loss": 0.2069, "step": 25698, "teacher_loss": 0.19112563133239746 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.3105284571647644, "learning_rate": 6.597297209169012e-07, "loss": 0.1863, "step": 25699, "teacher_loss": 0.1725083589553833 }, { "compression_loss": 0.0, "epoch": 4.64, "label_loss": 0.33823779225349426, "learning_rate": 6.590637097221985e-07, "loss": 0.1797, "step": 25700, "teacher_loss": 0.16206462681293488 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.5531293749809265, "learning_rate": 6.583980273221657e-07, "loss": 0.2197, "step": 25701, "teacher_loss": 0.1826213002204895 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.634173572063446, "learning_rate": 6.577326737320688e-07, "loss": 0.2187, "step": 25702, "teacher_loss": 0.1725606620311737 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.5032634735107422, "learning_rate": 6.57067648967164e-07, "loss": 0.3513, "step": 25703, "teacher_loss": 0.33441758155822754 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.7262142896652222, "learning_rate": 6.564029530426924e-07, "loss": 0.3258, "step": 25704, "teacher_loss": 0.28128886222839355 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.27903062105178833, "learning_rate": 6.557385859738985e-07, "loss": 0.1565, "step": 25705, "teacher_loss": 0.14290419220924377 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.20355144143104553, "learning_rate": 6.550745477760133e-07, "loss": 0.1509, "step": 25706, "teacher_loss": 0.14506977796554565 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.30342260003089905, "learning_rate": 6.544108384642583e-07, "loss": 0.1726, "step": 25707, "teacher_loss": 0.15808308124542236 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.2158779799938202, "learning_rate": 6.537474580538543e-07, "loss": 0.2043, "step": 25708, "teacher_loss": 0.20299354195594788 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.4494536221027374, "learning_rate": 6.530844065600078e-07, "loss": 0.2172, "step": 25709, "teacher_loss": 0.19143524765968323 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3886851668357849, "learning_rate": 6.524216839979214e-07, "loss": 0.1932, "step": 25710, "teacher_loss": 0.17144650220870972 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.31629908084869385, "learning_rate": 6.517592903827896e-07, "loss": 0.2398, "step": 25711, "teacher_loss": 0.23134204745292664 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.4288142919540405, "learning_rate": 6.510972257297987e-07, "loss": 0.2319, "step": 25712, "teacher_loss": 0.21001750230789185 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.32861459255218506, "learning_rate": 6.504354900541282e-07, "loss": 0.205, "step": 25713, "teacher_loss": 0.19124548137187958 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.22582606971263885, "learning_rate": 6.497740833709509e-07, "loss": 0.1728, "step": 25714, "teacher_loss": 0.16688984632492065 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.26213061809539795, "learning_rate": 6.491130056954297e-07, "loss": 0.2253, "step": 25715, "teacher_loss": 0.22125375270843506 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.25960099697113037, "learning_rate": 6.484522570427193e-07, "loss": 0.1948, "step": 25716, "teacher_loss": 0.1875881552696228 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.200932577252388, "learning_rate": 6.477918374279723e-07, "loss": 0.1764, "step": 25717, "teacher_loss": 0.17363235354423523 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.2921501398086548, "learning_rate": 6.471317468663284e-07, "loss": 0.2458, "step": 25718, "teacher_loss": 0.24060551822185516 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.45219215750694275, "learning_rate": 6.464719853729206e-07, "loss": 0.2282, "step": 25719, "teacher_loss": 0.20335917174816132 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3557063937187195, "learning_rate": 6.45812552962875e-07, "loss": 0.2059, "step": 25720, "teacher_loss": 0.189256489276886 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.24168744683265686, "learning_rate": 6.451534496513129e-07, "loss": 0.1773, "step": 25721, "teacher_loss": 0.1701613962650299 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3478865921497345, "learning_rate": 6.444946754533438e-07, "loss": 0.2043, "step": 25722, "teacher_loss": 0.1883271038532257 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3882679343223572, "learning_rate": 6.438362303840706e-07, "loss": 0.1598, "step": 25723, "teacher_loss": 0.1343889832496643 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 1.0647556781768799, "learning_rate": 6.43178114458593e-07, "loss": 0.312, "step": 25724, "teacher_loss": 0.22836434841156006 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.39925098419189453, "learning_rate": 6.425203276919956e-07, "loss": 0.2356, "step": 25725, "teacher_loss": 0.21741333603858948 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.2608669698238373, "learning_rate": 6.418628700993611e-07, "loss": 0.1856, "step": 25726, "teacher_loss": 0.17718997597694397 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.29058313369750977, "learning_rate": 6.412057416957645e-07, "loss": 0.1787, "step": 25727, "teacher_loss": 0.16628967225551605 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.23646050691604614, "learning_rate": 6.405489424962685e-07, "loss": 0.1688, "step": 25728, "teacher_loss": 0.16126041114330292 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.30153656005859375, "learning_rate": 6.398924725159328e-07, "loss": 0.2615, "step": 25729, "teacher_loss": 0.2570296823978424 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.17028766870498657, "learning_rate": 6.392363317698118e-07, "loss": 0.2035, "step": 25730, "teacher_loss": 0.20723098516464233 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.4668811559677124, "learning_rate": 6.385805202729455e-07, "loss": 0.223, "step": 25731, "teacher_loss": 0.19588899612426758 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.6332976818084717, "learning_rate": 6.379250380403667e-07, "loss": 0.2293, "step": 25732, "teacher_loss": 0.18445730209350586 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.5107493996620178, "learning_rate": 6.372698850871101e-07, "loss": 0.4154, "step": 25733, "teacher_loss": 0.4048248529434204 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3618752956390381, "learning_rate": 6.366150614281934e-07, "loss": 0.2644, "step": 25734, "teacher_loss": 0.2535882592201233 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.6782310009002686, "learning_rate": 6.359605670786284e-07, "loss": 0.2652, "step": 25735, "teacher_loss": 0.2192636877298355 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.41506946086883545, "learning_rate": 6.35306402053421e-07, "loss": 0.3022, "step": 25736, "teacher_loss": 0.2896573543548584 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.2939988076686859, "learning_rate": 6.346525663675728e-07, "loss": 0.1835, "step": 25737, "teacher_loss": 0.17123769223690033 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.30140218138694763, "learning_rate": 6.339990600360701e-07, "loss": 0.2192, "step": 25738, "teacher_loss": 0.21011903882026672 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.25520533323287964, "learning_rate": 6.333458830738975e-07, "loss": 0.2916, "step": 25739, "teacher_loss": 0.29566842317581177 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3339117765426636, "learning_rate": 6.326930354960314e-07, "loss": 0.1804, "step": 25740, "teacher_loss": 0.16332292556762695 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.1938106268644333, "learning_rate": 6.320405173174398e-07, "loss": 0.199, "step": 25741, "teacher_loss": 0.19963189959526062 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.6745530366897583, "learning_rate": 6.313883285530775e-07, "loss": 0.3251, "step": 25742, "teacher_loss": 0.2862340807914734 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3562083840370178, "learning_rate": 6.307364692179041e-07, "loss": 0.2187, "step": 25743, "teacher_loss": 0.2034008800983429 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.2091292142868042, "learning_rate": 6.300849393268626e-07, "loss": 0.1729, "step": 25744, "teacher_loss": 0.1689196527004242 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.4034806489944458, "learning_rate": 6.294337388948895e-07, "loss": 0.1921, "step": 25745, "teacher_loss": 0.16856998205184937 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.41607001423835754, "learning_rate": 6.287828679369145e-07, "loss": 0.2601, "step": 25746, "teacher_loss": 0.24279560148715973 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.13569307327270508, "learning_rate": 6.28132326467864e-07, "loss": 0.2127, "step": 25747, "teacher_loss": 0.2212495058774948 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.6997978091239929, "learning_rate": 6.274821145026477e-07, "loss": 0.2123, "step": 25748, "teacher_loss": 0.15816038846969604 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3258850574493408, "learning_rate": 6.268322320561753e-07, "loss": 0.1814, "step": 25749, "teacher_loss": 0.1653926521539688 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.5280095338821411, "learning_rate": 6.261826791433484e-07, "loss": 0.239, "step": 25750, "teacher_loss": 0.20692920684814453 }, { "epoch": 4.65, "eval_exact_match": 80.5771050141911, "eval_f1": 87.79693509641605, "step": 25750 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.4342218339443207, "learning_rate": 6.255334557790565e-07, "loss": 0.2126, "step": 25751, "teacher_loss": 0.1880270391702652 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.25451648235321045, "learning_rate": 6.248845619781862e-07, "loss": 0.1481, "step": 25752, "teacher_loss": 0.13623470067977905 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.45443764328956604, "learning_rate": 6.242359977556156e-07, "loss": 0.2384, "step": 25753, "teacher_loss": 0.21434694528579712 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3491644263267517, "learning_rate": 6.235877631262093e-07, "loss": 0.1862, "step": 25754, "teacher_loss": 0.16814668476581573 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.3918796181678772, "learning_rate": 6.22939858104834e-07, "loss": 0.2335, "step": 25755, "teacher_loss": 0.2158847451210022 }, { "compression_loss": 0.0, "epoch": 4.65, "label_loss": 0.20084181427955627, "learning_rate": 6.22292282706346e-07, "loss": 0.1867, "step": 25756, "teacher_loss": 0.18509957194328308 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.4287026822566986, "learning_rate": 6.216450369455867e-07, "loss": 0.2106, "step": 25757, "teacher_loss": 0.18631555140018463 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.8670519590377808, "learning_rate": 6.209981208373993e-07, "loss": 0.2976, "step": 25758, "teacher_loss": 0.23430058360099792 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.24358630180358887, "learning_rate": 6.203515343966137e-07, "loss": 0.1545, "step": 25759, "teacher_loss": 0.14460918307304382 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.2879866361618042, "learning_rate": 6.197052776380563e-07, "loss": 0.1686, "step": 25760, "teacher_loss": 0.15537038445472717 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.24190539121627808, "learning_rate": 6.190593505765401e-07, "loss": 0.1817, "step": 25761, "teacher_loss": 0.17497220635414124 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.558539092540741, "learning_rate": 6.184137532268769e-07, "loss": 0.3034, "step": 25762, "teacher_loss": 0.27502989768981934 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.39625072479248047, "learning_rate": 6.177684856038712e-07, "loss": 0.2025, "step": 25763, "teacher_loss": 0.18094715476036072 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.29281866550445557, "learning_rate": 6.171235477223114e-07, "loss": 0.1585, "step": 25764, "teacher_loss": 0.14356368780136108 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.4449898302555084, "learning_rate": 6.164789395969855e-07, "loss": 0.1826, "step": 25765, "teacher_loss": 0.15349453687667847 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.3851661682128906, "learning_rate": 6.158346612426769e-07, "loss": 0.1965, "step": 25766, "teacher_loss": 0.17556919157505035 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.20041537284851074, "learning_rate": 6.151907126741502e-07, "loss": 0.178, "step": 25767, "teacher_loss": 0.1755596399307251 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.9967182874679565, "learning_rate": 6.145470939061754e-07, "loss": 0.697, "step": 25768, "teacher_loss": 0.6636947393417358 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.24394464492797852, "learning_rate": 6.139038049535039e-07, "loss": 0.1818, "step": 25769, "teacher_loss": 0.1748485267162323 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.6486415863037109, "learning_rate": 6.132608458308875e-07, "loss": 0.2324, "step": 25770, "teacher_loss": 0.18617627024650574 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.8195672035217285, "learning_rate": 6.126182165530658e-07, "loss": 0.2712, "step": 25771, "teacher_loss": 0.21029126644134521 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.17748457193374634, "learning_rate": 6.119759171347722e-07, "loss": 0.1706, "step": 25772, "teacher_loss": 0.1698291152715683 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.5725502967834473, "learning_rate": 6.113339475907331e-07, "loss": 0.1868, "step": 25773, "teacher_loss": 0.14397728443145752 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.142591655254364, "learning_rate": 6.106923079356703e-07, "loss": 0.1733, "step": 25774, "teacher_loss": 0.1767416149377823 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.3679741322994232, "learning_rate": 6.100509981842883e-07, "loss": 0.3127, "step": 25775, "teacher_loss": 0.3065851926803589 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.27976611256599426, "learning_rate": 6.094100183512924e-07, "loss": 0.1878, "step": 25776, "teacher_loss": 0.17762205004692078 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.5629473924636841, "learning_rate": 6.08769368451384e-07, "loss": 0.2225, "step": 25777, "teacher_loss": 0.18463751673698425 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.2546696662902832, "learning_rate": 6.08129048499243e-07, "loss": 0.199, "step": 25778, "teacher_loss": 0.19276180863380432 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.5035284161567688, "learning_rate": 6.074890585095544e-07, "loss": 0.2043, "step": 25779, "teacher_loss": 0.17109665274620056 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.4446781277656555, "learning_rate": 6.068493984969931e-07, "loss": 0.2318, "step": 25780, "teacher_loss": 0.20811288058757782 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.31238439679145813, "learning_rate": 6.062100684762223e-07, "loss": 0.2012, "step": 25781, "teacher_loss": 0.18881163001060486 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.22814130783081055, "learning_rate": 6.055710684618971e-07, "loss": 0.1909, "step": 25782, "teacher_loss": 0.18673820793628693 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.11562801897525787, "learning_rate": 6.049323984686706e-07, "loss": 0.1475, "step": 25783, "teacher_loss": 0.1510196030139923 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 1.1436388492584229, "learning_rate": 6.042940585111878e-07, "loss": 0.4129, "step": 25784, "teacher_loss": 0.33171939849853516 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.48620516061782837, "learning_rate": 6.036560486040805e-07, "loss": 0.2535, "step": 25785, "teacher_loss": 0.22760051488876343 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.28465378284454346, "learning_rate": 6.030183687619767e-07, "loss": 0.1578, "step": 25786, "teacher_loss": 0.14365479350090027 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.19246599078178406, "learning_rate": 6.023810189994983e-07, "loss": 0.1458, "step": 25787, "teacher_loss": 0.14062535762786865 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.7967947721481323, "learning_rate": 6.017439993312568e-07, "loss": 0.2367, "step": 25788, "teacher_loss": 0.17451095581054688 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.3969930410385132, "learning_rate": 6.011073097718556e-07, "loss": 0.2657, "step": 25789, "teacher_loss": 0.25114157795906067 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.41787639260292053, "learning_rate": 6.004709503358963e-07, "loss": 0.1738, "step": 25790, "teacher_loss": 0.14666375517845154 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.2856449782848358, "learning_rate": 5.998349210379656e-07, "loss": 0.1764, "step": 25791, "teacher_loss": 0.16425052285194397 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.29506975412368774, "learning_rate": 5.991992218926434e-07, "loss": 0.2261, "step": 25792, "teacher_loss": 0.21844318509101868 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.24840043485164642, "learning_rate": 5.985638529145115e-07, "loss": 0.1918, "step": 25793, "teacher_loss": 0.18555346131324768 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.39574193954467773, "learning_rate": 5.979288141181316e-07, "loss": 0.2516, "step": 25794, "teacher_loss": 0.2356257438659668 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.7872180938720703, "learning_rate": 5.972941055180603e-07, "loss": 0.2333, "step": 25795, "teacher_loss": 0.17171096801757812 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.7270216941833496, "learning_rate": 5.966597271288576e-07, "loss": 0.2162, "step": 25796, "teacher_loss": 0.15946140885353088 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.32171833515167236, "learning_rate": 5.960256789650637e-07, "loss": 0.1657, "step": 25797, "teacher_loss": 0.148350328207016 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.3438599109649658, "learning_rate": 5.95391961041215e-07, "loss": 0.2084, "step": 25798, "teacher_loss": 0.19338008761405945 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.49039924144744873, "learning_rate": 5.947585733718402e-07, "loss": 0.2439, "step": 25799, "teacher_loss": 0.21648633480072021 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.24343052506446838, "learning_rate": 5.941255159714643e-07, "loss": 0.188, "step": 25800, "teacher_loss": 0.18182893097400665 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.6688398122787476, "learning_rate": 5.934927888545972e-07, "loss": 0.2455, "step": 25801, "teacher_loss": 0.19842402637004852 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.46803441643714905, "learning_rate": 5.928603920357473e-07, "loss": 0.2016, "step": 25802, "teacher_loss": 0.1720259040594101 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.8841602802276611, "learning_rate": 5.922283255294164e-07, "loss": 0.2648, "step": 25803, "teacher_loss": 0.19603434205055237 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.264207661151886, "learning_rate": 5.91596589350093e-07, "loss": 0.1601, "step": 25804, "teacher_loss": 0.14856381714344025 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.4181753993034363, "learning_rate": 5.90965183512257e-07, "loss": 0.1864, "step": 25805, "teacher_loss": 0.16061249375343323 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.3414986729621887, "learning_rate": 5.903341080303937e-07, "loss": 0.2045, "step": 25806, "teacher_loss": 0.18928295373916626 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.29306602478027344, "learning_rate": 5.897033629189646e-07, "loss": 0.2084, "step": 25807, "teacher_loss": 0.19904470443725586 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.4902295768260956, "learning_rate": 5.890729481924334e-07, "loss": 0.2712, "step": 25808, "teacher_loss": 0.2468898892402649 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.38052690029144287, "learning_rate": 5.884428638652534e-07, "loss": 0.2066, "step": 25809, "teacher_loss": 0.18727847933769226 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.09350591897964478, "learning_rate": 5.878131099518713e-07, "loss": 0.1806, "step": 25810, "teacher_loss": 0.19027632474899292 }, { "compression_loss": 0.0, "epoch": 4.66, "label_loss": 0.8894171118736267, "learning_rate": 5.871836864667224e-07, "loss": 0.3266, "step": 25811, "teacher_loss": 0.26401466131210327 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.33329227566719055, "learning_rate": 5.8655459342424e-07, "loss": 0.2228, "step": 25812, "teacher_loss": 0.21049004793167114 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.21610203385353088, "learning_rate": 5.859258308388493e-07, "loss": 0.192, "step": 25813, "teacher_loss": 0.1893693506717682 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 1.2249739170074463, "learning_rate": 5.852973987249622e-07, "loss": 0.2797, "step": 25814, "teacher_loss": 0.17469385266304016 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6751925945281982, "learning_rate": 5.846692970969869e-07, "loss": 0.2552, "step": 25815, "teacher_loss": 0.20848111808300018 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.3831615447998047, "learning_rate": 5.840415259693271e-07, "loss": 0.2966, "step": 25816, "teacher_loss": 0.2869266867637634 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6506332755088806, "learning_rate": 5.83414085356373e-07, "loss": 0.2634, "step": 25817, "teacher_loss": 0.22037768363952637 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.5384520888328552, "learning_rate": 5.827869752725129e-07, "loss": 0.1952, "step": 25818, "teacher_loss": 0.15702220797538757 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.21663898229599, "learning_rate": 5.821601957321205e-07, "loss": 0.1718, "step": 25819, "teacher_loss": 0.1668156087398529 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.3887439966201782, "learning_rate": 5.815337467495674e-07, "loss": 0.206, "step": 25820, "teacher_loss": 0.18573185801506042 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.16601096093654633, "learning_rate": 5.809076283392173e-07, "loss": 0.1483, "step": 25821, "teacher_loss": 0.14637970924377441 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.22529301047325134, "learning_rate": 5.802818405154236e-07, "loss": 0.176, "step": 25822, "teacher_loss": 0.17051096260547638 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6318166255950928, "learning_rate": 5.796563832925384e-07, "loss": 0.2397, "step": 25823, "teacher_loss": 0.19617262482643127 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2434415966272354, "learning_rate": 5.790312566848932e-07, "loss": 0.254, "step": 25824, "teacher_loss": 0.2551822364330292 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.4386853277683258, "learning_rate": 5.784064607068268e-07, "loss": 0.2015, "step": 25825, "teacher_loss": 0.17517027258872986 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2766663432121277, "learning_rate": 5.777819953726643e-07, "loss": 0.1875, "step": 25826, "teacher_loss": 0.1776140034198761 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.30967405438423157, "learning_rate": 5.771578606967176e-07, "loss": 0.1982, "step": 25827, "teacher_loss": 0.18580153584480286 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.40260255336761475, "learning_rate": 5.765340566932986e-07, "loss": 0.2086, "step": 25828, "teacher_loss": 0.18705829977989197 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.5702332258224487, "learning_rate": 5.759105833767125e-07, "loss": 0.2061, "step": 25829, "teacher_loss": 0.1656341254711151 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.375188410282135, "learning_rate": 5.752874407612496e-07, "loss": 0.2271, "step": 25830, "teacher_loss": 0.21059849858283997 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.7497886419296265, "learning_rate": 5.746646288612001e-07, "loss": 0.2201, "step": 25831, "teacher_loss": 0.16120293736457825 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.4084872603416443, "learning_rate": 5.740421476908391e-07, "loss": 0.2104, "step": 25832, "teacher_loss": 0.18836282193660736 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2332880198955536, "learning_rate": 5.734199972644405e-07, "loss": 0.1626, "step": 25833, "teacher_loss": 0.1547866016626358 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.33135986328125, "learning_rate": 5.72798177596271e-07, "loss": 0.1766, "step": 25834, "teacher_loss": 0.1593727171421051 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2378404438495636, "learning_rate": 5.721766887005808e-07, "loss": 0.2245, "step": 25835, "teacher_loss": 0.22299891710281372 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.49611324071884155, "learning_rate": 5.71555530591622e-07, "loss": 0.2316, "step": 25836, "teacher_loss": 0.202169269323349 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.46056610345840454, "learning_rate": 5.709347032836398e-07, "loss": 0.2216, "step": 25837, "teacher_loss": 0.19508647918701172 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.5267481803894043, "learning_rate": 5.703142067908613e-07, "loss": 0.207, "step": 25838, "teacher_loss": 0.17151404917240143 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.3611549437046051, "learning_rate": 5.696940411275165e-07, "loss": 0.2579, "step": 25839, "teacher_loss": 0.24646760523319244 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.17160308361053467, "learning_rate": 5.690742063078242e-07, "loss": 0.2163, "step": 25840, "teacher_loss": 0.2213120311498642 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.32062798738479614, "learning_rate": 5.68454702345993e-07, "loss": 0.2008, "step": 25841, "teacher_loss": 0.18744905292987823 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.317829430103302, "learning_rate": 5.678355292562248e-07, "loss": 0.2204, "step": 25842, "teacher_loss": 0.20954085886478424 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.24208903312683105, "learning_rate": 5.672166870527235e-07, "loss": 0.1942, "step": 25843, "teacher_loss": 0.18889720737934113 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.3107760548591614, "learning_rate": 5.665981757496691e-07, "loss": 0.1781, "step": 25844, "teacher_loss": 0.1633348912000656 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.990404486656189, "learning_rate": 5.659799953612438e-07, "loss": 0.3312, "step": 25845, "teacher_loss": 0.25790154933929443 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2966078519821167, "learning_rate": 5.653621459016229e-07, "loss": 0.1971, "step": 25846, "teacher_loss": 0.18606823682785034 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.36667609214782715, "learning_rate": 5.647446273849716e-07, "loss": 0.2097, "step": 25847, "teacher_loss": 0.19222337007522583 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.36348435282707214, "learning_rate": 5.641274398254454e-07, "loss": 0.2474, "step": 25848, "teacher_loss": 0.23451447486877441 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.4853397011756897, "learning_rate": 5.635105832371962e-07, "loss": 0.2359, "step": 25849, "teacher_loss": 0.20815902948379517 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6518690586090088, "learning_rate": 5.628940576343677e-07, "loss": 0.2333, "step": 25850, "teacher_loss": 0.18679219484329224 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.8329676389694214, "learning_rate": 5.622778630310921e-07, "loss": 0.2228, "step": 25851, "teacher_loss": 0.15503638982772827 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.9448129534721375, "learning_rate": 5.616619994414996e-07, "loss": 0.3441, "step": 25852, "teacher_loss": 0.2773763835430145 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.5836213827133179, "learning_rate": 5.610464668797088e-07, "loss": 0.2321, "step": 25853, "teacher_loss": 0.1930159032344818 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.41513097286224365, "learning_rate": 5.604312653598337e-07, "loss": 0.2477, "step": 25854, "teacher_loss": 0.22908906638622284 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6520373821258545, "learning_rate": 5.598163948959745e-07, "loss": 0.2191, "step": 25855, "teacher_loss": 0.17097894847393036 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2728990912437439, "learning_rate": 5.592018555022332e-07, "loss": 0.2029, "step": 25856, "teacher_loss": 0.1950836181640625 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.9071080088615417, "learning_rate": 5.585876471927004e-07, "loss": 0.3273, "step": 25857, "teacher_loss": 0.2628253400325775 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.5643683671951294, "learning_rate": 5.579737699814513e-07, "loss": 0.2332, "step": 25858, "teacher_loss": 0.19640018045902252 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.621512770652771, "learning_rate": 5.573602238825648e-07, "loss": 0.2175, "step": 25859, "teacher_loss": 0.17263853549957275 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.42945218086242676, "learning_rate": 5.567470089101078e-07, "loss": 0.2696, "step": 25860, "teacher_loss": 0.2518458366394043 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.20666979253292084, "learning_rate": 5.561341250781393e-07, "loss": 0.1899, "step": 25861, "teacher_loss": 0.18803198635578156 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.22259894013404846, "learning_rate": 5.555215724007079e-07, "loss": 0.1907, "step": 25862, "teacher_loss": 0.18710389733314514 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.6280340552330017, "learning_rate": 5.54909350891864e-07, "loss": 0.2556, "step": 25863, "teacher_loss": 0.21424134075641632 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.49876752495765686, "learning_rate": 5.542974605656382e-07, "loss": 0.2816, "step": 25864, "teacher_loss": 0.2574690878391266 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.2558981776237488, "learning_rate": 5.53685901436059e-07, "loss": 0.1591, "step": 25865, "teacher_loss": 0.1483120322227478 }, { "compression_loss": 0.0, "epoch": 4.67, "label_loss": 0.3983690142631531, "learning_rate": 5.530746735171521e-07, "loss": 0.1658, "step": 25866, "teacher_loss": 0.13998062908649445 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3608073592185974, "learning_rate": 5.524637768229296e-07, "loss": 0.3361, "step": 25867, "teacher_loss": 0.3333306908607483 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6249392032623291, "learning_rate": 5.518532113673952e-07, "loss": 0.2665, "step": 25868, "teacher_loss": 0.22670280933380127 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.4085375666618347, "learning_rate": 5.512429771645477e-07, "loss": 0.2039, "step": 25869, "teacher_loss": 0.1812104433774948 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.40966761112213135, "learning_rate": 5.506330742283827e-07, "loss": 0.2384, "step": 25870, "teacher_loss": 0.2193642556667328 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5498589277267456, "learning_rate": 5.500235025728772e-07, "loss": 0.199, "step": 25871, "teacher_loss": 0.16002202033996582 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5666399598121643, "learning_rate": 5.494142622120085e-07, "loss": 0.2975, "step": 25872, "teacher_loss": 0.26759302616119385 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6202941536903381, "learning_rate": 5.488053531597487e-07, "loss": 0.2412, "step": 25873, "teacher_loss": 0.19907745718955994 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 1.4289371967315674, "learning_rate": 5.481967754300532e-07, "loss": 0.4802, "step": 25874, "teacher_loss": 0.3747596740722656 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.32796916365623474, "learning_rate": 5.47588529036876e-07, "loss": 0.1842, "step": 25875, "teacher_loss": 0.16820621490478516 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5980788469314575, "learning_rate": 5.469806139941657e-07, "loss": 0.3136, "step": 25876, "teacher_loss": 0.28204143047332764 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.33545443415641785, "learning_rate": 5.463730303158565e-07, "loss": 0.176, "step": 25877, "teacher_loss": 0.1582593470811844 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3517675995826721, "learning_rate": 5.457657780158787e-07, "loss": 0.2267, "step": 25878, "teacher_loss": 0.21277594566345215 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5439707040786743, "learning_rate": 5.451588571081579e-07, "loss": 0.2126, "step": 25879, "teacher_loss": 0.17580318450927734 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5084367394447327, "learning_rate": 5.445522676066045e-07, "loss": 0.1918, "step": 25880, "teacher_loss": 0.15664368867874146 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6170657277107239, "learning_rate": 5.439460095251292e-07, "loss": 0.224, "step": 25881, "teacher_loss": 0.18032225966453552 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.30646902322769165, "learning_rate": 5.433400828776291e-07, "loss": 0.1885, "step": 25882, "teacher_loss": 0.17540176212787628 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5375478267669678, "learning_rate": 5.427344876779966e-07, "loss": 0.2198, "step": 25883, "teacher_loss": 0.18446362018585205 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.46183329820632935, "learning_rate": 5.421292239401205e-07, "loss": 0.2322, "step": 25884, "teacher_loss": 0.20667783915996552 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.42828792333602905, "learning_rate": 5.415242916778729e-07, "loss": 0.2164, "step": 25885, "teacher_loss": 0.19288897514343262 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.4741644859313965, "learning_rate": 5.409196909051245e-07, "loss": 0.2713, "step": 25886, "teacher_loss": 0.248794823884964 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.19874463975429535, "learning_rate": 5.403154216357359e-07, "loss": 0.154, "step": 25887, "teacher_loss": 0.14904123544692993 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3233036696910858, "learning_rate": 5.397114838835627e-07, "loss": 0.1596, "step": 25888, "teacher_loss": 0.14137771725654602 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.4941243529319763, "learning_rate": 5.391078776624519e-07, "loss": 0.2258, "step": 25889, "teacher_loss": 0.19601207971572876 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3330090343952179, "learning_rate": 5.385046029862412e-07, "loss": 0.1743, "step": 25890, "teacher_loss": 0.15662416815757751 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.1532750129699707, "learning_rate": 5.379016598687625e-07, "loss": 0.192, "step": 25891, "teacher_loss": 0.1963367760181427 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 1.2537908554077148, "learning_rate": 5.372990483238382e-07, "loss": 0.3827, "step": 25892, "teacher_loss": 0.2858789563179016 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.7413392066955566, "learning_rate": 5.366967683652857e-07, "loss": 0.2431, "step": 25893, "teacher_loss": 0.18769536912441254 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.27084922790527344, "learning_rate": 5.360948200069138e-07, "loss": 0.1833, "step": 25894, "teacher_loss": 0.173615962266922 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.49576735496520996, "learning_rate": 5.354932032625215e-07, "loss": 0.1867, "step": 25895, "teacher_loss": 0.15232758224010468 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3868037462234497, "learning_rate": 5.348919181459028e-07, "loss": 0.1668, "step": 25896, "teacher_loss": 0.1423090100288391 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6711556911468506, "learning_rate": 5.34290964670845e-07, "loss": 0.239, "step": 25897, "teacher_loss": 0.19097892940044403 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.47073009610176086, "learning_rate": 5.336903428511236e-07, "loss": 0.2727, "step": 25898, "teacher_loss": 0.2506440579891205 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5025217533111572, "learning_rate": 5.330900527005111e-07, "loss": 0.1952, "step": 25899, "teacher_loss": 0.1610921323299408 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.46776843070983887, "learning_rate": 5.324900942327715e-07, "loss": 0.2487, "step": 25900, "teacher_loss": 0.22436915338039398 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.32462388277053833, "learning_rate": 5.318904674616554e-07, "loss": 0.2332, "step": 25901, "teacher_loss": 0.222994863986969 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.7252461314201355, "learning_rate": 5.312911724009135e-07, "loss": 0.2046, "step": 25902, "teacher_loss": 0.1467868685722351 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.5057038068771362, "learning_rate": 5.30692209064288e-07, "loss": 0.2614, "step": 25903, "teacher_loss": 0.23421171307563782 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.4243880808353424, "learning_rate": 5.30093577465508e-07, "loss": 0.1984, "step": 25904, "teacher_loss": 0.17333757877349854 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.29853904247283936, "learning_rate": 5.294952776182976e-07, "loss": 0.1986, "step": 25905, "teacher_loss": 0.18751922249794006 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.2802067995071411, "learning_rate": 5.288973095363775e-07, "loss": 0.2131, "step": 25906, "teacher_loss": 0.20567169785499573 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.3249923288822174, "learning_rate": 5.28299673233455e-07, "loss": 0.1695, "step": 25907, "teacher_loss": 0.15224722027778625 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6771851181983948, "learning_rate": 5.277023687232324e-07, "loss": 0.2565, "step": 25908, "teacher_loss": 0.2097373753786087 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.0902882069349289, "learning_rate": 5.271053960194022e-07, "loss": 0.1744, "step": 25909, "teacher_loss": 0.1837047040462494 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.19433481991291046, "learning_rate": 5.265087551356567e-07, "loss": 0.1422, "step": 25910, "teacher_loss": 0.13642413914203644 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.24260929226875305, "learning_rate": 5.259124460856701e-07, "loss": 0.1859, "step": 25911, "teacher_loss": 0.1795925796031952 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.25381535291671753, "learning_rate": 5.253164688831146e-07, "loss": 0.191, "step": 25912, "teacher_loss": 0.1839832216501236 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.40698927640914917, "learning_rate": 5.247208235416578e-07, "loss": 0.2184, "step": 25913, "teacher_loss": 0.19747236371040344 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.24295800924301147, "learning_rate": 5.24125510074952e-07, "loss": 0.242, "step": 25914, "teacher_loss": 0.24186952412128448 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.7454047799110413, "learning_rate": 5.235305284966446e-07, "loss": 0.2314, "step": 25915, "teacher_loss": 0.1743362843990326 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.38935086131095886, "learning_rate": 5.229358788203831e-07, "loss": 0.1942, "step": 25916, "teacher_loss": 0.1725580096244812 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.19750481843948364, "learning_rate": 5.223415610597981e-07, "loss": 0.1343, "step": 25917, "teacher_loss": 0.12725582718849182 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.18662095069885254, "learning_rate": 5.217475752285106e-07, "loss": 0.1837, "step": 25918, "teacher_loss": 0.1834072321653366 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.4534686207771301, "learning_rate": 5.211539213401462e-07, "loss": 0.1539, "step": 25919, "teacher_loss": 0.12057413160800934 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.6859146356582642, "learning_rate": 5.205605994083124e-07, "loss": 0.2754, "step": 25920, "teacher_loss": 0.22978872060775757 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.2941237986087799, "learning_rate": 5.199676094466116e-07, "loss": 0.1949, "step": 25921, "teacher_loss": 0.18383683264255524 }, { "compression_loss": 0.0, "epoch": 4.68, "label_loss": 0.27393823862075806, "learning_rate": 5.193749514686397e-07, "loss": 0.1796, "step": 25922, "teacher_loss": 0.169064462184906 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.27222585678100586, "learning_rate": 5.187826254879874e-07, "loss": 0.2022, "step": 25923, "teacher_loss": 0.19440282881259918 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.470312237739563, "learning_rate": 5.181906315182289e-07, "loss": 0.2323, "step": 25924, "teacher_loss": 0.20589923858642578 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.4206134080886841, "learning_rate": 5.175989695729432e-07, "loss": 0.2463, "step": 25925, "teacher_loss": 0.2269621193408966 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.5262861847877502, "learning_rate": 5.170076396656931e-07, "loss": 0.3077, "step": 25926, "teacher_loss": 0.2834537625312805 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.20970401167869568, "learning_rate": 5.164166418100341e-07, "loss": 0.1903, "step": 25927, "teacher_loss": 0.18818338215351105 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.51181960105896, "learning_rate": 5.158259760195155e-07, "loss": 0.2269, "step": 25928, "teacher_loss": 0.19521787762641907 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3169773817062378, "learning_rate": 5.152356423076848e-07, "loss": 0.1959, "step": 25929, "teacher_loss": 0.18239985406398773 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.37351781129837036, "learning_rate": 5.146456406880745e-07, "loss": 0.2204, "step": 25930, "teacher_loss": 0.20340529084205627 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.7145411968231201, "learning_rate": 5.140559711742071e-07, "loss": 0.2744, "step": 25931, "teacher_loss": 0.2254788875579834 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.14445710182189941, "learning_rate": 5.134666337796051e-07, "loss": 0.2229, "step": 25932, "teacher_loss": 0.23160099983215332 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.5382006168365479, "learning_rate": 5.128776285177827e-07, "loss": 0.2505, "step": 25933, "teacher_loss": 0.21857401728630066 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3074144423007965, "learning_rate": 5.122889554022392e-07, "loss": 0.2199, "step": 25934, "teacher_loss": 0.2101493775844574 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.45264333486557007, "learning_rate": 5.117006144464736e-07, "loss": 0.2588, "step": 25935, "teacher_loss": 0.23724156618118286 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.4242282211780548, "learning_rate": 5.11112605663977e-07, "loss": 0.2144, "step": 25936, "teacher_loss": 0.1910373866558075 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.1507379114627838, "learning_rate": 5.105249290682267e-07, "loss": 0.1449, "step": 25937, "teacher_loss": 0.14425115287303925 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3932763934135437, "learning_rate": 5.099375846726972e-07, "loss": 0.2092, "step": 25938, "teacher_loss": 0.18879303336143494 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.19561371207237244, "learning_rate": 5.093505724908576e-07, "loss": 0.2192, "step": 25939, "teacher_loss": 0.22182133793830872 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.30664703249931335, "learning_rate": 5.087638925361621e-07, "loss": 0.169, "step": 25940, "teacher_loss": 0.15368971228599548 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 1.2652349472045898, "learning_rate": 5.081775448220666e-07, "loss": 0.2993, "step": 25941, "teacher_loss": 0.19198143482208252 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.4722893238067627, "learning_rate": 5.075915293620087e-07, "loss": 0.2134, "step": 25942, "teacher_loss": 0.18463854491710663 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.601287305355072, "learning_rate": 5.070058461694261e-07, "loss": 0.1649, "step": 25943, "teacher_loss": 0.11640842258930206 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.39461201429367065, "learning_rate": 5.064204952577494e-07, "loss": 0.2429, "step": 25944, "teacher_loss": 0.225990429520607 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.15408770740032196, "learning_rate": 5.05835476640395e-07, "loss": 0.1635, "step": 25945, "teacher_loss": 0.16453981399536133 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.4335036277770996, "learning_rate": 5.052507903307785e-07, "loss": 0.1624, "step": 25946, "teacher_loss": 0.13228052854537964 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.5900661945343018, "learning_rate": 5.046664363423042e-07, "loss": 0.2615, "step": 25947, "teacher_loss": 0.22496896982192993 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.6857079267501831, "learning_rate": 5.040824146883665e-07, "loss": 0.2572, "step": 25948, "teacher_loss": 0.20953483879566193 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3605722188949585, "learning_rate": 5.034987253823614e-07, "loss": 0.173, "step": 25949, "teacher_loss": 0.15216027200222015 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.5704779624938965, "learning_rate": 5.029153684376664e-07, "loss": 0.278, "step": 25950, "teacher_loss": 0.24549362063407898 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.26498937606811523, "learning_rate": 5.023323438676558e-07, "loss": 0.197, "step": 25951, "teacher_loss": 0.1894627809524536 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.38061094284057617, "learning_rate": 5.017496516857006e-07, "loss": 0.1667, "step": 25952, "teacher_loss": 0.14294137060642242 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.2750745415687561, "learning_rate": 5.011672919051569e-07, "loss": 0.2452, "step": 25953, "teacher_loss": 0.24187231063842773 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.8193128108978271, "learning_rate": 5.005852645393788e-07, "loss": 0.3388, "step": 25954, "teacher_loss": 0.28544050455093384 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3445749878883362, "learning_rate": 5.000035696017074e-07, "loss": 0.1799, "step": 25955, "teacher_loss": 0.16154810786247253 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.6730453372001648, "learning_rate": 4.994222071054805e-07, "loss": 0.3977, "step": 25956, "teacher_loss": 0.3670666217803955 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3895450234413147, "learning_rate": 4.988411770640306e-07, "loss": 0.2199, "step": 25957, "teacher_loss": 0.2010941207408905 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.1975100040435791, "learning_rate": 4.98260479490672e-07, "loss": 0.1837, "step": 25958, "teacher_loss": 0.1822075992822647 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.347367525100708, "learning_rate": 4.976801143987242e-07, "loss": 0.239, "step": 25959, "teacher_loss": 0.2270033061504364 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.48298120498657227, "learning_rate": 4.971000818014914e-07, "loss": 0.2201, "step": 25960, "teacher_loss": 0.1908716857433319 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.8329836130142212, "learning_rate": 4.965203817122699e-07, "loss": 0.2998, "step": 25961, "teacher_loss": 0.24053703248500824 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.8319041728973389, "learning_rate": 4.959410141443538e-07, "loss": 0.2522, "step": 25962, "teacher_loss": 0.1877671480178833 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.6278356313705444, "learning_rate": 4.953619791110242e-07, "loss": 0.2482, "step": 25963, "teacher_loss": 0.20603244006633759 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.23842298984527588, "learning_rate": 4.947832766255589e-07, "loss": 0.2127, "step": 25964, "teacher_loss": 0.20988646149635315 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.7933729887008667, "learning_rate": 4.942049067012205e-07, "loss": 0.2218, "step": 25965, "teacher_loss": 0.15832099318504333 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.6896498203277588, "learning_rate": 4.936268693512769e-07, "loss": 0.246, "step": 25966, "teacher_loss": 0.19675500690937042 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.37444615364074707, "learning_rate": 4.930491645889756e-07, "loss": 0.1961, "step": 25967, "teacher_loss": 0.17629742622375488 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.48462975025177, "learning_rate": 4.924717924275629e-07, "loss": 0.2869, "step": 25968, "teacher_loss": 0.2649174928665161 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.12423699349164963, "learning_rate": 4.918947528802748e-07, "loss": 0.2151, "step": 25969, "teacher_loss": 0.22523370385169983 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.3906821012496948, "learning_rate": 4.913180459603439e-07, "loss": 0.2013, "step": 25970, "teacher_loss": 0.18030408024787903 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.4863739609718323, "learning_rate": 4.907416716809898e-07, "loss": 0.1923, "step": 25971, "teacher_loss": 0.15963782370090485 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.38046571612358093, "learning_rate": 4.901656300554286e-07, "loss": 0.2491, "step": 25972, "teacher_loss": 0.2345171868801117 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.1948980987071991, "learning_rate": 4.89589921096868e-07, "loss": 0.156, "step": 25973, "teacher_loss": 0.15168017148971558 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.7786871194839478, "learning_rate": 4.890145448185041e-07, "loss": 0.2363, "step": 25974, "teacher_loss": 0.17602795362472534 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.28241682052612305, "learning_rate": 4.884395012335313e-07, "loss": 0.2092, "step": 25975, "teacher_loss": 0.20102903246879578 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.7610867619514465, "learning_rate": 4.878647903551342e-07, "loss": 0.2401, "step": 25976, "teacher_loss": 0.18215903639793396 }, { "compression_loss": 0.0, "epoch": 4.69, "label_loss": 0.37485358119010925, "learning_rate": 4.872904121964872e-07, "loss": 0.1723, "step": 25977, "teacher_loss": 0.14974729716777802 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.33419930934906006, "learning_rate": 4.867163667707564e-07, "loss": 0.1919, "step": 25978, "teacher_loss": 0.17606335878372192 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4441710114479065, "learning_rate": 4.861426540911095e-07, "loss": 0.2016, "step": 25979, "teacher_loss": 0.17468340694904327 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.17039218544960022, "learning_rate": 4.85569274170698e-07, "loss": 0.1716, "step": 25980, "teacher_loss": 0.17173486948013306 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.20087842643260956, "learning_rate": 4.849962270226644e-07, "loss": 0.1825, "step": 25981, "teacher_loss": 0.18042081594467163 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.5726956725120544, "learning_rate": 4.844235126601482e-07, "loss": 0.2448, "step": 25982, "teacher_loss": 0.2083328664302826 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.41342154145240784, "learning_rate": 4.838511310962823e-07, "loss": 0.2378, "step": 25983, "teacher_loss": 0.21825474500656128 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.7882672548294067, "learning_rate": 4.83279082344188e-07, "loss": 0.245, "step": 25984, "teacher_loss": 0.1845826804637909 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.19746315479278564, "learning_rate": 4.827073664169812e-07, "loss": 0.1622, "step": 25985, "teacher_loss": 0.15832996368408203 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3702055811882019, "learning_rate": 4.8213598332777e-07, "loss": 0.1977, "step": 25986, "teacher_loss": 0.17853793501853943 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.5302959680557251, "learning_rate": 4.815649330896521e-07, "loss": 0.2118, "step": 25987, "teacher_loss": 0.1764644831418991 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.15657463669776917, "learning_rate": 4.809942157157221e-07, "loss": 0.1356, "step": 25988, "teacher_loss": 0.1332722008228302 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.244287371635437, "learning_rate": 4.804238312190662e-07, "loss": 0.1509, "step": 25989, "teacher_loss": 0.14054623246192932 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.22543209791183472, "learning_rate": 4.798537796127589e-07, "loss": 0.1807, "step": 25990, "teacher_loss": 0.17573225498199463 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.342331200838089, "learning_rate": 4.792840609098715e-07, "loss": 0.2273, "step": 25991, "teacher_loss": 0.21451804041862488 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.6907057762145996, "learning_rate": 4.787146751234634e-07, "loss": 0.2637, "step": 25992, "teacher_loss": 0.2163037657737732 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.31875383853912354, "learning_rate": 4.781456222665925e-07, "loss": 0.1873, "step": 25993, "teacher_loss": 0.1726740002632141 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4331517517566681, "learning_rate": 4.775769023523019e-07, "loss": 0.236, "step": 25994, "teacher_loss": 0.21414396166801453 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.48033493757247925, "learning_rate": 4.770085153936326e-07, "loss": 0.2239, "step": 25995, "teacher_loss": 0.19539423286914825 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.23985852301120758, "learning_rate": 4.7644046140361774e-07, "loss": 0.1422, "step": 25996, "teacher_loss": 0.1313505619764328 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3673751652240753, "learning_rate": 4.758727403952784e-07, "loss": 0.1761, "step": 25997, "teacher_loss": 0.154887855052948 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.625605046749115, "learning_rate": 4.7530535238163087e-07, "loss": 0.2318, "step": 25998, "teacher_loss": 0.18803386390209198 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4149981737136841, "learning_rate": 4.747382973756847e-07, "loss": 0.2519, "step": 25999, "teacher_loss": 0.23377925157546997 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3353227376937866, "learning_rate": 4.7417157539044133e-07, "loss": 0.2534, "step": 26000, "teacher_loss": 0.2443416714668274 }, { "epoch": 4.7, "eval_exact_match": 80.51087984862819, "eval_f1": 87.69368864892034, "step": 26000 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4277428388595581, "learning_rate": 4.7360518643889015e-07, "loss": 0.2453, "step": 26001, "teacher_loss": 0.2250259518623352 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.44357770681381226, "learning_rate": 4.730391305340226e-07, "loss": 0.2073, "step": 26002, "teacher_loss": 0.18109026551246643 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.27479088306427, "learning_rate": 4.724734076888132e-07, "loss": 0.1647, "step": 26003, "teacher_loss": 0.15249595046043396 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.42205357551574707, "learning_rate": 4.7190801791623326e-07, "loss": 0.1689, "step": 26004, "teacher_loss": 0.1408035159111023 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.7555962800979614, "learning_rate": 4.7134296122924246e-07, "loss": 0.2225, "step": 26005, "teacher_loss": 0.16331183910369873 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.23449909687042236, "learning_rate": 4.7077823764080043e-07, "loss": 0.2155, "step": 26006, "teacher_loss": 0.2133558839559555 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3199350833892822, "learning_rate": 4.702138471638534e-07, "loss": 0.1913, "step": 26007, "teacher_loss": 0.17700400948524475 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.6562459468841553, "learning_rate": 4.6964978981133776e-07, "loss": 0.1941, "step": 26008, "teacher_loss": 0.14278317987918854 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4295295476913452, "learning_rate": 4.6908606559618985e-07, "loss": 0.1941, "step": 26009, "teacher_loss": 0.16790884733200073 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3958856463432312, "learning_rate": 4.6852267453133257e-07, "loss": 0.2367, "step": 26010, "teacher_loss": 0.21905523538589478 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.49395912885665894, "learning_rate": 4.679596166296807e-07, "loss": 0.3951, "step": 26011, "teacher_loss": 0.38416701555252075 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.38436219096183777, "learning_rate": 4.673968919041488e-07, "loss": 0.2148, "step": 26012, "teacher_loss": 0.19599804282188416 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.19209221005439758, "learning_rate": 4.668345003676333e-07, "loss": 0.1337, "step": 26013, "teacher_loss": 0.12716831266880035 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.3427913784980774, "learning_rate": 4.6627244203303055e-07, "loss": 0.1866, "step": 26014, "teacher_loss": 0.1692199558019638 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.12221767753362656, "learning_rate": 4.657107169132252e-07, "loss": 0.1393, "step": 26015, "teacher_loss": 0.14114607870578766 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.9144544005393982, "learning_rate": 4.65149325021097e-07, "loss": 0.2725, "step": 26016, "teacher_loss": 0.20118333399295807 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.17010632157325745, "learning_rate": 4.645882663695189e-07, "loss": 0.2034, "step": 26017, "teacher_loss": 0.20713108777999878 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.26602333784103394, "learning_rate": 4.640275409713507e-07, "loss": 0.1686, "step": 26018, "teacher_loss": 0.15772384405136108 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.34665191173553467, "learning_rate": 4.634671488394504e-07, "loss": 0.2201, "step": 26019, "teacher_loss": 0.20604264736175537 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4486544728279114, "learning_rate": 4.629070899866661e-07, "loss": 0.1953, "step": 26020, "teacher_loss": 0.16715312004089355 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.4529132544994354, "learning_rate": 4.623473644258375e-07, "loss": 0.1911, "step": 26021, "teacher_loss": 0.1620243340730667 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.28592199087142944, "learning_rate": 4.617879721697976e-07, "loss": 0.1505, "step": 26022, "teacher_loss": 0.1354852020740509 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.177555114030838, "learning_rate": 4.6122891323137127e-07, "loss": 0.1726, "step": 26023, "teacher_loss": 0.1719955950975418 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.22664940357208252, "learning_rate": 4.6067018762337654e-07, "loss": 0.173, "step": 26024, "teacher_loss": 0.1670922040939331 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.5536371469497681, "learning_rate": 4.601117953586231e-07, "loss": 0.1714, "step": 26025, "teacher_loss": 0.12895441055297852 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.5180556774139404, "learning_rate": 4.5955373644991585e-07, "loss": 0.198, "step": 26026, "teacher_loss": 0.16249239444732666 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.15433071553707123, "learning_rate": 4.589960109100444e-07, "loss": 0.149, "step": 26027, "teacher_loss": 0.14841899275779724 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.47172582149505615, "learning_rate": 4.5843861875179694e-07, "loss": 0.1866, "step": 26028, "teacher_loss": 0.15489903092384338 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.6010135412216187, "learning_rate": 4.5788155998795653e-07, "loss": 0.2847, "step": 26029, "teacher_loss": 0.24952954053878784 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.34171411395072937, "learning_rate": 4.5732483463129305e-07, "loss": 0.141, "step": 26030, "teacher_loss": 0.11872003972530365 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.5686646699905396, "learning_rate": 4.5676844269456786e-07, "loss": 0.1879, "step": 26031, "teacher_loss": 0.14561530947685242 }, { "compression_loss": 0.0, "epoch": 4.7, "label_loss": 0.2814827263355255, "learning_rate": 4.562123841905391e-07, "loss": 0.1975, "step": 26032, "teacher_loss": 0.18817178905010223 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2973589599132538, "learning_rate": 4.556566591319583e-07, "loss": 0.1311, "step": 26033, "teacher_loss": 0.11266306787729263 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6057367324829102, "learning_rate": 4.551012675315619e-07, "loss": 0.2224, "step": 26034, "teacher_loss": 0.17985178530216217 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.08601963520050049, "learning_rate": 4.5454620940208634e-07, "loss": 0.1304, "step": 26035, "teacher_loss": 0.13536955416202545 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.7770828604698181, "learning_rate": 4.5399148475625816e-07, "loss": 0.2458, "step": 26036, "teacher_loss": 0.1867215633392334 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.20638640224933624, "learning_rate": 4.534370936067922e-07, "loss": 0.123, "step": 26037, "teacher_loss": 0.11370199918746948 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3812705874443054, "learning_rate": 4.528830359663999e-07, "loss": 0.2054, "step": 26038, "teacher_loss": 0.18582221865653992 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.48597636818885803, "learning_rate": 4.5232931184778783e-07, "loss": 0.2691, "step": 26039, "teacher_loss": 0.2450440376996994 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.35701578855514526, "learning_rate": 4.5177592126364744e-07, "loss": 0.2012, "step": 26040, "teacher_loss": 0.18386715650558472 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2949666380882263, "learning_rate": 4.512228642266653e-07, "loss": 0.2485, "step": 26041, "teacher_loss": 0.243296816945076 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3568854331970215, "learning_rate": 4.506701407495245e-07, "loss": 0.2041, "step": 26042, "teacher_loss": 0.18707525730133057 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.26589435338974, "learning_rate": 4.5011775084489837e-07, "loss": 0.2396, "step": 26043, "teacher_loss": 0.23667283356189728 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.16364212334156036, "learning_rate": 4.495656945254467e-07, "loss": 0.1627, "step": 26044, "teacher_loss": 0.16265049576759338 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.11040471494197845, "learning_rate": 4.490139718038294e-07, "loss": 0.1214, "step": 26045, "teacher_loss": 0.12262696027755737 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6463939547538757, "learning_rate": 4.48462582692698e-07, "loss": 0.3244, "step": 26046, "teacher_loss": 0.2885853052139282 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.45146292448043823, "learning_rate": 4.479115272046891e-07, "loss": 0.2308, "step": 26047, "teacher_loss": 0.20631471276283264 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.49363085627555847, "learning_rate": 4.4736080535244084e-07, "loss": 0.1927, "step": 26048, "teacher_loss": 0.1592334806919098 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.4053407907485962, "learning_rate": 4.468104171485782e-07, "loss": 0.2322, "step": 26049, "teacher_loss": 0.21292461454868317 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3578922748565674, "learning_rate": 4.4626036260571937e-07, "loss": 0.3275, "step": 26050, "teacher_loss": 0.32410258054733276 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2743688225746155, "learning_rate": 4.457106417364759e-07, "loss": 0.2011, "step": 26051, "teacher_loss": 0.19299355149269104 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.262364000082016, "learning_rate": 4.451612545534528e-07, "loss": 0.1854, "step": 26052, "teacher_loss": 0.17681050300598145 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.31732919812202454, "learning_rate": 4.4461220106924494e-07, "loss": 0.1853, "step": 26053, "teacher_loss": 0.17058789730072021 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 1.1371077299118042, "learning_rate": 4.440634812964373e-07, "loss": 0.3412, "step": 26054, "teacher_loss": 0.25280189514160156 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.5069007873535156, "learning_rate": 4.4351509524761466e-07, "loss": 0.3737, "step": 26055, "teacher_loss": 0.35888397693634033 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.5399612188339233, "learning_rate": 4.4296704293534886e-07, "loss": 0.2601, "step": 26056, "teacher_loss": 0.22903326153755188 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.27342677116394043, "learning_rate": 4.42419324372203e-07, "loss": 0.2176, "step": 26057, "teacher_loss": 0.21142691373825073 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3868868947029114, "learning_rate": 4.418719395707355e-07, "loss": 0.22, "step": 26058, "teacher_loss": 0.20146706700325012 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.5153133273124695, "learning_rate": 4.413248885434995e-07, "loss": 0.2056, "step": 26059, "teacher_loss": 0.17118126153945923 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.48697638511657715, "learning_rate": 4.4077817130303344e-07, "loss": 0.2276, "step": 26060, "teacher_loss": 0.19880172610282898 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.768515408039093, "learning_rate": 4.402317878618722e-07, "loss": 0.2387, "step": 26061, "teacher_loss": 0.17979514598846436 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.5323460102081299, "learning_rate": 4.396857382325459e-07, "loss": 0.1754, "step": 26062, "teacher_loss": 0.13569381833076477 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 1.0058705806732178, "learning_rate": 4.3914002242756934e-07, "loss": 0.3528, "step": 26063, "teacher_loss": 0.28025805950164795 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.4521084725856781, "learning_rate": 4.385946404594576e-07, "loss": 0.2267, "step": 26064, "teacher_loss": 0.2017059475183487 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.27968937158584595, "learning_rate": 4.380495923407124e-07, "loss": 0.1706, "step": 26065, "teacher_loss": 0.1584520936012268 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.45004379749298096, "learning_rate": 4.3750487808383033e-07, "loss": 0.2086, "step": 26066, "teacher_loss": 0.1817297786474228 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2527501583099365, "learning_rate": 4.3696049770130307e-07, "loss": 0.1943, "step": 26067, "teacher_loss": 0.18780942261219025 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.605048656463623, "learning_rate": 4.364164512056074e-07, "loss": 0.3141, "step": 26068, "teacher_loss": 0.2817923426628113 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.7627030611038208, "learning_rate": 4.3587273860921985e-07, "loss": 0.2592, "step": 26069, "teacher_loss": 0.20321446657180786 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.18822041153907776, "learning_rate": 4.353293599246055e-07, "loss": 0.1578, "step": 26070, "teacher_loss": 0.15437328815460205 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2526842951774597, "learning_rate": 4.3478631516421954e-07, "loss": 0.1656, "step": 26071, "teacher_loss": 0.15592005848884583 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.34720659255981445, "learning_rate": 4.342436043405168e-07, "loss": 0.1882, "step": 26072, "teacher_loss": 0.1705521047115326 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2369542121887207, "learning_rate": 4.3370122746593575e-07, "loss": 0.1394, "step": 26073, "teacher_loss": 0.12855711579322815 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3447191119194031, "learning_rate": 4.331591845529148e-07, "loss": 0.197, "step": 26074, "teacher_loss": 0.18061551451683044 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2550721764564514, "learning_rate": 4.3261747561388063e-07, "loss": 0.2302, "step": 26075, "teacher_loss": 0.2274158000946045 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6777764558792114, "learning_rate": 4.320761006612517e-07, "loss": 0.2143, "step": 26076, "teacher_loss": 0.16282817721366882 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.42767333984375, "learning_rate": 4.315350597074413e-07, "loss": 0.2211, "step": 26077, "teacher_loss": 0.19819800555706024 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.7305760383605957, "learning_rate": 4.30994352764853e-07, "loss": 0.3295, "step": 26078, "teacher_loss": 0.284932017326355 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.9923133850097656, "learning_rate": 4.304539798458834e-07, "loss": 0.2509, "step": 26079, "teacher_loss": 0.1685442328453064 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6449244618415833, "learning_rate": 4.299139409629244e-07, "loss": 0.293, "step": 26080, "teacher_loss": 0.2539238929748535 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.2612766921520233, "learning_rate": 4.293742361283526e-07, "loss": 0.1645, "step": 26081, "teacher_loss": 0.15378305315971375 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.16080445051193237, "learning_rate": 4.2883486535454483e-07, "loss": 0.126, "step": 26082, "teacher_loss": 0.12217552214860916 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3531619608402252, "learning_rate": 4.282958286538696e-07, "loss": 0.204, "step": 26083, "teacher_loss": 0.1874670386314392 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6026806831359863, "learning_rate": 4.2775712603868036e-07, "loss": 0.2629, "step": 26084, "teacher_loss": 0.22519998252391815 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.873207688331604, "learning_rate": 4.2721875752133044e-07, "loss": 0.3177, "step": 26085, "teacher_loss": 0.25598400831222534 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6222789883613586, "learning_rate": 4.2668072311416504e-07, "loss": 0.2591, "step": 26086, "teacher_loss": 0.21873164176940918 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.6674565076828003, "learning_rate": 4.2614302282951766e-07, "loss": 0.3274, "step": 26087, "teacher_loss": 0.28959396481513977 }, { "compression_loss": 0.0, "epoch": 4.71, "label_loss": 0.3951818346977234, "learning_rate": 4.2560565667971173e-07, "loss": 0.2112, "step": 26088, "teacher_loss": 0.19078782200813293 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.43137240409851074, "learning_rate": 4.2506862467707575e-07, "loss": 0.2823, "step": 26089, "teacher_loss": 0.26569920778274536 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.21788080036640167, "learning_rate": 4.245319268339198e-07, "loss": 0.2099, "step": 26090, "teacher_loss": 0.20898719131946564 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.42294490337371826, "learning_rate": 4.2399556316254254e-07, "loss": 0.2089, "step": 26091, "teacher_loss": 0.1850709468126297 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.11135952174663544, "learning_rate": 4.234595336752489e-07, "loss": 0.1277, "step": 26092, "teacher_loss": 0.12953317165374756 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.5950625538825989, "learning_rate": 4.2292383838432593e-07, "loss": 0.2582, "step": 26093, "teacher_loss": 0.2207736223936081 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6008852124214172, "learning_rate": 4.22388477302052e-07, "loss": 0.2014, "step": 26094, "teacher_loss": 0.1570214331150055 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6806545257568359, "learning_rate": 4.218534504407057e-07, "loss": 0.3025, "step": 26095, "teacher_loss": 0.2604466676712036 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6116722226142883, "learning_rate": 4.213187578125538e-07, "loss": 0.2604, "step": 26096, "teacher_loss": 0.22138960659503937 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.28433072566986084, "learning_rate": 4.207843994298516e-07, "loss": 0.2057, "step": 26097, "teacher_loss": 0.19693398475646973 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.401202917098999, "learning_rate": 4.2025037530485256e-07, "loss": 0.2275, "step": 26098, "teacher_loss": 0.20817461609840393 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.3467712700366974, "learning_rate": 4.197166854498019e-07, "loss": 0.163, "step": 26099, "teacher_loss": 0.14263172447681427 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.2930585741996765, "learning_rate": 4.191833298769332e-07, "loss": 0.1743, "step": 26100, "teacher_loss": 0.16114750504493713 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.38431665301322937, "learning_rate": 4.186503085984733e-07, "loss": 0.2414, "step": 26101, "teacher_loss": 0.22548821568489075 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.14645330607891083, "learning_rate": 4.181176216266458e-07, "loss": 0.1307, "step": 26102, "teacher_loss": 0.1289098858833313 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.14754296839237213, "learning_rate": 4.175852689736642e-07, "loss": 0.1626, "step": 26103, "teacher_loss": 0.16422373056411743 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 1.0099220275878906, "learning_rate": 4.1705325065172883e-07, "loss": 0.2714, "step": 26104, "teacher_loss": 0.18939325213432312 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6340831518173218, "learning_rate": 4.165215666730415e-07, "loss": 0.2488, "step": 26105, "teacher_loss": 0.20595046877861023 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.41640201210975647, "learning_rate": 4.1599021704979257e-07, "loss": 0.1957, "step": 26106, "teacher_loss": 0.1711452603340149 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.565737247467041, "learning_rate": 4.154592017941622e-07, "loss": 0.2176, "step": 26107, "teacher_loss": 0.1788749396800995 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.2760319709777832, "learning_rate": 4.14928520918324e-07, "loss": 0.2199, "step": 26108, "teacher_loss": 0.21361616253852844 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.25048866868019104, "learning_rate": 4.143981744344483e-07, "loss": 0.165, "step": 26109, "teacher_loss": 0.15544962882995605 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.41958439350128174, "learning_rate": 4.138681623546919e-07, "loss": 0.1934, "step": 26110, "teacher_loss": 0.16822870075702667 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.33993959426879883, "learning_rate": 4.133384846912069e-07, "loss": 0.1954, "step": 26111, "teacher_loss": 0.17929381132125854 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.35421645641326904, "learning_rate": 4.128091414561386e-07, "loss": 0.2196, "step": 26112, "teacher_loss": 0.2045917510986328 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.624624490737915, "learning_rate": 4.1228013266162044e-07, "loss": 0.2884, "step": 26113, "teacher_loss": 0.2510247230529785 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6498924493789673, "learning_rate": 4.1175145831978454e-07, "loss": 0.3514, "step": 26114, "teacher_loss": 0.3182254433631897 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.24432280659675598, "learning_rate": 4.1122311844274786e-07, "loss": 0.1871, "step": 26115, "teacher_loss": 0.18077883124351501 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.3105872869491577, "learning_rate": 4.106951130426273e-07, "loss": 0.2795, "step": 26116, "teacher_loss": 0.2759951055049896 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6618967056274414, "learning_rate": 4.101674421315249e-07, "loss": 0.2313, "step": 26117, "teacher_loss": 0.1834862232208252 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.460035502910614, "learning_rate": 4.0964010572154096e-07, "loss": 0.2017, "step": 26118, "teacher_loss": 0.17295563220977783 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.4694027304649353, "learning_rate": 4.0911310382476754e-07, "loss": 0.2766, "step": 26119, "teacher_loss": 0.2551938593387604 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.2026854008436203, "learning_rate": 4.0858643645328155e-07, "loss": 0.158, "step": 26120, "teacher_loss": 0.15297973155975342 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.428189754486084, "learning_rate": 4.080601036191617e-07, "loss": 0.3135, "step": 26121, "teacher_loss": 0.3007040023803711 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.3920612335205078, "learning_rate": 4.0753410533447665e-07, "loss": 0.2, "step": 26122, "teacher_loss": 0.1787053346633911 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.12669916450977325, "learning_rate": 4.0700844161128345e-07, "loss": 0.1383, "step": 26123, "teacher_loss": 0.13960862159729004 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.8708757162094116, "learning_rate": 4.0648311246163416e-07, "loss": 0.2849, "step": 26124, "teacher_loss": 0.21980786323547363 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.8162716627120972, "learning_rate": 4.059581178975741e-07, "loss": 0.279, "step": 26125, "teacher_loss": 0.21927893161773682 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6057848930358887, "learning_rate": 4.054334579311386e-07, "loss": 0.3848, "step": 26126, "teacher_loss": 0.3601934611797333 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.4929184317588806, "learning_rate": 4.0490913257435813e-07, "loss": 0.2869, "step": 26127, "teacher_loss": 0.26400449872016907 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.6739277243614197, "learning_rate": 4.0438514183925135e-07, "loss": 0.2495, "step": 26128, "teacher_loss": 0.20239630341529846 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.42070162296295166, "learning_rate": 4.038614857378337e-07, "loss": 0.225, "step": 26129, "teacher_loss": 0.20326349139213562 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.31354451179504395, "learning_rate": 4.033381642821121e-07, "loss": 0.182, "step": 26130, "teacher_loss": 0.1674191653728485 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.40662646293640137, "learning_rate": 4.0281517748408217e-07, "loss": 0.2133, "step": 26131, "teacher_loss": 0.19178417325019836 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.4946771562099457, "learning_rate": 4.0229252535573413e-07, "loss": 0.2249, "step": 26132, "teacher_loss": 0.19493868947029114 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.5274799466133118, "learning_rate": 4.017702079090552e-07, "loss": 0.1977, "step": 26133, "teacher_loss": 0.16105802357196808 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.3925546109676361, "learning_rate": 4.012482251560157e-07, "loss": 0.1737, "step": 26134, "teacher_loss": 0.14933812618255615 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.41620999574661255, "learning_rate": 4.0072657710858776e-07, "loss": 0.2483, "step": 26135, "teacher_loss": 0.22965747117996216 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.41546422243118286, "learning_rate": 4.002052637787251e-07, "loss": 0.1976, "step": 26136, "teacher_loss": 0.17342418432235718 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.4069422483444214, "learning_rate": 3.9968428517838495e-07, "loss": 0.1785, "step": 26137, "teacher_loss": 0.15311656892299652 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.3098655939102173, "learning_rate": 3.991636413195093e-07, "loss": 0.2085, "step": 26138, "teacher_loss": 0.19723233580589294 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.28139638900756836, "learning_rate": 3.98643332214037e-07, "loss": 0.2009, "step": 26139, "teacher_loss": 0.19198496639728546 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.2162252813577652, "learning_rate": 3.981233578738952e-07, "loss": 0.2662, "step": 26140, "teacher_loss": 0.2717888355255127 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.255289226770401, "learning_rate": 3.9760371831100594e-07, "loss": 0.2187, "step": 26141, "teacher_loss": 0.21460360288619995 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.4985791742801666, "learning_rate": 3.970844135372831e-07, "loss": 0.186, "step": 26142, "teacher_loss": 0.1512521356344223 }, { "compression_loss": 0.0, "epoch": 4.72, "label_loss": 0.7005516290664673, "learning_rate": 3.9656544356463375e-07, "loss": 0.2162, "step": 26143, "teacher_loss": 0.1623440384864807 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 1.2586867809295654, "learning_rate": 3.9604680840495345e-07, "loss": 0.3848, "step": 26144, "teacher_loss": 0.2877376079559326 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.18884801864624023, "learning_rate": 3.9552850807013595e-07, "loss": 0.1459, "step": 26145, "teacher_loss": 0.14108259975910187 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2994023859500885, "learning_rate": 3.9501054257206514e-07, "loss": 0.3531, "step": 26146, "teacher_loss": 0.35908254981040955 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.5666463375091553, "learning_rate": 3.9449291192261317e-07, "loss": 0.3425, "step": 26147, "teacher_loss": 0.31757134199142456 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.6052114963531494, "learning_rate": 3.939756161336472e-07, "loss": 0.272, "step": 26148, "teacher_loss": 0.23494423925876617 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.17341214418411255, "learning_rate": 3.9345865521703273e-07, "loss": 0.2166, "step": 26149, "teacher_loss": 0.22141587734222412 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.3797691762447357, "learning_rate": 3.9294202918461694e-07, "loss": 0.2345, "step": 26150, "teacher_loss": 0.21834754943847656 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.7920696139335632, "learning_rate": 3.9242573804824543e-07, "loss": 0.3129, "step": 26151, "teacher_loss": 0.2597135305404663 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.48059970140457153, "learning_rate": 3.91909781819757e-07, "loss": 0.1888, "step": 26152, "teacher_loss": 0.15642619132995605 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.4991934299468994, "learning_rate": 3.9139416051098053e-07, "loss": 0.2204, "step": 26153, "teacher_loss": 0.18938395380973816 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.6187795996665955, "learning_rate": 3.908788741337349e-07, "loss": 0.3121, "step": 26154, "teacher_loss": 0.27805233001708984 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.3074015974998474, "learning_rate": 3.9036392269983736e-07, "loss": 0.2087, "step": 26155, "teacher_loss": 0.19772422313690186 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.7527005076408386, "learning_rate": 3.8984930622109507e-07, "loss": 0.2548, "step": 26156, "teacher_loss": 0.19952982664108276 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.17522814869880676, "learning_rate": 3.893350247093019e-07, "loss": 0.2196, "step": 26157, "teacher_loss": 0.22454112768173218 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.31965523958206177, "learning_rate": 3.888210781762519e-07, "loss": 0.3283, "step": 26158, "teacher_loss": 0.3292592167854309 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.5545949339866638, "learning_rate": 3.8830746663372886e-07, "loss": 0.2564, "step": 26159, "teacher_loss": 0.2232506275177002 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.35756444931030273, "learning_rate": 3.8779419009350837e-07, "loss": 0.1973, "step": 26160, "teacher_loss": 0.17946264147758484 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.5310564041137695, "learning_rate": 3.8728124856735435e-07, "loss": 0.2763, "step": 26161, "teacher_loss": 0.24804642796516418 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.19012349843978882, "learning_rate": 3.867686420670341e-07, "loss": 0.1359, "step": 26162, "teacher_loss": 0.12989000976085663 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.26397842168807983, "learning_rate": 3.862563706042949e-07, "loss": 0.1557, "step": 26163, "teacher_loss": 0.1436896026134491 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.731393575668335, "learning_rate": 3.8574443419088057e-07, "loss": 0.2783, "step": 26164, "teacher_loss": 0.22794075310230255 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.11459334194660187, "learning_rate": 3.852328328385318e-07, "loss": 0.1372, "step": 26165, "teacher_loss": 0.13972651958465576 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.3915087580680847, "learning_rate": 3.8472156655897926e-07, "loss": 0.1979, "step": 26166, "teacher_loss": 0.17642842233181 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.25729283690452576, "learning_rate": 3.842106353639385e-07, "loss": 0.2336, "step": 26167, "teacher_loss": 0.230980783700943 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.4644719958305359, "learning_rate": 3.837000392651285e-07, "loss": 0.2421, "step": 26168, "teacher_loss": 0.21739768981933594 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.19753041863441467, "learning_rate": 3.831897782742566e-07, "loss": 0.1821, "step": 26169, "teacher_loss": 0.1803959608078003 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.9388679265975952, "learning_rate": 3.826798524030184e-07, "loss": 0.3157, "step": 26170, "teacher_loss": 0.24650216102600098 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.6640670299530029, "learning_rate": 3.8217026166310454e-07, "loss": 0.24, "step": 26171, "teacher_loss": 0.19287711381912231 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.48650598526000977, "learning_rate": 3.8166100606620234e-07, "loss": 0.2109, "step": 26172, "teacher_loss": 0.18028786778450012 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.28243106603622437, "learning_rate": 3.8115208562398417e-07, "loss": 0.3322, "step": 26173, "teacher_loss": 0.33770421147346497 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.5037869215011597, "learning_rate": 3.80643500348119e-07, "loss": 0.18, "step": 26174, "teacher_loss": 0.14400310814380646 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.33066973090171814, "learning_rate": 3.8013525025026916e-07, "loss": 0.1794, "step": 26175, "teacher_loss": 0.16258709132671356 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2360270917415619, "learning_rate": 3.7962733534208536e-07, "loss": 0.1916, "step": 26176, "teacher_loss": 0.18664349615573883 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.21871665120124817, "learning_rate": 3.7911975563520995e-07, "loss": 0.2018, "step": 26177, "teacher_loss": 0.1999211609363556 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.3610538840293884, "learning_rate": 3.786125111412836e-07, "loss": 0.2254, "step": 26178, "teacher_loss": 0.21032965183258057 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.23414433002471924, "learning_rate": 3.78105601871937e-07, "loss": 0.2504, "step": 26179, "teacher_loss": 0.25220608711242676 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.5131825804710388, "learning_rate": 3.775990278387875e-07, "loss": 0.1804, "step": 26180, "teacher_loss": 0.1434488743543625 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.27662405371665955, "learning_rate": 3.7709278905345255e-07, "loss": 0.1833, "step": 26181, "teacher_loss": 0.17290785908699036 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.23825427889823914, "learning_rate": 3.7658688552754116e-07, "loss": 0.1539, "step": 26182, "teacher_loss": 0.14451012015342712 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.1817144751548767, "learning_rate": 3.760813172726457e-07, "loss": 0.1536, "step": 26183, "teacher_loss": 0.15052466094493866 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.406525194644928, "learning_rate": 3.755760843003619e-07, "loss": 0.2731, "step": 26184, "teacher_loss": 0.2582969665527344 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2960628569126129, "learning_rate": 3.750711866222739e-07, "loss": 0.1623, "step": 26185, "teacher_loss": 0.14741212129592896 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.19659802317619324, "learning_rate": 3.74566624249954e-07, "loss": 0.1973, "step": 26186, "teacher_loss": 0.19743013381958008 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.46065518260002136, "learning_rate": 3.74062397194973e-07, "loss": 0.2236, "step": 26187, "teacher_loss": 0.1972934901714325 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.1878177523612976, "learning_rate": 3.7355850546889e-07, "loss": 0.1964, "step": 26188, "teacher_loss": 0.19739489257335663 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2511180639266968, "learning_rate": 3.7305494908325744e-07, "loss": 0.1808, "step": 26189, "teacher_loss": 0.17302405834197998 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2679293155670166, "learning_rate": 3.725517280496227e-07, "loss": 0.2293, "step": 26190, "teacher_loss": 0.22504764795303345 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.2622528374195099, "learning_rate": 3.7204884237951987e-07, "loss": 0.1406, "step": 26191, "teacher_loss": 0.127126544713974 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.47975707054138184, "learning_rate": 3.7154629208447974e-07, "loss": 0.2444, "step": 26192, "teacher_loss": 0.21826599538326263 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.12180589139461517, "learning_rate": 3.7104407717602815e-07, "loss": 0.1292, "step": 26193, "teacher_loss": 0.13007289171218872 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.8054293990135193, "learning_rate": 3.7054219766567256e-07, "loss": 0.3094, "step": 26194, "teacher_loss": 0.2542540431022644 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.564100444316864, "learning_rate": 3.700406535649237e-07, "loss": 0.2136, "step": 26195, "teacher_loss": 0.1746487021446228 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.4617871642112732, "learning_rate": 3.695394448852807e-07, "loss": 0.1909, "step": 26196, "teacher_loss": 0.1607992947101593 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.27024996280670166, "learning_rate": 3.6903857163823274e-07, "loss": 0.211, "step": 26197, "teacher_loss": 0.20439936220645905 }, { "compression_loss": 0.0, "epoch": 4.73, "label_loss": 0.11742550134658813, "learning_rate": 3.6853803383526565e-07, "loss": 0.1543, "step": 26198, "teacher_loss": 0.15844564139842987 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.40123188495635986, "learning_rate": 3.680378314878535e-07, "loss": 0.2098, "step": 26199, "teacher_loss": 0.188495472073555 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.6872580051422119, "learning_rate": 3.675379646074656e-07, "loss": 0.2847, "step": 26200, "teacher_loss": 0.23996597528457642 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.6345182657241821, "learning_rate": 3.67038433205561e-07, "loss": 0.2273, "step": 26201, "teacher_loss": 0.1820010393857956 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.39220041036605835, "learning_rate": 3.665392372935922e-07, "loss": 0.3509, "step": 26202, "teacher_loss": 0.3463330864906311 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.112884022295475, "learning_rate": 3.6604037688300676e-07, "loss": 0.1651, "step": 26203, "teacher_loss": 0.1709403693675995 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.38889506459236145, "learning_rate": 3.655418519852405e-07, "loss": 0.2284, "step": 26204, "teacher_loss": 0.2105635404586792 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.33332061767578125, "learning_rate": 3.6504366261172263e-07, "loss": 0.2127, "step": 26205, "teacher_loss": 0.1992582380771637 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5565102100372314, "learning_rate": 3.6454580877387567e-07, "loss": 0.2361, "step": 26206, "teacher_loss": 0.20051544904708862 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.16748939454555511, "learning_rate": 3.6404829048311384e-07, "loss": 0.1792, "step": 26207, "teacher_loss": 0.18048053979873657 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.3263118863105774, "learning_rate": 3.6355110775084467e-07, "loss": 0.1656, "step": 26208, "teacher_loss": 0.14771801233291626 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.3977610170841217, "learning_rate": 3.630542605884657e-07, "loss": 0.1966, "step": 26209, "teacher_loss": 0.17423516511917114 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.14937418699264526, "learning_rate": 3.625577490073695e-07, "loss": 0.1239, "step": 26210, "teacher_loss": 0.12106631696224213 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.3014680743217468, "learning_rate": 3.6206157301893695e-07, "loss": 0.1704, "step": 26211, "teacher_loss": 0.1558433622121811 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5079413652420044, "learning_rate": 3.6156573263454727e-07, "loss": 0.2183, "step": 26212, "teacher_loss": 0.1861562728881836 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.46815937757492065, "learning_rate": 3.610702278655681e-07, "loss": 0.1905, "step": 26213, "teacher_loss": 0.15967296063899994 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.30157017707824707, "learning_rate": 3.6057505872335527e-07, "loss": 0.2238, "step": 26214, "teacher_loss": 0.21517033874988556 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4151687026023865, "learning_rate": 3.600802252192681e-07, "loss": 0.2399, "step": 26215, "teacher_loss": 0.22041943669319153 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.7945215702056885, "learning_rate": 3.5958572736464913e-07, "loss": 0.2648, "step": 26216, "teacher_loss": 0.205949604511261 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.42355990409851074, "learning_rate": 3.5909156517083266e-07, "loss": 0.2111, "step": 26217, "teacher_loss": 0.18747074902057648 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.2900615930557251, "learning_rate": 3.585977386491512e-07, "loss": 0.1492, "step": 26218, "teacher_loss": 0.1335279792547226 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.27687835693359375, "learning_rate": 3.5810424781092754e-07, "loss": 0.1833, "step": 26219, "teacher_loss": 0.17291682958602905 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5882298946380615, "learning_rate": 3.5761109266747417e-07, "loss": 0.2302, "step": 26220, "teacher_loss": 0.1904669553041458 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5582160949707031, "learning_rate": 3.571182732300987e-07, "loss": 0.2034, "step": 26221, "teacher_loss": 0.1639569252729416 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5095123052597046, "learning_rate": 3.566257895101005e-07, "loss": 0.2756, "step": 26222, "teacher_loss": 0.2496471405029297 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4840032458305359, "learning_rate": 3.561336415187688e-07, "loss": 0.194, "step": 26223, "teacher_loss": 0.16177856922149658 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.2597351670265198, "learning_rate": 3.556418292673863e-07, "loss": 0.1968, "step": 26224, "teacher_loss": 0.18977373838424683 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.43789416551589966, "learning_rate": 3.55150352767234e-07, "loss": 0.2061, "step": 26225, "teacher_loss": 0.1803433895111084 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.3409907817840576, "learning_rate": 3.546592120295744e-07, "loss": 0.147, "step": 26226, "teacher_loss": 0.12547388672828674 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.6100951433181763, "learning_rate": 3.5416840706567035e-07, "loss": 0.3121, "step": 26227, "teacher_loss": 0.27901214361190796 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.35740572214126587, "learning_rate": 3.5367793788677437e-07, "loss": 0.1897, "step": 26228, "teacher_loss": 0.1710374504327774 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 1.332581877708435, "learning_rate": 3.5318780450413255e-07, "loss": 0.4067, "step": 26229, "teacher_loss": 0.3038719594478607 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4084565043449402, "learning_rate": 3.5269800692897925e-07, "loss": 0.2451, "step": 26230, "teacher_loss": 0.2269318401813507 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.24748177826404572, "learning_rate": 3.522085451725454e-07, "loss": 0.242, "step": 26231, "teacher_loss": 0.24144527316093445 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.45086929202079773, "learning_rate": 3.5171941924605543e-07, "loss": 0.2021, "step": 26232, "teacher_loss": 0.17442019283771515 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.36637169122695923, "learning_rate": 3.5123062916072033e-07, "loss": 0.1946, "step": 26233, "teacher_loss": 0.17552852630615234 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.6955926418304443, "learning_rate": 3.5074217492774616e-07, "loss": 0.2219, "step": 26234, "teacher_loss": 0.1692207157611847 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.6517864465713501, "learning_rate": 3.5025405655833564e-07, "loss": 0.2825, "step": 26235, "teacher_loss": 0.24144214391708374 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4459074139595032, "learning_rate": 3.4976627406367477e-07, "loss": 0.2516, "step": 26236, "teacher_loss": 0.2300581932067871 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.29085665941238403, "learning_rate": 3.49278827454953e-07, "loss": 0.2075, "step": 26237, "teacher_loss": 0.1982668936252594 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4197015166282654, "learning_rate": 3.4879171674333967e-07, "loss": 0.1982, "step": 26238, "teacher_loss": 0.17358699440956116 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.20697128772735596, "learning_rate": 3.483049419400075e-07, "loss": 0.1879, "step": 26239, "teacher_loss": 0.18579816818237305 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5080598592758179, "learning_rate": 3.478185030561126e-07, "loss": 0.2192, "step": 26240, "teacher_loss": 0.18715938925743103 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.16831433773040771, "learning_rate": 3.473324001028111e-07, "loss": 0.1319, "step": 26241, "teacher_loss": 0.12785357236862183 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.2480318248271942, "learning_rate": 3.468466330912473e-07, "loss": 0.2338, "step": 26242, "teacher_loss": 0.2322331815958023 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.5446599721908569, "learning_rate": 3.463612020325574e-07, "loss": 0.169, "step": 26243, "teacher_loss": 0.12728184461593628 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.42143183946609497, "learning_rate": 3.4587610693787075e-07, "loss": 0.1947, "step": 26244, "teacher_loss": 0.1694851815700531 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4771069586277008, "learning_rate": 3.453913478183102e-07, "loss": 0.212, "step": 26245, "teacher_loss": 0.18257340788841248 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.2887862026691437, "learning_rate": 3.449069246849901e-07, "loss": 0.1565, "step": 26246, "teacher_loss": 0.1417803019285202 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.2929953336715698, "learning_rate": 3.444228375490133e-07, "loss": 0.2148, "step": 26247, "teacher_loss": 0.20610585808753967 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.21139714121818542, "learning_rate": 3.439390864214842e-07, "loss": 0.1808, "step": 26248, "teacher_loss": 0.17742875218391418 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.39774900674819946, "learning_rate": 3.4345567131348896e-07, "loss": 0.1762, "step": 26249, "teacher_loss": 0.1515520066022873 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4918766915798187, "learning_rate": 3.4297259223611376e-07, "loss": 0.2063, "step": 26250, "teacher_loss": 0.17459869384765625 }, { "epoch": 4.74, "eval_exact_match": 80.69063386944181, "eval_f1": 87.91655648696916, "step": 26250 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4254281520843506, "learning_rate": 3.4248984920043137e-07, "loss": 0.1926, "step": 26251, "teacher_loss": 0.16677838563919067 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.9359781742095947, "learning_rate": 3.4200744221751125e-07, "loss": 0.2905, "step": 26252, "teacher_loss": 0.21872839331626892 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.4856973886489868, "learning_rate": 3.415253712984162e-07, "loss": 0.1758, "step": 26253, "teacher_loss": 0.14137227833271027 }, { "compression_loss": 0.0, "epoch": 4.74, "label_loss": 0.9807087779045105, "learning_rate": 3.4104363645419246e-07, "loss": 0.3067, "step": 26254, "teacher_loss": 0.2318429946899414 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.18705926835536957, "learning_rate": 3.4056223769588944e-07, "loss": 0.181, "step": 26255, "teacher_loss": 0.1802825629711151 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.36990833282470703, "learning_rate": 3.4008117503454506e-07, "loss": 0.2258, "step": 26256, "teacher_loss": 0.20978787541389465 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.44578301906585693, "learning_rate": 3.396004484811838e-07, "loss": 0.1968, "step": 26257, "teacher_loss": 0.16919000446796417 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.2843993902206421, "learning_rate": 3.391200580468318e-07, "loss": 0.1589, "step": 26258, "teacher_loss": 0.1449570506811142 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5973250865936279, "learning_rate": 3.386400037425019e-07, "loss": 0.2229, "step": 26259, "teacher_loss": 0.18125376105308533 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.2596374452114105, "learning_rate": 3.381602855791988e-07, "loss": 0.1482, "step": 26260, "teacher_loss": 0.13577735424041748 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 1.0355414152145386, "learning_rate": 3.376809035679218e-07, "loss": 0.219, "step": 26261, "teacher_loss": 0.1282472312450409 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.4196634292602539, "learning_rate": 3.372018577196606e-07, "loss": 0.2368, "step": 26262, "teacher_loss": 0.21652746200561523 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.4443909525871277, "learning_rate": 3.367231480454014e-07, "loss": 0.2303, "step": 26263, "teacher_loss": 0.20652136206626892 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3333846628665924, "learning_rate": 3.36244774556117e-07, "loss": 0.2416, "step": 26264, "teacher_loss": 0.23137059807777405 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.7903521060943604, "learning_rate": 3.357667372627754e-07, "loss": 0.2566, "step": 26265, "teacher_loss": 0.19729208946228027 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.2967739999294281, "learning_rate": 3.352890361763378e-07, "loss": 0.2082, "step": 26266, "teacher_loss": 0.19836321473121643 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.4878973960876465, "learning_rate": 3.348116713077537e-07, "loss": 0.2785, "step": 26267, "teacher_loss": 0.2552553713321686 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.6344353556632996, "learning_rate": 3.3433464266796945e-07, "loss": 0.2351, "step": 26268, "teacher_loss": 0.19077935814857483 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.16268765926361084, "learning_rate": 3.3385795026792454e-07, "loss": 0.1362, "step": 26269, "teacher_loss": 0.1332828551530838 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.4555031657218933, "learning_rate": 3.3338159411854363e-07, "loss": 0.1983, "step": 26270, "teacher_loss": 0.16968286037445068 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5620983839035034, "learning_rate": 3.3290557423074964e-07, "loss": 0.1706, "step": 26271, "teacher_loss": 0.1271371841430664 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.6019840240478516, "learning_rate": 3.3242989061545715e-07, "loss": 0.3218, "step": 26272, "teacher_loss": 0.290721595287323 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.42375150322914124, "learning_rate": 3.3195454328357246e-07, "loss": 0.1991, "step": 26273, "teacher_loss": 0.17414425313472748 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5715345144271851, "learning_rate": 3.3147953224599017e-07, "loss": 0.2348, "step": 26274, "teacher_loss": 0.19743695855140686 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.4060556888580322, "learning_rate": 3.3100485751360656e-07, "loss": 0.2333, "step": 26275, "teacher_loss": 0.2141450047492981 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.7044281363487244, "learning_rate": 3.3053051909730123e-07, "loss": 0.2395, "step": 26276, "teacher_loss": 0.1878378540277481 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.19606342911720276, "learning_rate": 3.300565170079489e-07, "loss": 0.2074, "step": 26277, "teacher_loss": 0.2086627334356308 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.48753833770751953, "learning_rate": 3.295828512564175e-07, "loss": 0.1909, "step": 26278, "teacher_loss": 0.15795306861400604 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.16648554801940918, "learning_rate": 3.2910952185357e-07, "loss": 0.2165, "step": 26279, "teacher_loss": 0.22205622494220734 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.1778557002544403, "learning_rate": 3.2863652881025273e-07, "loss": 0.1175, "step": 26280, "teacher_loss": 0.11084824055433273 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3704199194908142, "learning_rate": 3.2816387213731205e-07, "loss": 0.1731, "step": 26281, "teacher_loss": 0.1512129008769989 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5568274855613708, "learning_rate": 3.2769155184558754e-07, "loss": 0.2153, "step": 26282, "teacher_loss": 0.17738838493824005 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3030836284160614, "learning_rate": 3.272195679459072e-07, "loss": 0.2037, "step": 26283, "teacher_loss": 0.1926092952489853 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 1.03810715675354, "learning_rate": 3.2674792044908587e-07, "loss": 0.327, "step": 26284, "teacher_loss": 0.24797660112380981 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5469939708709717, "learning_rate": 3.262766093659464e-07, "loss": 0.2262, "step": 26285, "teacher_loss": 0.19051909446716309 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.36931300163269043, "learning_rate": 3.258056347072902e-07, "loss": 0.1747, "step": 26286, "teacher_loss": 0.1530396193265915 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.6724317073822021, "learning_rate": 3.2533499648391027e-07, "loss": 0.3356, "step": 26287, "teacher_loss": 0.29819542169570923 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.30945494771003723, "learning_rate": 3.248646947066064e-07, "loss": 0.141, "step": 26288, "teacher_loss": 0.12223626673221588 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3482980728149414, "learning_rate": 3.243947293861582e-07, "loss": 0.1832, "step": 26289, "teacher_loss": 0.16480231285095215 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.603256344795227, "learning_rate": 3.2392510053333544e-07, "loss": 0.2733, "step": 26290, "teacher_loss": 0.23659095168113708 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.41624385118484497, "learning_rate": 3.234558081589095e-07, "loss": 0.2476, "step": 26291, "teacher_loss": 0.22891342639923096 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3951801061630249, "learning_rate": 3.229868522736418e-07, "loss": 0.1865, "step": 26292, "teacher_loss": 0.16329112648963928 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5902349948883057, "learning_rate": 3.2251823288827863e-07, "loss": 0.2538, "step": 26293, "teacher_loss": 0.2164284586906433 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3152114748954773, "learning_rate": 3.220499500135682e-07, "loss": 0.1829, "step": 26294, "teacher_loss": 0.16816270351409912 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.613085925579071, "learning_rate": 3.2158200366024684e-07, "loss": 0.2025, "step": 26295, "teacher_loss": 0.1568540334701538 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5086848735809326, "learning_rate": 3.211143938390393e-07, "loss": 0.23, "step": 26296, "teacher_loss": 0.19899138808250427 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5446643829345703, "learning_rate": 3.2064712056067036e-07, "loss": 0.2495, "step": 26297, "teacher_loss": 0.21670971810817719 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.32474637031555176, "learning_rate": 3.2018018383585315e-07, "loss": 0.2054, "step": 26298, "teacher_loss": 0.1921561360359192 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.39813587069511414, "learning_rate": 3.197135836752907e-07, "loss": 0.2352, "step": 26299, "teacher_loss": 0.21707241237163544 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.45196977257728577, "learning_rate": 3.192473200896828e-07, "loss": 0.1938, "step": 26300, "teacher_loss": 0.1651480793952942 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5202337503433228, "learning_rate": 3.1878139308971765e-07, "loss": 0.1896, "step": 26301, "teacher_loss": 0.15282979607582092 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.3252509832382202, "learning_rate": 3.183158026860816e-07, "loss": 0.2581, "step": 26302, "teacher_loss": 0.25064074993133545 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.43483084440231323, "learning_rate": 3.1785054888944286e-07, "loss": 0.2261, "step": 26303, "teacher_loss": 0.20291420817375183 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.38876041769981384, "learning_rate": 3.173856317104712e-07, "loss": 0.1841, "step": 26304, "teacher_loss": 0.16133934259414673 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.42302560806274414, "learning_rate": 3.169210511598297e-07, "loss": 0.2019, "step": 26305, "teacher_loss": 0.17732861638069153 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.5700348615646362, "learning_rate": 3.1645680724816326e-07, "loss": 0.2735, "step": 26306, "teacher_loss": 0.24056124687194824 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.8543387055397034, "learning_rate": 3.1599289998611834e-07, "loss": 0.3369, "step": 26307, "teacher_loss": 0.27941831946372986 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.37396278977394104, "learning_rate": 3.1552932938433476e-07, "loss": 0.2127, "step": 26308, "teacher_loss": 0.1947641372680664 }, { "compression_loss": 0.0, "epoch": 4.75, "label_loss": 0.40257367491722107, "learning_rate": 3.15066095453434e-07, "loss": 0.2388, "step": 26309, "teacher_loss": 0.220563143491745 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4022144675254822, "learning_rate": 3.146031982040426e-07, "loss": 0.2384, "step": 26310, "teacher_loss": 0.22024913132190704 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.23328836262226105, "learning_rate": 3.141406376467687e-07, "loss": 0.1356, "step": 26311, "teacher_loss": 0.12469995766878128 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3091126084327698, "learning_rate": 3.1367841379221885e-07, "loss": 0.2096, "step": 26312, "teacher_loss": 0.19856882095336914 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.6228529214859009, "learning_rate": 3.132165266509945e-07, "loss": 0.325, "step": 26313, "teacher_loss": 0.29187560081481934 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.2866888642311096, "learning_rate": 3.12754976233679e-07, "loss": 0.1454, "step": 26314, "teacher_loss": 0.12974685430526733 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.8244979381561279, "learning_rate": 3.1229376255085707e-07, "loss": 0.2211, "step": 26315, "teacher_loss": 0.15406744182109833 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.41625070571899414, "learning_rate": 3.118328856131053e-07, "loss": 0.2463, "step": 26316, "teacher_loss": 0.22736340761184692 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.28182893991470337, "learning_rate": 3.1137234543098524e-07, "loss": 0.1761, "step": 26317, "teacher_loss": 0.16436265408992767 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.44042664766311646, "learning_rate": 3.1091214201506015e-07, "loss": 0.2201, "step": 26318, "teacher_loss": 0.19556373357772827 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.532873272895813, "learning_rate": 3.1045227537587984e-07, "loss": 0.2351, "step": 26319, "teacher_loss": 0.20199242234230042 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5981099605560303, "learning_rate": 3.099927455239876e-07, "loss": 0.2278, "step": 26320, "teacher_loss": 0.18663232028484344 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.29750481247901917, "learning_rate": 3.0953355246991663e-07, "loss": 0.2348, "step": 26321, "teacher_loss": 0.22787630558013916 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.11789701879024506, "learning_rate": 3.0907469622420024e-07, "loss": 0.1564, "step": 26322, "teacher_loss": 0.16062262654304504 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.21908657252788544, "learning_rate": 3.08616176797355e-07, "loss": 0.1993, "step": 26323, "teacher_loss": 0.19707436859607697 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3766312599182129, "learning_rate": 3.081579941998908e-07, "loss": 0.192, "step": 26324, "teacher_loss": 0.17143815755844116 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5554318428039551, "learning_rate": 3.077001484423175e-07, "loss": 0.259, "step": 26325, "teacher_loss": 0.22609932720661163 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.33158108592033386, "learning_rate": 3.072426395351302e-07, "loss": 0.1857, "step": 26326, "teacher_loss": 0.16948994994163513 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.30557170510292053, "learning_rate": 3.0678546748881544e-07, "loss": 0.2006, "step": 26327, "teacher_loss": 0.18898791074752808 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.48600152134895325, "learning_rate": 3.063286323138598e-07, "loss": 0.2139, "step": 26328, "teacher_loss": 0.18365202844142914 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5222252011299133, "learning_rate": 3.058721340207349e-07, "loss": 0.215, "step": 26329, "teacher_loss": 0.1808834820985794 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5014474391937256, "learning_rate": 3.054159726199057e-07, "loss": 0.2845, "step": 26330, "teacher_loss": 0.2604144811630249 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4072541296482086, "learning_rate": 3.049601481218306e-07, "loss": 0.2692, "step": 26331, "teacher_loss": 0.2538619041442871 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5288593769073486, "learning_rate": 3.045046605369628e-07, "loss": 0.2626, "step": 26332, "teacher_loss": 0.2330707609653473 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.486158549785614, "learning_rate": 3.0404950987574566e-07, "loss": 0.2574, "step": 26333, "teacher_loss": 0.23201040923595428 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4799298644065857, "learning_rate": 3.0359469614860745e-07, "loss": 0.1861, "step": 26334, "teacher_loss": 0.15346582233905792 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.1772279143333435, "learning_rate": 3.031402193659849e-07, "loss": 0.1796, "step": 26335, "teacher_loss": 0.17987008392810822 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3721959888935089, "learning_rate": 3.0268607953829297e-07, "loss": 0.1887, "step": 26336, "teacher_loss": 0.16830384731292725 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5456550717353821, "learning_rate": 3.022322766759417e-07, "loss": 0.2174, "step": 26337, "teacher_loss": 0.1809774935245514 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3592498302459717, "learning_rate": 3.01778810789341e-07, "loss": 0.1746, "step": 26338, "teacher_loss": 0.1540893018245697 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3503061532974243, "learning_rate": 3.0132568188888433e-07, "loss": 0.1834, "step": 26339, "teacher_loss": 0.1648324429988861 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.6267522573471069, "learning_rate": 3.0087288998496e-07, "loss": 0.5017, "step": 26340, "teacher_loss": 0.4878247082233429 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.25746411085128784, "learning_rate": 3.004204350879497e-07, "loss": 0.2292, "step": 26341, "teacher_loss": 0.22608506679534912 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3029187023639679, "learning_rate": 2.999683172082301e-07, "loss": 0.2235, "step": 26342, "teacher_loss": 0.2146538347005844 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.32246890664100647, "learning_rate": 2.9951653635616137e-07, "loss": 0.2214, "step": 26343, "teacher_loss": 0.2101290076971054 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.19092437624931335, "learning_rate": 2.9906509254210344e-07, "loss": 0.1965, "step": 26344, "teacher_loss": 0.19709143042564392 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.367572158575058, "learning_rate": 2.986139857764097e-07, "loss": 0.2081, "step": 26345, "teacher_loss": 0.19033510982990265 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.5233211517333984, "learning_rate": 2.981632160694187e-07, "loss": 0.2731, "step": 26346, "teacher_loss": 0.24531182646751404 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.35632947087287903, "learning_rate": 2.9771278343146526e-07, "loss": 0.1611, "step": 26347, "teacher_loss": 0.13943755626678467 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.14506271481513977, "learning_rate": 2.972626878728812e-07, "loss": 0.1811, "step": 26348, "teacher_loss": 0.18512314558029175 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.2981428802013397, "learning_rate": 2.9681292940398173e-07, "loss": 0.2308, "step": 26349, "teacher_loss": 0.2233605831861496 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.8253594636917114, "learning_rate": 2.963635080350785e-07, "loss": 0.2061, "step": 26350, "teacher_loss": 0.1373479813337326 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.338609516620636, "learning_rate": 2.9591442377647496e-07, "loss": 0.2608, "step": 26351, "teacher_loss": 0.2521321773529053 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4732528328895569, "learning_rate": 2.954656766384711e-07, "loss": 0.2211, "step": 26352, "teacher_loss": 0.19309645891189575 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.43659302592277527, "learning_rate": 2.950172666313522e-07, "loss": 0.2237, "step": 26353, "teacher_loss": 0.20003335177898407 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4235661029815674, "learning_rate": 2.9456919376539825e-07, "loss": 0.2094, "step": 26354, "teacher_loss": 0.1856456696987152 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3486536741256714, "learning_rate": 2.9412145805088443e-07, "loss": 0.216, "step": 26355, "teacher_loss": 0.20124486088752747 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.24341291189193726, "learning_rate": 2.9367405949807413e-07, "loss": 0.1615, "step": 26356, "teacher_loss": 0.15240870416164398 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.7425260543823242, "learning_rate": 2.932269981172275e-07, "loss": 0.3513, "step": 26357, "teacher_loss": 0.307780921459198 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.2214815616607666, "learning_rate": 2.927802739185914e-07, "loss": 0.2012, "step": 26358, "teacher_loss": 0.1988985240459442 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3996084928512573, "learning_rate": 2.9233388691240927e-07, "loss": 0.248, "step": 26359, "teacher_loss": 0.23118874430656433 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3956166207790375, "learning_rate": 2.918878371089162e-07, "loss": 0.1943, "step": 26360, "teacher_loss": 0.17198149859905243 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3624839782714844, "learning_rate": 2.914421245183374e-07, "loss": 0.1648, "step": 26361, "teacher_loss": 0.14287975430488586 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.3230903446674347, "learning_rate": 2.9099674915089136e-07, "loss": 0.1902, "step": 26362, "teacher_loss": 0.1754865050315857 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.29984021186828613, "learning_rate": 2.905517110167899e-07, "loss": 0.1526, "step": 26363, "teacher_loss": 0.13621768355369568 }, { "compression_loss": 0.0, "epoch": 4.76, "label_loss": 0.4239395260810852, "learning_rate": 2.9010701012623655e-07, "loss": 0.3026, "step": 26364, "teacher_loss": 0.2891601324081421 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5674530267715454, "learning_rate": 2.896626464894281e-07, "loss": 0.2767, "step": 26365, "teacher_loss": 0.24441049993038177 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.44827908277511597, "learning_rate": 2.8921862011654974e-07, "loss": 0.217, "step": 26366, "teacher_loss": 0.1913110762834549 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4237147569656372, "learning_rate": 2.8877493101778505e-07, "loss": 0.2401, "step": 26367, "teacher_loss": 0.21964557468891144 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.27484995126724243, "learning_rate": 2.883315792033042e-07, "loss": 0.1901, "step": 26368, "teacher_loss": 0.18069766461849213 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4476448595523834, "learning_rate": 2.878885646832707e-07, "loss": 0.2332, "step": 26369, "teacher_loss": 0.20936471223831177 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4599837064743042, "learning_rate": 2.874458874678432e-07, "loss": 0.1811, "step": 26370, "teacher_loss": 0.15007565915584564 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.31840455532073975, "learning_rate": 2.870035475671734e-07, "loss": 0.2986, "step": 26371, "teacher_loss": 0.2964409291744232 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5051140785217285, "learning_rate": 2.865615449913983e-07, "loss": 0.2456, "step": 26372, "teacher_loss": 0.21673625707626343 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.49418067932128906, "learning_rate": 2.8611987975065643e-07, "loss": 0.2602, "step": 26373, "teacher_loss": 0.2341851145029068 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.39917486906051636, "learning_rate": 2.856785518550681e-07, "loss": 0.2121, "step": 26374, "teacher_loss": 0.19130854308605194 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.6162400245666504, "learning_rate": 2.852375613147551e-07, "loss": 0.2296, "step": 26375, "teacher_loss": 0.18666520714759827 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.20338264107704163, "learning_rate": 2.847969081398294e-07, "loss": 0.2139, "step": 26376, "teacher_loss": 0.21512313187122345 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.24729056656360626, "learning_rate": 2.8435659234039127e-07, "loss": 0.1405, "step": 26377, "teacher_loss": 0.12858551740646362 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.35191404819488525, "learning_rate": 2.839166139265359e-07, "loss": 0.1491, "step": 26378, "teacher_loss": 0.12652051448822021 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.34629392623901367, "learning_rate": 2.834769729083536e-07, "loss": 0.1931, "step": 26379, "teacher_loss": 0.17610520124435425 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.38561904430389404, "learning_rate": 2.830376692959197e-07, "loss": 0.1867, "step": 26380, "teacher_loss": 0.16459277272224426 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.28386735916137695, "learning_rate": 2.8259870309930933e-07, "loss": 0.2168, "step": 26381, "teacher_loss": 0.2093191146850586 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.24095045030117035, "learning_rate": 2.8216007432858625e-07, "loss": 0.1971, "step": 26382, "teacher_loss": 0.1922469586133957 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.2198868691921234, "learning_rate": 2.8172178299380567e-07, "loss": 0.1287, "step": 26383, "teacher_loss": 0.11860324442386627 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5486941337585449, "learning_rate": 2.8128382910501626e-07, "loss": 0.234, "step": 26384, "teacher_loss": 0.1990022361278534 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.24689486622810364, "learning_rate": 2.8084621267226e-07, "loss": 0.1895, "step": 26385, "teacher_loss": 0.18308451771736145 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.12628792226314545, "learning_rate": 2.804089337055704e-07, "loss": 0.1404, "step": 26386, "teacher_loss": 0.14197522401809692 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5132458209991455, "learning_rate": 2.7997199221497126e-07, "loss": 0.2338, "step": 26387, "teacher_loss": 0.20276933908462524 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 1.0073460340499878, "learning_rate": 2.7953538821048284e-07, "loss": 0.4536, "step": 26388, "teacher_loss": 0.39209312200546265 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.45065838098526, "learning_rate": 2.7909912170211217e-07, "loss": 0.2622, "step": 26389, "teacher_loss": 0.24121883511543274 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.6563032865524292, "learning_rate": 2.786631926998645e-07, "loss": 0.1917, "step": 26390, "teacher_loss": 0.14005742967128754 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.21098321676254272, "learning_rate": 2.7822760121373193e-07, "loss": 0.2678, "step": 26391, "teacher_loss": 0.27416369318962097 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.3288878798484802, "learning_rate": 2.7779234725370304e-07, "loss": 0.3039, "step": 26392, "teacher_loss": 0.3010862171649933 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.3994484841823578, "learning_rate": 2.7735743082975494e-07, "loss": 0.1921, "step": 26393, "teacher_loss": 0.1691109538078308 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.6256210803985596, "learning_rate": 2.769228519518613e-07, "loss": 0.2387, "step": 26394, "teacher_loss": 0.19569721817970276 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.250484824180603, "learning_rate": 2.764886106299841e-07, "loss": 0.1844, "step": 26395, "teacher_loss": 0.17708684504032135 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.3137631118297577, "learning_rate": 2.760547068740804e-07, "loss": 0.1524, "step": 26396, "teacher_loss": 0.13451674580574036 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.18493443727493286, "learning_rate": 2.756211406940956e-07, "loss": 0.1588, "step": 26397, "teacher_loss": 0.15591105818748474 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5682260990142822, "learning_rate": 2.7518791209997505e-07, "loss": 0.1838, "step": 26398, "teacher_loss": 0.14107362926006317 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.6092061400413513, "learning_rate": 2.747550211016475e-07, "loss": 0.268, "step": 26399, "teacher_loss": 0.23008251190185547 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4932122826576233, "learning_rate": 2.7432246770903835e-07, "loss": 0.207, "step": 26400, "teacher_loss": 0.1751624494791031 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.3317599296569824, "learning_rate": 2.7389025193206297e-07, "loss": 0.1604, "step": 26401, "teacher_loss": 0.14130732417106628 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.33530908823013306, "learning_rate": 2.734583737806368e-07, "loss": 0.3463, "step": 26402, "teacher_loss": 0.34751635789871216 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.2231946736574173, "learning_rate": 2.730268332646552e-07, "loss": 0.1852, "step": 26403, "teacher_loss": 0.18094466626644135 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.6410014629364014, "learning_rate": 2.725956303940136e-07, "loss": 0.2475, "step": 26404, "teacher_loss": 0.2037343978881836 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.18390896916389465, "learning_rate": 2.7216476517860245e-07, "loss": 0.1635, "step": 26405, "teacher_loss": 0.16128680109977722 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4148280918598175, "learning_rate": 2.717342376282955e-07, "loss": 0.1901, "step": 26406, "teacher_loss": 0.16518238186836243 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.21636715531349182, "learning_rate": 2.7130404775296323e-07, "loss": 0.1077, "step": 26407, "teacher_loss": 0.09557149559259415 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5851198434829712, "learning_rate": 2.708741955624727e-07, "loss": 0.2315, "step": 26408, "teacher_loss": 0.19225898385047913 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.47046583890914917, "learning_rate": 2.7044468106667607e-07, "loss": 0.1835, "step": 26409, "teacher_loss": 0.15158924460411072 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.4412728548049927, "learning_rate": 2.7001550427541877e-07, "loss": 0.1856, "step": 26410, "teacher_loss": 0.15719375014305115 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.27848392724990845, "learning_rate": 2.6958666519854626e-07, "loss": 0.1596, "step": 26411, "teacher_loss": 0.1463763415813446 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.28710201382637024, "learning_rate": 2.6915816384588566e-07, "loss": 0.2505, "step": 26412, "teacher_loss": 0.24644553661346436 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.25141000747680664, "learning_rate": 2.687300002272641e-07, "loss": 0.2122, "step": 26413, "teacher_loss": 0.20789504051208496 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.49327850341796875, "learning_rate": 2.683021743524955e-07, "loss": 0.2618, "step": 26414, "teacher_loss": 0.23603224754333496 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.37122592329978943, "learning_rate": 2.6787468623139186e-07, "loss": 0.1799, "step": 26415, "teacher_loss": 0.15866342186927795 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.30191725492477417, "learning_rate": 2.6744753587375216e-07, "loss": 0.1692, "step": 26416, "teacher_loss": 0.15440186858177185 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.38406866788864136, "learning_rate": 2.670207232893684e-07, "loss": 0.2757, "step": 26417, "teacher_loss": 0.26361894607543945 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.558273196220398, "learning_rate": 2.665942484880296e-07, "loss": 0.2015, "step": 26418, "teacher_loss": 0.16183751821517944 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.33274343609809875, "learning_rate": 2.6616811147951116e-07, "loss": 0.1573, "step": 26419, "teacher_loss": 0.1377846896648407 }, { "compression_loss": 0.0, "epoch": 4.77, "label_loss": 0.5601588487625122, "learning_rate": 2.657423122735836e-07, "loss": 0.1978, "step": 26420, "teacher_loss": 0.15754413604736328 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.7319083213806152, "learning_rate": 2.653168508800091e-07, "loss": 0.2985, "step": 26421, "teacher_loss": 0.2503550052642822 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3340081572532654, "learning_rate": 2.648917273085416e-07, "loss": 0.1891, "step": 26422, "teacher_loss": 0.1730543076992035 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.10499230027198792, "learning_rate": 2.644669415689299e-07, "loss": 0.1734, "step": 26423, "teacher_loss": 0.1809711456298828 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3077067732810974, "learning_rate": 2.6404249367091126e-07, "loss": 0.1808, "step": 26424, "teacher_loss": 0.166713684797287 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.4672786593437195, "learning_rate": 2.6361838362421784e-07, "loss": 0.2314, "step": 26425, "teacher_loss": 0.20522728562355042 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.4785950183868408, "learning_rate": 2.631946114385719e-07, "loss": 0.1805, "step": 26426, "teacher_loss": 0.14734497666358948 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.643587052822113, "learning_rate": 2.627711771236907e-07, "loss": 0.2314, "step": 26427, "teacher_loss": 0.1855829656124115 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.18949052691459656, "learning_rate": 2.6234808068928305e-07, "loss": 0.1623, "step": 26428, "teacher_loss": 0.15924076735973358 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.31570878624916077, "learning_rate": 2.619253221450479e-07, "loss": 0.2534, "step": 26429, "teacher_loss": 0.24648021161556244 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.7868441343307495, "learning_rate": 2.6150290150067593e-07, "loss": 0.2048, "step": 26430, "teacher_loss": 0.1401652991771698 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.5456027984619141, "learning_rate": 2.6108081876585755e-07, "loss": 0.1727, "step": 26431, "teacher_loss": 0.1312258541584015 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.28667229413986206, "learning_rate": 2.606590739502634e-07, "loss": 0.1831, "step": 26432, "teacher_loss": 0.1716083437204361 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.16899427771568298, "learning_rate": 2.6023766706356756e-07, "loss": 0.1447, "step": 26433, "teacher_loss": 0.1420421004295349 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.37226301431655884, "learning_rate": 2.598165981154288e-07, "loss": 0.1579, "step": 26434, "teacher_loss": 0.13403035700321198 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.44914135336875916, "learning_rate": 2.593958671155028e-07, "loss": 0.2185, "step": 26435, "teacher_loss": 0.19283771514892578 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.21628999710083008, "learning_rate": 2.5897547407343516e-07, "loss": 0.1639, "step": 26436, "teacher_loss": 0.1580483615398407 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.2801150977611542, "learning_rate": 2.5855541899886314e-07, "loss": 0.2193, "step": 26437, "teacher_loss": 0.212594673037529 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.8119360208511353, "learning_rate": 2.5813570190141747e-07, "loss": 0.4668, "step": 26438, "teacher_loss": 0.42847010493278503 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.4729951322078705, "learning_rate": 2.5771632279072363e-07, "loss": 0.2072, "step": 26439, "teacher_loss": 0.17761777341365814 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.22920605540275574, "learning_rate": 2.5729728167639413e-07, "loss": 0.1669, "step": 26440, "teacher_loss": 0.159982830286026 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.8074127435684204, "learning_rate": 2.5687857856803775e-07, "loss": 0.3316, "step": 26441, "teacher_loss": 0.27872878313064575 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.20560744404792786, "learning_rate": 2.564602134752536e-07, "loss": 0.1814, "step": 26442, "teacher_loss": 0.17868748307228088 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.430389404296875, "learning_rate": 2.560421864076307e-07, "loss": 0.2, "step": 26443, "teacher_loss": 0.17438830435276031 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3647538125514984, "learning_rate": 2.556244973747579e-07, "loss": 0.1727, "step": 26444, "teacher_loss": 0.15137921273708344 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 1.0523293018341064, "learning_rate": 2.552071463862093e-07, "loss": 0.31, "step": 26445, "teacher_loss": 0.2275182604789734 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.6857502460479736, "learning_rate": 2.54790133451554e-07, "loss": 0.2519, "step": 26446, "teacher_loss": 0.20367178320884705 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.16118204593658447, "learning_rate": 2.5437345858035253e-07, "loss": 0.173, "step": 26447, "teacher_loss": 0.1743564009666443 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3464326858520508, "learning_rate": 2.539571217821557e-07, "loss": 0.1638, "step": 26448, "teacher_loss": 0.14353898167610168 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.28996506333351135, "learning_rate": 2.5354112306651414e-07, "loss": 0.2213, "step": 26449, "teacher_loss": 0.2136838138103485 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.17318224906921387, "learning_rate": 2.531254624429602e-07, "loss": 0.165, "step": 26450, "teacher_loss": 0.16404922306537628 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.40535157918930054, "learning_rate": 2.5271013992102796e-07, "loss": 0.2462, "step": 26451, "teacher_loss": 0.228561669588089 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.33080509305000305, "learning_rate": 2.522951555102365e-07, "loss": 0.2033, "step": 26452, "teacher_loss": 0.18909047544002533 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3085571527481079, "learning_rate": 2.518805092201015e-07, "loss": 0.184, "step": 26453, "teacher_loss": 0.1702013909816742 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.14445427060127258, "learning_rate": 2.5146620106012706e-07, "loss": 0.1399, "step": 26454, "teacher_loss": 0.1394350528717041 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.5394058227539062, "learning_rate": 2.5105223103981723e-07, "loss": 0.341, "step": 26455, "teacher_loss": 0.31900209188461304 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.29711222648620605, "learning_rate": 2.5063859916866107e-07, "loss": 0.2425, "step": 26456, "teacher_loss": 0.23638153076171875 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.2803683876991272, "learning_rate": 2.50225305456136e-07, "loss": 0.1703, "step": 26457, "teacher_loss": 0.158098965883255 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.35664355754852295, "learning_rate": 2.498123499117261e-07, "loss": 0.1625, "step": 26458, "teacher_loss": 0.14097297191619873 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.5782793760299683, "learning_rate": 2.493997325448971e-07, "loss": 0.2614, "step": 26459, "teacher_loss": 0.22613874077796936 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.4967818856239319, "learning_rate": 2.489874533651032e-07, "loss": 0.1739, "step": 26460, "teacher_loss": 0.1380581110715866 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.9243016242980957, "learning_rate": 2.4857551238180507e-07, "loss": 0.3001, "step": 26461, "teacher_loss": 0.23076987266540527 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.5367703437805176, "learning_rate": 2.4816390960444193e-07, "loss": 0.2458, "step": 26462, "teacher_loss": 0.2134486436843872 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.481407105922699, "learning_rate": 2.477526450424511e-07, "loss": 0.2575, "step": 26463, "teacher_loss": 0.23266759514808655 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3602825403213501, "learning_rate": 2.4734171870526343e-07, "loss": 0.2303, "step": 26464, "teacher_loss": 0.21589820086956024 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.13266390562057495, "learning_rate": 2.469311306022998e-07, "loss": 0.1847, "step": 26465, "teacher_loss": 0.1905362606048584 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.4018552899360657, "learning_rate": 2.4652088074297254e-07, "loss": 0.1965, "step": 26466, "teacher_loss": 0.17366188764572144 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.16853779554367065, "learning_rate": 2.461109691366892e-07, "loss": 0.1851, "step": 26467, "teacher_loss": 0.18689538538455963 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.20183219015598297, "learning_rate": 2.457013957928472e-07, "loss": 0.134, "step": 26468, "teacher_loss": 0.1264268010854721 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.801593005657196, "learning_rate": 2.4529216072083583e-07, "loss": 0.2409, "step": 26469, "teacher_loss": 0.17864912748336792 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.23200616240501404, "learning_rate": 2.448832639300358e-07, "loss": 0.1947, "step": 26470, "teacher_loss": 0.19052262604236603 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.5465121269226074, "learning_rate": 2.4447470542982797e-07, "loss": 0.2173, "step": 26471, "teacher_loss": 0.18067757785320282 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.7589600086212158, "learning_rate": 2.440664852295749e-07, "loss": 0.2681, "step": 26472, "teacher_loss": 0.21354196965694427 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.6373481750488281, "learning_rate": 2.4365860333863733e-07, "loss": 0.2424, "step": 26473, "teacher_loss": 0.19853463768959045 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.2517188787460327, "learning_rate": 2.4325105976636453e-07, "loss": 0.2081, "step": 26474, "teacher_loss": 0.20324605703353882 }, { "compression_loss": 0.0, "epoch": 4.78, "label_loss": 0.3956500291824341, "learning_rate": 2.4284385452210235e-07, "loss": 0.2211, "step": 26475, "teacher_loss": 0.20167234539985657 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.45357558131217957, "learning_rate": 2.424369876151866e-07, "loss": 0.2909, "step": 26476, "teacher_loss": 0.2727966904640198 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.49215489625930786, "learning_rate": 2.4203045905494494e-07, "loss": 0.1995, "step": 26477, "teacher_loss": 0.16700318455696106 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.44279059767723083, "learning_rate": 2.416242688506998e-07, "loss": 0.2294, "step": 26478, "teacher_loss": 0.2057175189256668 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.24307036399841309, "learning_rate": 2.412184170117604e-07, "loss": 0.16, "step": 26479, "teacher_loss": 0.150734543800354 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.7411606311798096, "learning_rate": 2.4081290354743437e-07, "loss": 0.2158, "step": 26480, "teacher_loss": 0.1573847532272339 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.36068233847618103, "learning_rate": 2.4040772846701753e-07, "loss": 0.1761, "step": 26481, "teacher_loss": 0.15562735497951508 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2730235755443573, "learning_rate": 2.4000289177980086e-07, "loss": 0.151, "step": 26482, "teacher_loss": 0.13745997846126556 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.6067962646484375, "learning_rate": 2.395983934950652e-07, "loss": 0.2142, "step": 26483, "teacher_loss": 0.1705491542816162 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5053513050079346, "learning_rate": 2.3919423362208314e-07, "loss": 0.244, "step": 26484, "teacher_loss": 0.21493184566497803 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2645168900489807, "learning_rate": 2.3879041217012233e-07, "loss": 0.1899, "step": 26485, "teacher_loss": 0.18162211775779724 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.6817524433135986, "learning_rate": 2.3838692914844195e-07, "loss": 0.2229, "step": 26486, "teacher_loss": 0.1719537377357483 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.41481921076774597, "learning_rate": 2.3798378456628965e-07, "loss": 0.2059, "step": 26487, "teacher_loss": 0.18273188173770905 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.24145865440368652, "learning_rate": 2.3758097843291138e-07, "loss": 0.1735, "step": 26488, "teacher_loss": 0.16597720980644226 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.3588376045227051, "learning_rate": 2.3717851075754305e-07, "loss": 0.1788, "step": 26489, "teacher_loss": 0.15881605446338654 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.3366454243659973, "learning_rate": 2.3677638154940895e-07, "loss": 0.1678, "step": 26490, "teacher_loss": 0.14909303188323975 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5983163118362427, "learning_rate": 2.3637459081773005e-07, "loss": 0.1556, "step": 26491, "teacher_loss": 0.10645299404859543 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.33436524868011475, "learning_rate": 2.3597313857171732e-07, "loss": 0.152, "step": 26492, "teacher_loss": 0.13169236481189728 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.4331894814968109, "learning_rate": 2.3557202482057671e-07, "loss": 0.2406, "step": 26493, "teacher_loss": 0.21915876865386963 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.3451586365699768, "learning_rate": 2.3517124957350257e-07, "loss": 0.2379, "step": 26494, "teacher_loss": 0.22603173553943634 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.36440956592559814, "learning_rate": 2.3477081283968582e-07, "loss": 0.1448, "step": 26495, "teacher_loss": 0.12035196274518967 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.39625445008277893, "learning_rate": 2.3437071462830585e-07, "loss": 0.2064, "step": 26496, "teacher_loss": 0.18532824516296387 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2373029589653015, "learning_rate": 2.339709549485336e-07, "loss": 0.2252, "step": 26497, "teacher_loss": 0.2238886058330536 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.27212581038475037, "learning_rate": 2.3357153380953845e-07, "loss": 0.1484, "step": 26498, "teacher_loss": 0.13464315235614777 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.4619075059890747, "learning_rate": 2.331724512204747e-07, "loss": 0.1848, "step": 26499, "teacher_loss": 0.15403109788894653 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2452515959739685, "learning_rate": 2.327737071904934e-07, "loss": 0.1512, "step": 26500, "teacher_loss": 0.1407509446144104 }, { "epoch": 4.79, "eval_exact_match": 80.66225165562913, "eval_f1": 87.93879348439872, "step": 26500 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5733667612075806, "learning_rate": 2.3237530172873722e-07, "loss": 0.2678, "step": 26501, "teacher_loss": 0.23388376832008362 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.7125282287597656, "learning_rate": 2.3197723484434054e-07, "loss": 0.283, "step": 26502, "teacher_loss": 0.23530232906341553 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2018968164920807, "learning_rate": 2.3157950654642767e-07, "loss": 0.1515, "step": 26503, "teacher_loss": 0.14585034549236298 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.38363999128341675, "learning_rate": 2.3118211684411972e-07, "loss": 0.2264, "step": 26504, "teacher_loss": 0.20890435576438904 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.26954373717308044, "learning_rate": 2.30785065746526e-07, "loss": 0.1929, "step": 26505, "teacher_loss": 0.18433818221092224 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5033186674118042, "learning_rate": 2.30388353262751e-07, "loss": 0.2231, "step": 26506, "teacher_loss": 0.19196519255638123 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.21033942699432373, "learning_rate": 2.2999197940188732e-07, "loss": 0.2462, "step": 26507, "teacher_loss": 0.25014275312423706 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2414764165878296, "learning_rate": 2.2959594417302776e-07, "loss": 0.2291, "step": 26508, "teacher_loss": 0.22773322463035583 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5087376236915588, "learning_rate": 2.2920024758524837e-07, "loss": 0.1813, "step": 26509, "teacher_loss": 0.14496511220932007 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.24111220240592957, "learning_rate": 2.2880488964762192e-07, "loss": 0.1185, "step": 26510, "teacher_loss": 0.1048772931098938 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.6570134162902832, "learning_rate": 2.2840987036921112e-07, "loss": 0.2597, "step": 26511, "teacher_loss": 0.21555306017398834 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5416872501373291, "learning_rate": 2.280151897590771e-07, "loss": 0.2234, "step": 26512, "teacher_loss": 0.18802523612976074 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.4544157385826111, "learning_rate": 2.2762084782626425e-07, "loss": 0.1993, "step": 26513, "teacher_loss": 0.1710004210472107 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.3629343509674072, "learning_rate": 2.2722684457981702e-07, "loss": 0.2457, "step": 26514, "teacher_loss": 0.23268982768058777 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.3244015872478485, "learning_rate": 2.2683318002876653e-07, "loss": 0.1966, "step": 26515, "teacher_loss": 0.1823570877313614 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.6716141700744629, "learning_rate": 2.2643985418213885e-07, "loss": 0.2243, "step": 26516, "teacher_loss": 0.17457574605941772 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5024648904800415, "learning_rate": 2.2604686704895184e-07, "loss": 0.2098, "step": 26517, "teacher_loss": 0.17724426090717316 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5091278553009033, "learning_rate": 2.2565421863821488e-07, "loss": 0.1863, "step": 26518, "teacher_loss": 0.15045419335365295 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.26302212476730347, "learning_rate": 2.2526190895893252e-07, "loss": 0.2046, "step": 26519, "teacher_loss": 0.19807936251163483 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.37165072560310364, "learning_rate": 2.2486993802009582e-07, "loss": 0.2328, "step": 26520, "teacher_loss": 0.21733957529067993 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.42701277136802673, "learning_rate": 2.244783058306943e-07, "loss": 0.2234, "step": 26521, "teacher_loss": 0.20080187916755676 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.2805967926979065, "learning_rate": 2.2408701239970741e-07, "loss": 0.1739, "step": 26522, "teacher_loss": 0.1620882749557495 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5354892611503601, "learning_rate": 2.23696057736103e-07, "loss": 0.3474, "step": 26523, "teacher_loss": 0.3264472782611847 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.5913339853286743, "learning_rate": 2.233054418488456e-07, "loss": 0.2342, "step": 26524, "teacher_loss": 0.19456440210342407 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.6555658578872681, "learning_rate": 2.22915164746893e-07, "loss": 0.2652, "step": 26525, "teacher_loss": 0.22181519865989685 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.8262344598770142, "learning_rate": 2.225252264391914e-07, "loss": 0.3093, "step": 26526, "teacher_loss": 0.2518256604671478 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.4635121822357178, "learning_rate": 2.2213562693468026e-07, "loss": 0.3697, "step": 26527, "teacher_loss": 0.35927560925483704 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.323217511177063, "learning_rate": 2.2174636624229416e-07, "loss": 0.2255, "step": 26528, "teacher_loss": 0.21462669968605042 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.10991115868091583, "learning_rate": 2.2135744437095595e-07, "loss": 0.1601, "step": 26529, "teacher_loss": 0.16572898626327515 }, { "compression_loss": 0.0, "epoch": 4.79, "label_loss": 0.4349479079246521, "learning_rate": 2.2096886132958184e-07, "loss": 0.2743, "step": 26530, "teacher_loss": 0.2564626634120941 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5304349064826965, "learning_rate": 2.2058061712708466e-07, "loss": 0.4099, "step": 26531, "teacher_loss": 0.39650243520736694 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.7071031332015991, "learning_rate": 2.2019271177236067e-07, "loss": 0.2578, "step": 26532, "teacher_loss": 0.20789135992527008 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.23349101841449738, "learning_rate": 2.1980514527430606e-07, "loss": 0.1776, "step": 26533, "teacher_loss": 0.17133569717407227 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 1.0195116996765137, "learning_rate": 2.1941791764180542e-07, "loss": 0.9554, "step": 26534, "teacher_loss": 0.9482549428939819 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.42511966824531555, "learning_rate": 2.1903102888373993e-07, "loss": 0.2786, "step": 26535, "teacher_loss": 0.262276828289032 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.4225679039955139, "learning_rate": 2.186444790089742e-07, "loss": 0.1886, "step": 26536, "teacher_loss": 0.16264456510543823 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.24601562321186066, "learning_rate": 2.1825826802637617e-07, "loss": 0.1944, "step": 26537, "teacher_loss": 0.18864095211029053 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5502891540527344, "learning_rate": 2.1787239594479702e-07, "loss": 0.2373, "step": 26538, "teacher_loss": 0.20250718295574188 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3730257749557495, "learning_rate": 2.1748686277308472e-07, "loss": 0.2807, "step": 26539, "teacher_loss": 0.2704831063747406 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.34546980261802673, "learning_rate": 2.171016685200772e-07, "loss": 0.2523, "step": 26540, "teacher_loss": 0.2419959306716919 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.23065312206745148, "learning_rate": 2.16716813194609e-07, "loss": 0.175, "step": 26541, "teacher_loss": 0.16879487037658691 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.27378618717193604, "learning_rate": 2.163322968055015e-07, "loss": 0.2371, "step": 26542, "teacher_loss": 0.23298847675323486 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.352588415145874, "learning_rate": 2.1594811936156923e-07, "loss": 0.1551, "step": 26543, "teacher_loss": 0.13316959142684937 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5132936239242554, "learning_rate": 2.155642808716235e-07, "loss": 0.2136, "step": 26544, "teacher_loss": 0.18031004071235657 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.21607151627540588, "learning_rate": 2.151807813444606e-07, "loss": 0.1608, "step": 26545, "teacher_loss": 0.15461014211177826 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.2549065053462982, "learning_rate": 2.1479762078887687e-07, "loss": 0.1343, "step": 26546, "teacher_loss": 0.12088148295879364 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5719956755638123, "learning_rate": 2.1441479921365526e-07, "loss": 0.2295, "step": 26547, "teacher_loss": 0.19141870737075806 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.320910781621933, "learning_rate": 2.1403231662757206e-07, "loss": 0.2137, "step": 26548, "teacher_loss": 0.20182648301124573 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3157101273536682, "learning_rate": 2.136501730393986e-07, "loss": 0.2345, "step": 26549, "teacher_loss": 0.22548353672027588 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.663744330406189, "learning_rate": 2.132683684578929e-07, "loss": 0.2917, "step": 26550, "teacher_loss": 0.25032591819763184 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.6276815533638, "learning_rate": 2.128869028918129e-07, "loss": 0.2062, "step": 26551, "teacher_loss": 0.15932975709438324 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3351443409919739, "learning_rate": 2.1250577634989999e-07, "loss": 0.2057, "step": 26552, "teacher_loss": 0.19129127264022827 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.2985061705112457, "learning_rate": 2.121249888408955e-07, "loss": 0.1619, "step": 26553, "teacher_loss": 0.14672841131687164 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.23061443865299225, "learning_rate": 2.1174454037352908e-07, "loss": 0.1337, "step": 26554, "teacher_loss": 0.12288016080856323 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3937075436115265, "learning_rate": 2.1136443095652213e-07, "loss": 0.1821, "step": 26555, "teacher_loss": 0.1586090326309204 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.25787076354026794, "learning_rate": 2.10984660598591e-07, "loss": 0.1911, "step": 26556, "teacher_loss": 0.18364334106445312 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.2706703841686249, "learning_rate": 2.1060522930844039e-07, "loss": 0.1944, "step": 26557, "teacher_loss": 0.18589171767234802 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5510338544845581, "learning_rate": 2.1022613709477167e-07, "loss": 0.1931, "step": 26558, "teacher_loss": 0.15334567427635193 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3310496211051941, "learning_rate": 2.0984738396627623e-07, "loss": 0.2061, "step": 26559, "teacher_loss": 0.19226107001304626 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.43700751662254333, "learning_rate": 2.0946896993163545e-07, "loss": 0.247, "step": 26560, "teacher_loss": 0.2259148210287094 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.4481600522994995, "learning_rate": 2.0909089499952737e-07, "loss": 0.2181, "step": 26561, "teacher_loss": 0.19256016612052917 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.36073723435401917, "learning_rate": 2.0871315917862177e-07, "loss": 0.1654, "step": 26562, "teacher_loss": 0.14373403787612915 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.35423365235328674, "learning_rate": 2.0833576247757502e-07, "loss": 0.1731, "step": 26563, "teacher_loss": 0.1529546082019806 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.4972625970840454, "learning_rate": 2.079587049050402e-07, "loss": 0.2428, "step": 26564, "teacher_loss": 0.21449661254882812 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.6349788904190063, "learning_rate": 2.0758198646966708e-07, "loss": 0.2157, "step": 26565, "teacher_loss": 0.1691613495349884 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.18579766154289246, "learning_rate": 2.072056071800854e-07, "loss": 0.2132, "step": 26566, "teacher_loss": 0.21627789735794067 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.41962379217147827, "learning_rate": 2.0682956704492993e-07, "loss": 0.2324, "step": 26567, "teacher_loss": 0.21155965328216553 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.4972507357597351, "learning_rate": 2.0645386607282048e-07, "loss": 0.1863, "step": 26568, "teacher_loss": 0.1517573744058609 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5470048785209656, "learning_rate": 2.0607850427237006e-07, "loss": 0.2697, "step": 26569, "teacher_loss": 0.23885944485664368 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.40810444951057434, "learning_rate": 2.0570348165218355e-07, "loss": 0.2274, "step": 26570, "teacher_loss": 0.2073659598827362 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.42419108748435974, "learning_rate": 2.0532879822086237e-07, "loss": 0.2191, "step": 26571, "teacher_loss": 0.19627100229263306 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.646525502204895, "learning_rate": 2.049544539869963e-07, "loss": 0.2571, "step": 26572, "teacher_loss": 0.21382969617843628 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3486188054084778, "learning_rate": 2.0458044895916516e-07, "loss": 0.2363, "step": 26573, "teacher_loss": 0.22386500239372253 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.3252182602882385, "learning_rate": 2.0420678314594542e-07, "loss": 0.2471, "step": 26574, "teacher_loss": 0.23841966688632965 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.30904310941696167, "learning_rate": 2.0383345655590358e-07, "loss": 0.1908, "step": 26575, "teacher_loss": 0.17765100300312042 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.43673503398895264, "learning_rate": 2.0346046919759942e-07, "loss": 0.2027, "step": 26576, "teacher_loss": 0.17672871053218842 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.7287284135818481, "learning_rate": 2.0308782107958445e-07, "loss": 0.2266, "step": 26577, "teacher_loss": 0.17080318927764893 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.11723659932613373, "learning_rate": 2.0271551221040352e-07, "loss": 0.1334, "step": 26578, "teacher_loss": 0.13521084189414978 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.2978176474571228, "learning_rate": 2.0234354259859144e-07, "loss": 0.2185, "step": 26579, "teacher_loss": 0.20963937044143677 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.4201890826225281, "learning_rate": 2.0197191225267308e-07, "loss": 0.258, "step": 26580, "teacher_loss": 0.23993264138698578 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5311610698699951, "learning_rate": 2.0160062118117496e-07, "loss": 0.2511, "step": 26581, "teacher_loss": 0.22003470361232758 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.6326667070388794, "learning_rate": 2.012296693926069e-07, "loss": 0.2509, "step": 26582, "teacher_loss": 0.20848089456558228 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.6323836445808411, "learning_rate": 2.0085905689547212e-07, "loss": 0.2972, "step": 26583, "teacher_loss": 0.25994256138801575 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.5764419436454773, "learning_rate": 2.0048878369826884e-07, "loss": 0.1992, "step": 26584, "teacher_loss": 0.15727734565734863 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.25306063890457153, "learning_rate": 2.0011884980948858e-07, "loss": 0.1361, "step": 26585, "teacher_loss": 0.12312394380569458 }, { "compression_loss": 0.0, "epoch": 4.8, "label_loss": 0.342345654964447, "learning_rate": 1.9974925523760957e-07, "loss": 0.2002, "step": 26586, "teacher_loss": 0.1844092756509781 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.9469391107559204, "learning_rate": 1.9937999999110502e-07, "loss": 0.3288, "step": 26587, "teacher_loss": 0.26008307933807373 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.40020638704299927, "learning_rate": 1.9901108407844483e-07, "loss": 0.1876, "step": 26588, "teacher_loss": 0.1639908254146576 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.23812930285930634, "learning_rate": 1.9864250750808387e-07, "loss": 0.1519, "step": 26589, "teacher_loss": 0.14229774475097656 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3367200195789337, "learning_rate": 1.9827427028847212e-07, "loss": 0.2074, "step": 26590, "teacher_loss": 0.19302572309970856 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5522750020027161, "learning_rate": 1.979063724280561e-07, "loss": 0.2354, "step": 26591, "teacher_loss": 0.20022502541542053 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.2836771607398987, "learning_rate": 1.9753881393526574e-07, "loss": 0.1721, "step": 26592, "teacher_loss": 0.15974655747413635 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 1.2989203929901123, "learning_rate": 1.9717159481853097e-07, "loss": 0.4299, "step": 26593, "teacher_loss": 0.3332933783531189 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.40130162239074707, "learning_rate": 1.9680471508627173e-07, "loss": 0.2785, "step": 26594, "teacher_loss": 0.2648827135562897 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5575899481773376, "learning_rate": 1.9643817474689795e-07, "loss": 0.2638, "step": 26595, "teacher_loss": 0.2311980426311493 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3221670389175415, "learning_rate": 1.9607197380881127e-07, "loss": 0.1929, "step": 26596, "teacher_loss": 0.17848925292491913 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.9010970592498779, "learning_rate": 1.9570611228041158e-07, "loss": 0.2609, "step": 26597, "teacher_loss": 0.18973296880722046 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.35315263271331787, "learning_rate": 1.9534059017008554e-07, "loss": 0.1966, "step": 26598, "teacher_loss": 0.17917457222938538 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.25410693883895874, "learning_rate": 1.9497540748621145e-07, "loss": 0.1369, "step": 26599, "teacher_loss": 0.12386883050203323 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5609424114227295, "learning_rate": 1.946105642371626e-07, "loss": 0.2188, "step": 26600, "teacher_loss": 0.1808059811592102 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.309104323387146, "learning_rate": 1.9424606043130733e-07, "loss": 0.163, "step": 26601, "teacher_loss": 0.14675165712833405 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.06861025094985962, "learning_rate": 1.938818960769989e-07, "loss": 0.1344, "step": 26602, "teacher_loss": 0.1417549103498459 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.21902698278427124, "learning_rate": 1.9351807118258734e-07, "loss": 0.2169, "step": 26603, "teacher_loss": 0.21667072176933289 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3767809271812439, "learning_rate": 1.931545857564143e-07, "loss": 0.2217, "step": 26604, "teacher_loss": 0.20444533228874207 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3183096945285797, "learning_rate": 1.9279143980681312e-07, "loss": 0.1932, "step": 26605, "teacher_loss": 0.17925378680229187 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.4314734637737274, "learning_rate": 1.9242863334211213e-07, "loss": 0.2083, "step": 26606, "teacher_loss": 0.18349778652191162 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.6829096674919128, "learning_rate": 1.9206616637062468e-07, "loss": 0.2428, "step": 26607, "teacher_loss": 0.19393488764762878 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.6199914216995239, "learning_rate": 1.9170403890066412e-07, "loss": 0.2224, "step": 26608, "teacher_loss": 0.178226500749588 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3411809504032135, "learning_rate": 1.9134225094053215e-07, "loss": 0.2127, "step": 26609, "teacher_loss": 0.19847092032432556 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.41728919744491577, "learning_rate": 1.9098080249852379e-07, "loss": 0.306, "step": 26610, "teacher_loss": 0.2935827672481537 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5277500152587891, "learning_rate": 1.9061969358292574e-07, "loss": 0.2272, "step": 26611, "teacher_loss": 0.19379499554634094 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.1921905130147934, "learning_rate": 1.9025892420201807e-07, "loss": 0.1696, "step": 26612, "teacher_loss": 0.16709628701210022 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.32954180240631104, "learning_rate": 1.8989849436407082e-07, "loss": 0.3565, "step": 26613, "teacher_loss": 0.359546959400177 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5536991953849792, "learning_rate": 1.89538404077349e-07, "loss": 0.2744, "step": 26614, "teacher_loss": 0.24337589740753174 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.7158051133155823, "learning_rate": 1.891786533501061e-07, "loss": 0.2979, "step": 26615, "teacher_loss": 0.2514929473400116 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.45972874760627747, "learning_rate": 1.888192421905921e-07, "loss": 0.2135, "step": 26616, "teacher_loss": 0.18612083792686462 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.36339297890663147, "learning_rate": 1.8846017060704712e-07, "loss": 0.208, "step": 26617, "teacher_loss": 0.1907695084810257 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.6676833629608154, "learning_rate": 1.881014386077029e-07, "loss": 0.271, "step": 26618, "teacher_loss": 0.22695128619670868 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.15162141621112823, "learning_rate": 1.8774304620078453e-07, "loss": 0.1368, "step": 26619, "teacher_loss": 0.13515296578407288 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.1612195074558258, "learning_rate": 1.8738499339450709e-07, "loss": 0.1828, "step": 26620, "teacher_loss": 0.18524453043937683 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.22180190682411194, "learning_rate": 1.8702728019708237e-07, "loss": 0.2077, "step": 26621, "teacher_loss": 0.20616436004638672 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.7415960431098938, "learning_rate": 1.8666990661671214e-07, "loss": 0.2228, "step": 26622, "teacher_loss": 0.16519324481487274 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.33962446451187134, "learning_rate": 1.8631287266158649e-07, "loss": 0.2096, "step": 26623, "teacher_loss": 0.1951010823249817 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.557237982749939, "learning_rate": 1.859561783398922e-07, "loss": 0.2383, "step": 26624, "teacher_loss": 0.2028655707836151 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.15978658199310303, "learning_rate": 1.8559982365980943e-07, "loss": 0.167, "step": 26625, "teacher_loss": 0.16779188811779022 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.24664750695228577, "learning_rate": 1.8524380862950663e-07, "loss": 0.1499, "step": 26626, "teacher_loss": 0.13915738463401794 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.24275988340377808, "learning_rate": 1.8488813325714558e-07, "loss": 0.1498, "step": 26627, "teacher_loss": 0.13952095806598663 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.34714895486831665, "learning_rate": 1.845327975508815e-07, "loss": 0.1662, "step": 26628, "teacher_loss": 0.14604011178016663 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.2680235207080841, "learning_rate": 1.8417780151886275e-07, "loss": 0.1744, "step": 26629, "teacher_loss": 0.16394749283790588 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.17373903095722198, "learning_rate": 1.838231451692246e-07, "loss": 0.2282, "step": 26630, "teacher_loss": 0.23427070677280426 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5175390243530273, "learning_rate": 1.8346882851010383e-07, "loss": 0.1862, "step": 26631, "teacher_loss": 0.14937494695186615 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.3268944323062897, "learning_rate": 1.8311485154961892e-07, "loss": 0.2037, "step": 26632, "teacher_loss": 0.19006142020225525 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.34421366453170776, "learning_rate": 1.827612142958851e-07, "loss": 0.1783, "step": 26633, "teacher_loss": 0.15990768373012543 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.5900821685791016, "learning_rate": 1.824079167570142e-07, "loss": 0.1903, "step": 26634, "teacher_loss": 0.14584311842918396 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.21630124747753143, "learning_rate": 1.820549589411047e-07, "loss": 0.1252, "step": 26635, "teacher_loss": 0.11506427079439163 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.4203346371650696, "learning_rate": 1.817023408562485e-07, "loss": 0.199, "step": 26636, "teacher_loss": 0.17444080114364624 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.708977460861206, "learning_rate": 1.813500625105291e-07, "loss": 0.2219, "step": 26637, "teacher_loss": 0.1677657663822174 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 1.1075928211212158, "learning_rate": 1.8099812391202508e-07, "loss": 0.3448, "step": 26638, "teacher_loss": 0.2600868344306946 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.15651877224445343, "learning_rate": 1.806465250688033e-07, "loss": 0.1213, "step": 26639, "teacher_loss": 0.11742302775382996 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.4752455949783325, "learning_rate": 1.8029526598892565e-07, "loss": 0.2591, "step": 26640, "teacher_loss": 0.23503944277763367 }, { "compression_loss": 0.0, "epoch": 4.81, "label_loss": 0.2704382538795471, "learning_rate": 1.7994434668044735e-07, "loss": 0.2266, "step": 26641, "teacher_loss": 0.22171147167682648 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3054630756378174, "learning_rate": 1.7959376715141195e-07, "loss": 0.1703, "step": 26642, "teacher_loss": 0.15529881417751312 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.31029456853866577, "learning_rate": 1.792435274098564e-07, "loss": 0.1749, "step": 26643, "teacher_loss": 0.15984606742858887 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5667407512664795, "learning_rate": 1.788936274638142e-07, "loss": 0.245, "step": 26644, "teacher_loss": 0.20928195118904114 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.23693910241127014, "learning_rate": 1.7854406732130402e-07, "loss": 0.1826, "step": 26645, "teacher_loss": 0.1765764355659485 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.19595250487327576, "learning_rate": 1.7819484699034106e-07, "loss": 0.1442, "step": 26646, "teacher_loss": 0.13843604922294617 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.37882000207901, "learning_rate": 1.7784596647893059e-07, "loss": 0.1858, "step": 26647, "teacher_loss": 0.16432680189609528 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.17673683166503906, "learning_rate": 1.774974257950762e-07, "loss": 0.1408, "step": 26648, "teacher_loss": 0.13685429096221924 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5808209776878357, "learning_rate": 1.7714922494676323e-07, "loss": 0.3513, "step": 26649, "teacher_loss": 0.32579556107521057 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3273374140262604, "learning_rate": 1.7680136394197689e-07, "loss": 0.2337, "step": 26650, "teacher_loss": 0.22333180904388428 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.6616871356964111, "learning_rate": 1.764538427886958e-07, "loss": 0.288, "step": 26651, "teacher_loss": 0.24653145670890808 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.27064263820648193, "learning_rate": 1.7610666149488198e-07, "loss": 0.2513, "step": 26652, "teacher_loss": 0.24920299649238586 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.4270990490913391, "learning_rate": 1.75759820068499e-07, "loss": 0.1714, "step": 26653, "teacher_loss": 0.14296120405197144 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.4188765287399292, "learning_rate": 1.7541331851749885e-07, "loss": 0.2489, "step": 26654, "teacher_loss": 0.22999805212020874 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.17144450545310974, "learning_rate": 1.7506715684982522e-07, "loss": 0.1775, "step": 26655, "teacher_loss": 0.17820894718170166 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5406796932220459, "learning_rate": 1.747213350734117e-07, "loss": 0.2812, "step": 26656, "teacher_loss": 0.2523888349533081 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.42695268988609314, "learning_rate": 1.74375853196192e-07, "loss": 0.3482, "step": 26657, "teacher_loss": 0.3394070863723755 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.4141199588775635, "learning_rate": 1.740307112260847e-07, "loss": 0.1921, "step": 26658, "teacher_loss": 0.1674213856458664 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5092183351516724, "learning_rate": 1.7368590917100025e-07, "loss": 0.2976, "step": 26659, "teacher_loss": 0.27409225702285767 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.4832767844200134, "learning_rate": 1.7334144703884726e-07, "loss": 0.1884, "step": 26660, "teacher_loss": 0.15559455752372742 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3427741527557373, "learning_rate": 1.729973248375244e-07, "loss": 0.1403, "step": 26661, "teacher_loss": 0.11775478720664978 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.4078224301338196, "learning_rate": 1.7265354257491706e-07, "loss": 0.3148, "step": 26662, "teacher_loss": 0.3044697344303131 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.13483723998069763, "learning_rate": 1.723101002589089e-07, "loss": 0.1592, "step": 26663, "teacher_loss": 0.16192759573459625 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2916278541088104, "learning_rate": 1.7196699789737535e-07, "loss": 0.2142, "step": 26664, "teacher_loss": 0.20564445853233337 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 1.108000636100769, "learning_rate": 1.7162423549818175e-07, "loss": 0.2881, "step": 26665, "teacher_loss": 0.19698545336723328 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3294225335121155, "learning_rate": 1.7128181306918512e-07, "loss": 0.1887, "step": 26666, "teacher_loss": 0.17303289473056793 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3252015709877014, "learning_rate": 1.7093973061824087e-07, "loss": 0.2123, "step": 26667, "teacher_loss": 0.1998099386692047 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2935095429420471, "learning_rate": 1.7059798815318605e-07, "loss": 0.1733, "step": 26668, "teacher_loss": 0.15995007753372192 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5394579172134399, "learning_rate": 1.7025658568185942e-07, "loss": 0.2341, "step": 26669, "teacher_loss": 0.2001829743385315 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.44556427001953125, "learning_rate": 1.6991552321208804e-07, "loss": 0.1831, "step": 26670, "teacher_loss": 0.15393027663230896 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.664936900138855, "learning_rate": 1.6957480075168896e-07, "loss": 0.2308, "step": 26671, "teacher_loss": 0.18256829679012299 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5034996271133423, "learning_rate": 1.6923441830847765e-07, "loss": 0.3355, "step": 26672, "teacher_loss": 0.3168873190879822 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.291450560092926, "learning_rate": 1.6889437589025613e-07, "loss": 0.2241, "step": 26673, "teacher_loss": 0.216669499874115 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.9409869909286499, "learning_rate": 1.685546735048199e-07, "loss": 0.3544, "step": 26674, "teacher_loss": 0.289250910282135 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.31029993295669556, "learning_rate": 1.682153111599577e-07, "loss": 0.1713, "step": 26675, "teacher_loss": 0.1558365374803543 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2216094434261322, "learning_rate": 1.6787628886345162e-07, "loss": 0.1747, "step": 26676, "teacher_loss": 0.16944356262683868 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2281738817691803, "learning_rate": 1.6753760662307217e-07, "loss": 0.1549, "step": 26677, "teacher_loss": 0.146717369556427 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.3380930423736572, "learning_rate": 1.6719926444658472e-07, "loss": 0.213, "step": 26678, "teacher_loss": 0.19913268089294434 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.37729179859161377, "learning_rate": 1.6686126234174981e-07, "loss": 0.1871, "step": 26679, "teacher_loss": 0.16599278151988983 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2691551148891449, "learning_rate": 1.665236003163112e-07, "loss": 0.19, "step": 26680, "teacher_loss": 0.1811722218990326 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.41396981477737427, "learning_rate": 1.6618627837801437e-07, "loss": 0.1885, "step": 26681, "teacher_loss": 0.1634656935930252 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.20239734649658203, "learning_rate": 1.6584929653459313e-07, "loss": 0.1714, "step": 26682, "teacher_loss": 0.16795362532138824 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.18324315547943115, "learning_rate": 1.655126547937713e-07, "loss": 0.1732, "step": 26683, "teacher_loss": 0.1720539927482605 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2754879891872406, "learning_rate": 1.6517635316326772e-07, "loss": 0.1398, "step": 26684, "teacher_loss": 0.1247037947177887 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2101212441921234, "learning_rate": 1.6484039165079455e-07, "loss": 0.1703, "step": 26685, "teacher_loss": 0.16590210795402527 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.6218718886375427, "learning_rate": 1.6450477026405232e-07, "loss": 0.233, "step": 26686, "teacher_loss": 0.18975883722305298 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.9356488585472107, "learning_rate": 1.6416948901073648e-07, "loss": 0.3311, "step": 26687, "teacher_loss": 0.26387500762939453 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.7857467532157898, "learning_rate": 1.6383454789853425e-07, "loss": 0.3579, "step": 26688, "teacher_loss": 0.3104079067707062 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.15420745313167572, "learning_rate": 1.634999469351245e-07, "loss": 0.2001, "step": 26689, "teacher_loss": 0.20521730184555054 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.5287690758705139, "learning_rate": 1.631656861281794e-07, "loss": 0.1781, "step": 26690, "teacher_loss": 0.13909929990768433 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.41596531867980957, "learning_rate": 1.6283176548536283e-07, "loss": 0.1924, "step": 26691, "teacher_loss": 0.16759786009788513 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.12350337207317352, "learning_rate": 1.6249818501432866e-07, "loss": 0.1363, "step": 26692, "teacher_loss": 0.1377372443675995 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.24458357691764832, "learning_rate": 1.621649447227258e-07, "loss": 0.1729, "step": 26693, "teacher_loss": 0.1649162769317627 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.2510814964771271, "learning_rate": 1.6183204461819478e-07, "loss": 0.2103, "step": 26694, "teacher_loss": 0.20580416917800903 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.6063018441200256, "learning_rate": 1.6149948470836785e-07, "loss": 0.2943, "step": 26695, "teacher_loss": 0.2595962882041931 }, { "compression_loss": 0.0, "epoch": 4.82, "label_loss": 0.33197978138923645, "learning_rate": 1.6116726500087052e-07, "loss": 0.1793, "step": 26696, "teacher_loss": 0.162329763174057 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5743714570999146, "learning_rate": 1.608353855033168e-07, "loss": 0.2847, "step": 26697, "teacher_loss": 0.25247690081596375 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.20145922899246216, "learning_rate": 1.6050384622331882e-07, "loss": 0.1412, "step": 26698, "teacher_loss": 0.1345311403274536 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.33690956234931946, "learning_rate": 1.6017264716847557e-07, "loss": 0.1714, "step": 26699, "teacher_loss": 0.15300868451595306 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.44958004355430603, "learning_rate": 1.5984178834638264e-07, "loss": 0.2363, "step": 26700, "teacher_loss": 0.21259824931621552 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.3916698098182678, "learning_rate": 1.5951126976462392e-07, "loss": 0.1956, "step": 26701, "teacher_loss": 0.17384150624275208 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5447748899459839, "learning_rate": 1.5918109143077842e-07, "loss": 0.184, "step": 26702, "teacher_loss": 0.14386287331581116 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.36514341831207275, "learning_rate": 1.5885125335241334e-07, "loss": 0.2097, "step": 26703, "teacher_loss": 0.19239550828933716 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.6458698511123657, "learning_rate": 1.5852175553709436e-07, "loss": 0.2878, "step": 26704, "teacher_loss": 0.24798163771629333 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.2681099772453308, "learning_rate": 1.581925979923754e-07, "loss": 0.1616, "step": 26705, "teacher_loss": 0.14973688125610352 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.3242238461971283, "learning_rate": 1.5786378072580044e-07, "loss": 0.2206, "step": 26706, "teacher_loss": 0.20908290147781372 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.2812730669975281, "learning_rate": 1.575353037449101e-07, "loss": 0.1751, "step": 26707, "teacher_loss": 0.16328656673431396 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.679393470287323, "learning_rate": 1.572071670572367e-07, "loss": 0.1939, "step": 26708, "teacher_loss": 0.13993003964424133 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4808571934700012, "learning_rate": 1.568793706703009e-07, "loss": 0.1729, "step": 26709, "teacher_loss": 0.13870534300804138 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5350741147994995, "learning_rate": 1.5655191459161833e-07, "loss": 0.2422, "step": 26710, "teacher_loss": 0.20962196588516235 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.3469955325126648, "learning_rate": 1.5622479882869968e-07, "loss": 0.1776, "step": 26711, "teacher_loss": 0.1588045358657837 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.23887155950069427, "learning_rate": 1.5589802338904058e-07, "loss": 0.1475, "step": 26712, "teacher_loss": 0.1373380869626999 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5115737318992615, "learning_rate": 1.5557158828013508e-07, "loss": 0.2444, "step": 26713, "teacher_loss": 0.21470607817173004 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.25544148683547974, "learning_rate": 1.5524549350946716e-07, "loss": 0.1577, "step": 26714, "teacher_loss": 0.14681033790111542 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.34575194120407104, "learning_rate": 1.549197390845142e-07, "loss": 0.2106, "step": 26715, "teacher_loss": 0.19560036063194275 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.6323339343070984, "learning_rate": 1.5459432501274184e-07, "loss": 0.2306, "step": 26716, "teacher_loss": 0.18596941232681274 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4580984115600586, "learning_rate": 1.5426925130161417e-07, "loss": 0.2443, "step": 26717, "teacher_loss": 0.22054031491279602 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.22569376230239868, "learning_rate": 1.5394451795858355e-07, "loss": 0.1348, "step": 26718, "teacher_loss": 0.12474530935287476 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5164756178855896, "learning_rate": 1.53620124991094e-07, "loss": 0.2107, "step": 26719, "teacher_loss": 0.17669570446014404 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.3976835012435913, "learning_rate": 1.532960724065813e-07, "loss": 0.1673, "step": 26720, "teacher_loss": 0.14166569709777832 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5056062936782837, "learning_rate": 1.529723602124794e-07, "loss": 0.2076, "step": 26721, "teacher_loss": 0.17444908618927002 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4073508381843567, "learning_rate": 1.5264898841620577e-07, "loss": 0.187, "step": 26722, "teacher_loss": 0.16257119178771973 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.1556430160999298, "learning_rate": 1.5232595702517616e-07, "loss": 0.1709, "step": 26723, "teacher_loss": 0.17256800830364227 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.6193772554397583, "learning_rate": 1.5200326604679794e-07, "loss": 0.3093, "step": 26724, "teacher_loss": 0.2748171091079712 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.669753909111023, "learning_rate": 1.5168091548846686e-07, "loss": 0.2303, "step": 26725, "teacher_loss": 0.18145973980426788 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.6311920881271362, "learning_rate": 1.5135890535757535e-07, "loss": 0.2521, "step": 26726, "teacher_loss": 0.20994237065315247 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.33619800209999084, "learning_rate": 1.5103723566150752e-07, "loss": 0.1732, "step": 26727, "teacher_loss": 0.1551414430141449 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.1990974247455597, "learning_rate": 1.5071590640763412e-07, "loss": 0.2076, "step": 26728, "teacher_loss": 0.20851171016693115 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.7425979375839233, "learning_rate": 1.503949176033259e-07, "loss": 0.2227, "step": 26729, "teacher_loss": 0.164947509765625 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.32283148169517517, "learning_rate": 1.500742692559387e-07, "loss": 0.2775, "step": 26730, "teacher_loss": 0.2724207043647766 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.2430761754512787, "learning_rate": 1.4975396137282827e-07, "loss": 0.1839, "step": 26731, "teacher_loss": 0.17733968794345856 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.47460079193115234, "learning_rate": 1.494339939613354e-07, "loss": 0.2338, "step": 26732, "teacher_loss": 0.207024484872818 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.582412838935852, "learning_rate": 1.491143670287959e-07, "loss": 0.2134, "step": 26733, "teacher_loss": 0.17242959141731262 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4968038499355316, "learning_rate": 1.487950805825389e-07, "loss": 0.2068, "step": 26734, "teacher_loss": 0.1745903342962265 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.532997727394104, "learning_rate": 1.484761346298852e-07, "loss": 0.2522, "step": 26735, "teacher_loss": 0.22101366519927979 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.6254366040229797, "learning_rate": 1.481575291781473e-07, "loss": 0.2791, "step": 26736, "teacher_loss": 0.24064716696739197 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4757692217826843, "learning_rate": 1.4783926423462769e-07, "loss": 0.248, "step": 26737, "teacher_loss": 0.22274474799633026 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4548686742782593, "learning_rate": 1.4752133980662553e-07, "loss": 0.1997, "step": 26738, "teacher_loss": 0.17135955393314362 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.529330313205719, "learning_rate": 1.4720375590142833e-07, "loss": 0.3162, "step": 26739, "teacher_loss": 0.2925126850605011 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.30171215534210205, "learning_rate": 1.4688651252631857e-07, "loss": 0.2308, "step": 26740, "teacher_loss": 0.22292988002300262 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4987463355064392, "learning_rate": 1.465696096885688e-07, "loss": 0.1903, "step": 26741, "teacher_loss": 0.15598398447036743 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.46644288301467896, "learning_rate": 1.4625304739544487e-07, "loss": 0.2191, "step": 26742, "teacher_loss": 0.1915905922651291 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.24813279509544373, "learning_rate": 1.4593682565420597e-07, "loss": 0.1833, "step": 26743, "teacher_loss": 0.17605122923851013 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 1.0945628881454468, "learning_rate": 1.4562094447209962e-07, "loss": 0.3875, "step": 26744, "teacher_loss": 0.30895352363586426 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.5107935667037964, "learning_rate": 1.4530540385637003e-07, "loss": 0.2381, "step": 26745, "teacher_loss": 0.20775283873081207 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 1.1282711029052734, "learning_rate": 1.4499020381424978e-07, "loss": 0.396, "step": 26746, "teacher_loss": 0.31461966037750244 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.32896384596824646, "learning_rate": 1.4467534435296638e-07, "loss": 0.2452, "step": 26747, "teacher_loss": 0.23587128520011902 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.24232040345668793, "learning_rate": 1.4436082547974073e-07, "loss": 0.1504, "step": 26748, "teacher_loss": 0.14023542404174805 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.3009645342826843, "learning_rate": 1.4404664720177874e-07, "loss": 0.1469, "step": 26749, "teacher_loss": 0.12974300980567932 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.19984322786331177, "learning_rate": 1.4373280952628963e-07, "loss": 0.1788, "step": 26750, "teacher_loss": 0.17648035287857056 }, { "epoch": 4.83, "eval_exact_match": 80.70009460737937, "eval_f1": 87.97766082430648, "step": 26750 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.39254486560821533, "learning_rate": 1.4341931246046436e-07, "loss": 0.1866, "step": 26751, "teacher_loss": 0.16370701789855957 }, { "compression_loss": 0.0, "epoch": 4.83, "label_loss": 0.4887201189994812, "learning_rate": 1.4310615601149214e-07, "loss": 0.2157, "step": 26752, "teacher_loss": 0.18532304465770721 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5046596527099609, "learning_rate": 1.4279334018655222e-07, "loss": 0.2162, "step": 26753, "teacher_loss": 0.1841968297958374 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.2084541618824005, "learning_rate": 1.4248086499281722e-07, "loss": 0.1795, "step": 26754, "teacher_loss": 0.17626312375068665 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.28819823265075684, "learning_rate": 1.421687304374514e-07, "loss": 0.2147, "step": 26755, "teacher_loss": 0.20652630925178528 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3354499638080597, "learning_rate": 1.41856936527609e-07, "loss": 0.1628, "step": 26756, "teacher_loss": 0.1435767114162445 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.2262590229511261, "learning_rate": 1.4154548327044104e-07, "loss": 0.1712, "step": 26757, "teacher_loss": 0.1651121973991394 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.9490615725517273, "learning_rate": 1.4123437067308843e-07, "loss": 0.3106, "step": 26758, "teacher_loss": 0.2396979182958603 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4114399552345276, "learning_rate": 1.4092359874268047e-07, "loss": 0.1969, "step": 26759, "teacher_loss": 0.17306527495384216 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.27269312739372253, "learning_rate": 1.4061316748634479e-07, "loss": 0.1436, "step": 26760, "teacher_loss": 0.12920212745666504 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.34421443939208984, "learning_rate": 1.403030769112007e-07, "loss": 0.1914, "step": 26761, "teacher_loss": 0.17443488538265228 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.1878020316362381, "learning_rate": 1.399933270243525e-07, "loss": 0.1548, "step": 26762, "teacher_loss": 0.15115824341773987 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.40942880511283875, "learning_rate": 1.3968391783290625e-07, "loss": 0.2043, "step": 26763, "teacher_loss": 0.18147704005241394 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4460648000240326, "learning_rate": 1.393748493439545e-07, "loss": 0.1974, "step": 26764, "teacher_loss": 0.16976875066757202 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.356748104095459, "learning_rate": 1.3906612156458332e-07, "loss": 0.192, "step": 26765, "teacher_loss": 0.17365685105323792 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.21428439021110535, "learning_rate": 1.3875773450186868e-07, "loss": 0.1825, "step": 26766, "teacher_loss": 0.17899960279464722 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.28257933259010315, "learning_rate": 1.3844968816288328e-07, "loss": 0.202, "step": 26767, "teacher_loss": 0.19306910037994385 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.37761276960372925, "learning_rate": 1.3814198255469147e-07, "loss": 0.2199, "step": 26768, "teacher_loss": 0.20238202810287476 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5026804208755493, "learning_rate": 1.3783461768434423e-07, "loss": 0.2607, "step": 26769, "teacher_loss": 0.23379553854465485 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5623822212219238, "learning_rate": 1.3752759355888932e-07, "loss": 0.2537, "step": 26770, "teacher_loss": 0.21943864226341248 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.33137398958206177, "learning_rate": 1.3722091018536776e-07, "loss": 0.2464, "step": 26771, "teacher_loss": 0.23698797821998596 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.2528013586997986, "learning_rate": 1.3691456757080888e-07, "loss": 0.1793, "step": 26772, "teacher_loss": 0.17111703753471375 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.227402925491333, "learning_rate": 1.3660856572223878e-07, "loss": 0.205, "step": 26773, "teacher_loss": 0.20248326659202576 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.6011565923690796, "learning_rate": 1.3630290464667016e-07, "loss": 0.274, "step": 26774, "teacher_loss": 0.2376442700624466 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.6047711372375488, "learning_rate": 1.359975843511124e-07, "loss": 0.2068, "step": 26775, "teacher_loss": 0.16255077719688416 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.31677764654159546, "learning_rate": 1.3569260484256497e-07, "loss": 0.1712, "step": 26776, "teacher_loss": 0.15498243272304535 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.7407759428024292, "learning_rate": 1.3538796612802218e-07, "loss": 0.2186, "step": 26777, "teacher_loss": 0.1606256663799286 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.2662595808506012, "learning_rate": 1.3508366821446682e-07, "loss": 0.1357, "step": 26778, "teacher_loss": 0.1211710125207901 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4532569646835327, "learning_rate": 1.3477971110887333e-07, "loss": 0.2795, "step": 26779, "teacher_loss": 0.2601798176765442 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.7087517976760864, "learning_rate": 1.344760948182161e-07, "loss": 0.262, "step": 26780, "teacher_loss": 0.21233215928077698 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.32195156812667847, "learning_rate": 1.3417281934945125e-07, "loss": 0.2158, "step": 26781, "teacher_loss": 0.20396125316619873 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4480322003364563, "learning_rate": 1.338698847095332e-07, "loss": 0.1972, "step": 26782, "teacher_loss": 0.16933530569076538 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.2205275148153305, "learning_rate": 1.335672909054081e-07, "loss": 0.1748, "step": 26783, "teacher_loss": 0.1696779727935791 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5798348188400269, "learning_rate": 1.332650379440137e-07, "loss": 0.2208, "step": 26784, "teacher_loss": 0.1808832883834839 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3848309814929962, "learning_rate": 1.3296312583227777e-07, "loss": 0.1918, "step": 26785, "teacher_loss": 0.1703270673751831 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.6933592557907104, "learning_rate": 1.326615545771248e-07, "loss": 0.2568, "step": 26786, "teacher_loss": 0.2082470953464508 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.47149622440338135, "learning_rate": 1.3236032418546762e-07, "loss": 0.2117, "step": 26787, "teacher_loss": 0.18282335996627808 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.29308730363845825, "learning_rate": 1.3205943466421234e-07, "loss": 0.2063, "step": 26788, "teacher_loss": 0.19665983319282532 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.38950711488723755, "learning_rate": 1.3175888602025676e-07, "loss": 0.1993, "step": 26789, "teacher_loss": 0.17814357578754425 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.12176640331745148, "learning_rate": 1.314586782604954e-07, "loss": 0.1542, "step": 26790, "teacher_loss": 0.15783900022506714 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4021381735801697, "learning_rate": 1.3115881139180607e-07, "loss": 0.1973, "step": 26791, "teacher_loss": 0.17456963658332825 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.33463454246520996, "learning_rate": 1.308592854210666e-07, "loss": 0.1622, "step": 26792, "teacher_loss": 0.14306236803531647 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5154446959495544, "learning_rate": 1.3056010035514487e-07, "loss": 0.2613, "step": 26793, "teacher_loss": 0.23306185007095337 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.39722907543182373, "learning_rate": 1.30261256200897e-07, "loss": 0.1604, "step": 26794, "teacher_loss": 0.13413554430007935 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3304789662361145, "learning_rate": 1.2996275296517923e-07, "loss": 0.1834, "step": 26795, "teacher_loss": 0.16709625720977783 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3725399971008301, "learning_rate": 1.2966459065483105e-07, "loss": 0.2318, "step": 26796, "teacher_loss": 0.21615807712078094 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4241170287132263, "learning_rate": 1.2936676927669033e-07, "loss": 0.2089, "step": 26797, "teacher_loss": 0.18494050204753876 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4959632158279419, "learning_rate": 1.290692888375866e-07, "loss": 0.2152, "step": 26798, "teacher_loss": 0.1839810609817505 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3915627598762512, "learning_rate": 1.287721493443378e-07, "loss": 0.1549, "step": 26799, "teacher_loss": 0.12855836749076843 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.31806451082229614, "learning_rate": 1.2847535080375673e-07, "loss": 0.1812, "step": 26800, "teacher_loss": 0.16598491370677948 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.31957975029945374, "learning_rate": 1.2817889322264963e-07, "loss": 0.1816, "step": 26801, "teacher_loss": 0.1662653237581253 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.1761966347694397, "learning_rate": 1.2788277660781278e-07, "loss": 0.1459, "step": 26802, "teacher_loss": 0.14253179728984833 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.3448896110057831, "learning_rate": 1.2758700096603405e-07, "loss": 0.2201, "step": 26803, "teacher_loss": 0.2062658965587616 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4098774194717407, "learning_rate": 1.2729156630409466e-07, "loss": 0.2139, "step": 26804, "teacher_loss": 0.19208040833473206 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.5843693017959595, "learning_rate": 1.269964726287709e-07, "loss": 0.2911, "step": 26805, "teacher_loss": 0.25848501920700073 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.45490562915802, "learning_rate": 1.2670171994682566e-07, "loss": 0.1792, "step": 26806, "teacher_loss": 0.14853361248970032 }, { "compression_loss": 0.0, "epoch": 4.84, "label_loss": 0.4464746415615082, "learning_rate": 1.264073082650169e-07, "loss": 0.2423, "step": 26807, "teacher_loss": 0.21965396404266357 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.2948860824108124, "learning_rate": 1.2611323759009585e-07, "loss": 0.1561, "step": 26808, "teacher_loss": 0.14068950712680817 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.4873923063278198, "learning_rate": 1.2581950792880382e-07, "loss": 0.3008, "step": 26809, "teacher_loss": 0.28002995252609253 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.578059196472168, "learning_rate": 1.255261192878754e-07, "loss": 0.2434, "step": 26810, "teacher_loss": 0.2061932384967804 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.8028722405433655, "learning_rate": 1.2523307167403852e-07, "loss": 0.3352, "step": 26811, "teacher_loss": 0.2832902669906616 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.6091962456703186, "learning_rate": 1.2494036509400785e-07, "loss": 0.2024, "step": 26812, "teacher_loss": 0.15720829367637634 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.10960127413272858, "learning_rate": 1.24647999554498e-07, "loss": 0.1399, "step": 26813, "teacher_loss": 0.14322137832641602 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5605015158653259, "learning_rate": 1.2435597506221197e-07, "loss": 0.2665, "step": 26814, "teacher_loss": 0.23381273448467255 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.481201708316803, "learning_rate": 1.2406429162384436e-07, "loss": 0.2238, "step": 26815, "teacher_loss": 0.19522669911384583 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.4563407301902771, "learning_rate": 1.2377294924607986e-07, "loss": 0.2042, "step": 26816, "teacher_loss": 0.1761331707239151 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.8363826274871826, "learning_rate": 1.2348194793560143e-07, "loss": 0.283, "step": 26817, "teacher_loss": 0.22154870629310608 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5762732028961182, "learning_rate": 1.231912876990804e-07, "loss": 0.2178, "step": 26818, "teacher_loss": 0.17797914147377014 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3689744472503662, "learning_rate": 1.2290096854317813e-07, "loss": 0.2462, "step": 26819, "teacher_loss": 0.2325284779071808 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.25947481393814087, "learning_rate": 1.2261099047455426e-07, "loss": 0.2404, "step": 26820, "teacher_loss": 0.23828186094760895 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3992815315723419, "learning_rate": 1.2232135349985684e-07, "loss": 0.2001, "step": 26821, "teacher_loss": 0.17792943120002747 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5432816743850708, "learning_rate": 1.220320576257239e-07, "loss": 0.235, "step": 26822, "teacher_loss": 0.20079104602336884 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5239790678024292, "learning_rate": 1.2174310285878842e-07, "loss": 0.2267, "step": 26823, "teacher_loss": 0.19364714622497559 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.4411095380783081, "learning_rate": 1.2145448920567847e-07, "loss": 0.2178, "step": 26824, "teacher_loss": 0.1930195689201355 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.14479069411754608, "learning_rate": 1.211662166730071e-07, "loss": 0.1255, "step": 26825, "teacher_loss": 0.12339788675308228 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3370789885520935, "learning_rate": 1.2087828526738565e-07, "loss": 0.2304, "step": 26826, "teacher_loss": 0.21853172779083252 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.32711243629455566, "learning_rate": 1.2059069499541552e-07, "loss": 0.1873, "step": 26827, "teacher_loss": 0.17173263430595398 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5149683356285095, "learning_rate": 1.2030344586369145e-07, "loss": 0.297, "step": 26828, "teacher_loss": 0.2728150486946106 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.2778320610523224, "learning_rate": 1.2001653787879485e-07, "loss": 0.1692, "step": 26829, "teacher_loss": 0.15709394216537476 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.7342612147331238, "learning_rate": 1.1972997104730875e-07, "loss": 0.2136, "step": 26830, "teacher_loss": 0.15571829676628113 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3499950170516968, "learning_rate": 1.194437453757996e-07, "loss": 0.2315, "step": 26831, "teacher_loss": 0.21832574903964996 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.4568008482456207, "learning_rate": 1.1915786087083213e-07, "loss": 0.2601, "step": 26832, "teacher_loss": 0.23823484778404236 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.23635557293891907, "learning_rate": 1.1887231753895945e-07, "loss": 0.2074, "step": 26833, "teacher_loss": 0.2041378617286682 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.28120511770248413, "learning_rate": 1.1858711538672795e-07, "loss": 0.202, "step": 26834, "teacher_loss": 0.19322821497917175 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.4861868917942047, "learning_rate": 1.1830225442067744e-07, "loss": 0.2165, "step": 26835, "teacher_loss": 0.1865139603614807 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.36253222823143005, "learning_rate": 1.1801773464733934e-07, "loss": 0.2461, "step": 26836, "teacher_loss": 0.23315288126468658 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.9164342880249023, "learning_rate": 1.1773355607323678e-07, "loss": 0.296, "step": 26837, "teacher_loss": 0.22711709141731262 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.6806163191795349, "learning_rate": 1.1744971870488286e-07, "loss": 0.3816, "step": 26838, "teacher_loss": 0.3483601212501526 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.19825324416160583, "learning_rate": 1.1716622254878573e-07, "loss": 0.1121, "step": 26839, "teacher_loss": 0.1024901270866394 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3776912987232208, "learning_rate": 1.1688306761144851e-07, "loss": 0.2678, "step": 26840, "teacher_loss": 0.25562527775764465 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.2523205876350403, "learning_rate": 1.1660025389935935e-07, "loss": 0.2139, "step": 26841, "teacher_loss": 0.20966464281082153 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.15859457850456238, "learning_rate": 1.1631778141900474e-07, "loss": 0.1547, "step": 26842, "teacher_loss": 0.15424680709838867 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.39105015993118286, "learning_rate": 1.160356501768578e-07, "loss": 0.1833, "step": 26843, "teacher_loss": 0.1602357029914856 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.8814551830291748, "learning_rate": 1.1575386017939172e-07, "loss": 0.3331, "step": 26844, "teacher_loss": 0.2721233367919922 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.29704976081848145, "learning_rate": 1.154724114330613e-07, "loss": 0.2202, "step": 26845, "teacher_loss": 0.21168142557144165 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.36234503984451294, "learning_rate": 1.1519130394432476e-07, "loss": 0.2869, "step": 26846, "teacher_loss": 0.2784821391105652 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.13989108800888062, "learning_rate": 1.1491053771962357e-07, "loss": 0.1252, "step": 26847, "teacher_loss": 0.12361115962266922 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5687955021858215, "learning_rate": 1.1463011276539592e-07, "loss": 0.2507, "step": 26848, "teacher_loss": 0.21539413928985596 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.20778605341911316, "learning_rate": 1.1435002908807001e-07, "loss": 0.1519, "step": 26849, "teacher_loss": 0.1457262933254242 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5297324657440186, "learning_rate": 1.1407028669407072e-07, "loss": 0.2724, "step": 26850, "teacher_loss": 0.2438240498304367 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.20228345692157745, "learning_rate": 1.1379088558980788e-07, "loss": 0.1699, "step": 26851, "teacher_loss": 0.16630682349205017 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.6676709651947021, "learning_rate": 1.1351182578168972e-07, "loss": 0.2796, "step": 26852, "teacher_loss": 0.23645912110805511 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.29342755675315857, "learning_rate": 1.132331072761128e-07, "loss": 0.163, "step": 26853, "teacher_loss": 0.14854636788368225 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.7951908111572266, "learning_rate": 1.1295473007946867e-07, "loss": 0.2842, "step": 26854, "teacher_loss": 0.22737964987754822 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.273654580116272, "learning_rate": 1.1267669419813886e-07, "loss": 0.2138, "step": 26855, "teacher_loss": 0.2071772813796997 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.3578317165374756, "learning_rate": 1.1239899963849665e-07, "loss": 0.2586, "step": 26856, "teacher_loss": 0.24753229320049286 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.2813372313976288, "learning_rate": 1.1212164640691025e-07, "loss": 0.1449, "step": 26857, "teacher_loss": 0.1296910047531128 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.41558530926704407, "learning_rate": 1.1184463450973959e-07, "loss": 0.2149, "step": 26858, "teacher_loss": 0.19255319237709045 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.5574991703033447, "learning_rate": 1.1156796395333457e-07, "loss": 0.2552, "step": 26859, "teacher_loss": 0.2216319441795349 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.16101016104221344, "learning_rate": 1.1129163474403681e-07, "loss": 0.128, "step": 26860, "teacher_loss": 0.12430883944034576 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.2909768521785736, "learning_rate": 1.1101564688818455e-07, "loss": 0.2272, "step": 26861, "teacher_loss": 0.2201298028230667 }, { "compression_loss": 0.0, "epoch": 4.85, "label_loss": 0.25344839692115784, "learning_rate": 1.1074000039210275e-07, "loss": 0.185, "step": 26862, "teacher_loss": 0.17744457721710205 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2925449013710022, "learning_rate": 1.10464695262113e-07, "loss": 0.1655, "step": 26863, "teacher_loss": 0.1513696163892746 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4761926233768463, "learning_rate": 1.1018973150452694e-07, "loss": 0.2378, "step": 26864, "teacher_loss": 0.2113034725189209 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.48531460762023926, "learning_rate": 1.0991510912564784e-07, "loss": 0.2693, "step": 26865, "teacher_loss": 0.2453259527683258 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2347651720046997, "learning_rate": 1.0964082813177401e-07, "loss": 0.17, "step": 26866, "teacher_loss": 0.16285812854766846 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3395181894302368, "learning_rate": 1.0936688852919042e-07, "loss": 0.1927, "step": 26867, "teacher_loss": 0.1764277070760727 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.28666597604751587, "learning_rate": 1.0909329032418036e-07, "loss": 0.1485, "step": 26868, "teacher_loss": 0.13313794136047363 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4869145154953003, "learning_rate": 1.088200335230155e-07, "loss": 0.2576, "step": 26869, "teacher_loss": 0.23217150568962097 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.49133551120758057, "learning_rate": 1.085471181319625e-07, "loss": 0.2701, "step": 26870, "teacher_loss": 0.2455025017261505 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.31149953603744507, "learning_rate": 1.0827454415727633e-07, "loss": 0.1962, "step": 26871, "teacher_loss": 0.18339239060878754 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.378991037607193, "learning_rate": 1.0800231160520702e-07, "loss": 0.2426, "step": 26872, "teacher_loss": 0.22748887538909912 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.49645933508872986, "learning_rate": 1.0773042048199621e-07, "loss": 0.2306, "step": 26873, "teacher_loss": 0.20109760761260986 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2333805114030838, "learning_rate": 1.0745887079387729e-07, "loss": 0.1726, "step": 26874, "teacher_loss": 0.16584709286689758 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.1795811951160431, "learning_rate": 1.071876625470769e-07, "loss": 0.1597, "step": 26875, "teacher_loss": 0.1574731171131134 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6857481002807617, "learning_rate": 1.0691679574781177e-07, "loss": 0.2676, "step": 26876, "teacher_loss": 0.22109919786453247 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3866026699542999, "learning_rate": 1.0664627040229357e-07, "loss": 0.2202, "step": 26877, "teacher_loss": 0.20175501704216003 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4250245988368988, "learning_rate": 1.0637608651672404e-07, "loss": 0.2766, "step": 26878, "teacher_loss": 0.2601517140865326 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6606869697570801, "learning_rate": 1.0610624409729486e-07, "loss": 0.2452, "step": 26879, "teacher_loss": 0.19901379942893982 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2833385467529297, "learning_rate": 1.0583674315019775e-07, "loss": 0.2028, "step": 26880, "teacher_loss": 0.19384312629699707 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 1.1867039203643799, "learning_rate": 1.0556758368160946e-07, "loss": 0.3226, "step": 26881, "teacher_loss": 0.22660709917545319 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.34611034393310547, "learning_rate": 1.0529876569770002e-07, "loss": 0.1599, "step": 26882, "teacher_loss": 0.139179989695549 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.47628331184387207, "learning_rate": 1.050302892046312e-07, "loss": 0.2594, "step": 26883, "teacher_loss": 0.23525890707969666 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6330571174621582, "learning_rate": 1.0476215420856305e-07, "loss": 0.2692, "step": 26884, "teacher_loss": 0.22881156206130981 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6273962259292603, "learning_rate": 1.04494360715639e-07, "loss": 0.2424, "step": 26885, "teacher_loss": 0.19962969422340393 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.21046707034111023, "learning_rate": 1.042269087320008e-07, "loss": 0.1677, "step": 26886, "teacher_loss": 0.16296815872192383 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.554200291633606, "learning_rate": 1.0395979826378022e-07, "loss": 0.2614, "step": 26887, "teacher_loss": 0.22891667485237122 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.16884300112724304, "learning_rate": 1.0369302931710067e-07, "loss": 0.1522, "step": 26888, "teacher_loss": 0.15033525228500366 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6290130615234375, "learning_rate": 1.0342660189807728e-07, "loss": 0.2803, "step": 26889, "teacher_loss": 0.24153928458690643 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.36152154207229614, "learning_rate": 1.0316051601282184e-07, "loss": 0.2117, "step": 26890, "teacher_loss": 0.19505517184734344 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4527718126773834, "learning_rate": 1.0289477166743111e-07, "loss": 0.2129, "step": 26891, "teacher_loss": 0.18628495931625366 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.5007895827293396, "learning_rate": 1.0262936886800023e-07, "loss": 0.3523, "step": 26892, "teacher_loss": 0.33583980798721313 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3459768295288086, "learning_rate": 1.0236430762061267e-07, "loss": 0.2065, "step": 26893, "teacher_loss": 0.1910001039505005 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3753030300140381, "learning_rate": 1.0209958793134688e-07, "loss": 0.1867, "step": 26894, "teacher_loss": 0.16571786999702454 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.37625032663345337, "learning_rate": 1.018352098062697e-07, "loss": 0.1983, "step": 26895, "teacher_loss": 0.1785639375448227 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.22999843955039978, "learning_rate": 1.015711732514446e-07, "loss": 0.1813, "step": 26896, "teacher_loss": 0.1758955717086792 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4910023808479309, "learning_rate": 1.0130747827292674e-07, "loss": 0.2718, "step": 26897, "teacher_loss": 0.24744448065757751 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.35166680812835693, "learning_rate": 1.0104412487675796e-07, "loss": 0.1826, "step": 26898, "teacher_loss": 0.16386157274246216 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6086627244949341, "learning_rate": 1.0078111306897841e-07, "loss": 0.1905, "step": 26899, "teacher_loss": 0.14408454298973083 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.4664129614830017, "learning_rate": 1.0051844285561662e-07, "loss": 0.2025, "step": 26900, "teacher_loss": 0.17314422130584717 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.33844536542892456, "learning_rate": 1.0025611424269775e-07, "loss": 0.1985, "step": 26901, "teacher_loss": 0.18290764093399048 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2469913810491562, "learning_rate": 9.999412723623369e-08, "loss": 0.2363, "step": 26902, "teacher_loss": 0.23509052395820618 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.21917571127414703, "learning_rate": 9.973248184223127e-08, "loss": 0.199, "step": 26903, "teacher_loss": 0.196795254945755 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6069926023483276, "learning_rate": 9.94711780666907e-08, "loss": 0.23, "step": 26904, "teacher_loss": 0.18806637823581696 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2098299264907837, "learning_rate": 9.92102159156022e-08, "loss": 0.1354, "step": 26905, "teacher_loss": 0.12716060876846313 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3721007704734802, "learning_rate": 9.894959539494764e-08, "loss": 0.1739, "step": 26906, "teacher_loss": 0.15183955430984497 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.5748435258865356, "learning_rate": 9.86893165107039e-08, "loss": 0.238, "step": 26907, "teacher_loss": 0.20058538019657135 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.48926517367362976, "learning_rate": 9.842937926883621e-08, "loss": 0.2206, "step": 26908, "teacher_loss": 0.19075724482536316 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 1.1773569583892822, "learning_rate": 9.816978367530649e-08, "loss": 0.2969, "step": 26909, "teacher_loss": 0.199039489030838 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.3033674359321594, "learning_rate": 9.791052973606662e-08, "loss": 0.1878, "step": 26910, "teacher_loss": 0.17495176196098328 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.5259586572647095, "learning_rate": 9.765161745705852e-08, "loss": 0.302, "step": 26911, "teacher_loss": 0.27715086936950684 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.28036826848983765, "learning_rate": 9.739304684421913e-08, "loss": 0.212, "step": 26912, "teacher_loss": 0.20440194010734558 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.49443504214286804, "learning_rate": 9.7134817903477e-08, "loss": 0.2623, "step": 26913, "teacher_loss": 0.23647359013557434 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.2858453392982483, "learning_rate": 9.687693064075243e-08, "loss": 0.2268, "step": 26914, "teacher_loss": 0.2202746570110321 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.6475076079368591, "learning_rate": 9.6619385061959e-08, "loss": 0.2637, "step": 26915, "teacher_loss": 0.22107955813407898 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.292072057723999, "learning_rate": 9.636218117299867e-08, "loss": 0.2365, "step": 26916, "teacher_loss": 0.23037467896938324 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.7503155469894409, "learning_rate": 9.610531897977004e-08, "loss": 0.2528, "step": 26917, "teacher_loss": 0.1975124329328537 }, { "compression_loss": 0.0, "epoch": 4.86, "label_loss": 0.5498650074005127, "learning_rate": 9.584879848816341e-08, "loss": 0.2034, "step": 26918, "teacher_loss": 0.16485415399074554 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.42921704053878784, "learning_rate": 9.559261970405908e-08, "loss": 0.2402, "step": 26919, "teacher_loss": 0.21922513842582703 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.23813998699188232, "learning_rate": 9.533678263332901e-08, "loss": 0.1897, "step": 26920, "teacher_loss": 0.1843372881412506 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5730845332145691, "learning_rate": 9.508128728184185e-08, "loss": 0.2284, "step": 26921, "teacher_loss": 0.19011476635932922 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.4014793634414673, "learning_rate": 9.482613365545123e-08, "loss": 0.2667, "step": 26922, "teacher_loss": 0.2517184913158417 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5667757987976074, "learning_rate": 9.457132176001081e-08, "loss": 0.2403, "step": 26923, "teacher_loss": 0.20404933393001556 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.6729482412338257, "learning_rate": 9.431685160136094e-08, "loss": 0.2244, "step": 26924, "teacher_loss": 0.17452484369277954 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.21187181770801544, "learning_rate": 9.406272318533692e-08, "loss": 0.2029, "step": 26925, "teacher_loss": 0.2019166648387909 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.24084073305130005, "learning_rate": 9.380893651776412e-08, "loss": 0.2205, "step": 26926, "teacher_loss": 0.21822954714298248 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3511536717414856, "learning_rate": 9.355549160445953e-08, "loss": 0.2424, "step": 26927, "teacher_loss": 0.2303141951560974 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.34826019406318665, "learning_rate": 9.330238845123685e-08, "loss": 0.2061, "step": 26928, "teacher_loss": 0.19033831357955933 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.19356344640254974, "learning_rate": 9.304962706389807e-08, "loss": 0.1587, "step": 26929, "teacher_loss": 0.15479756891727448 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.22328755259513855, "learning_rate": 9.279720744823695e-08, "loss": 0.2216, "step": 26930, "teacher_loss": 0.22145822644233704 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3461264967918396, "learning_rate": 9.25451296100438e-08, "loss": 0.2568, "step": 26931, "teacher_loss": 0.24685505032539368 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.28883126378059387, "learning_rate": 9.229339355509403e-08, "loss": 0.1769, "step": 26932, "teacher_loss": 0.16442866623401642 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3900011479854584, "learning_rate": 9.204199928916135e-08, "loss": 0.1523, "step": 26933, "teacher_loss": 0.12585732340812683 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.24980461597442627, "learning_rate": 9.179094681800948e-08, "loss": 0.211, "step": 26934, "teacher_loss": 0.20663785934448242 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.27025848627090454, "learning_rate": 9.154023614739382e-08, "loss": 0.1659, "step": 26935, "teacher_loss": 0.15429073572158813 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.22401012480258942, "learning_rate": 9.12898672830631e-08, "loss": 0.1832, "step": 26936, "teacher_loss": 0.17864039540290833 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.6324148178100586, "learning_rate": 9.103984023075773e-08, "loss": 0.2369, "step": 26937, "teacher_loss": 0.19290286302566528 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.35115230083465576, "learning_rate": 9.079015499620979e-08, "loss": 0.1596, "step": 26938, "teacher_loss": 0.1383356899023056 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5517417788505554, "learning_rate": 9.054081158514138e-08, "loss": 0.3361, "step": 26939, "teacher_loss": 0.31210970878601074 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.30918264389038086, "learning_rate": 9.029181000327291e-08, "loss": 0.1497, "step": 26940, "teacher_loss": 0.13193444907665253 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5207072496414185, "learning_rate": 9.004315025631149e-08, "loss": 0.2081, "step": 26941, "teacher_loss": 0.17334526777267456 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.4288703501224518, "learning_rate": 8.979483234995756e-08, "loss": 0.2422, "step": 26942, "teacher_loss": 0.221428781747818 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5181955695152283, "learning_rate": 8.954685628990488e-08, "loss": 0.3419, "step": 26943, "teacher_loss": 0.32225918769836426 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.22621512413024902, "learning_rate": 8.92992220818406e-08, "loss": 0.1504, "step": 26944, "teacher_loss": 0.1419895589351654 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.6003322005271912, "learning_rate": 8.90519297314385e-08, "loss": 0.3517, "step": 26945, "teacher_loss": 0.3241175711154938 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.49202728271484375, "learning_rate": 8.88049792443707e-08, "loss": 0.2183, "step": 26946, "teacher_loss": 0.18785125017166138 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.582703709602356, "learning_rate": 8.855837062629935e-08, "loss": 0.2924, "step": 26947, "teacher_loss": 0.26019084453582764 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3892633318901062, "learning_rate": 8.831210388287825e-08, "loss": 0.2255, "step": 26948, "teacher_loss": 0.20731154084205627 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 1.0840959548950195, "learning_rate": 8.806617901975122e-08, "loss": 0.3463, "step": 26949, "teacher_loss": 0.26427167654037476 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.6072372198104858, "learning_rate": 8.782059604256043e-08, "loss": 0.2492, "step": 26950, "teacher_loss": 0.20942285656929016 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.422825425863266, "learning_rate": 8.757535495693303e-08, "loss": 0.2112, "step": 26951, "teacher_loss": 0.18766620755195618 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3810744881629944, "learning_rate": 8.733045576849286e-08, "loss": 0.2435, "step": 26952, "teacher_loss": 0.22820061445236206 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3230610191822052, "learning_rate": 8.708589848285542e-08, "loss": 0.1597, "step": 26953, "teacher_loss": 0.1415894627571106 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.6178602576255798, "learning_rate": 8.68416831056279e-08, "loss": 0.2656, "step": 26954, "teacher_loss": 0.22648707032203674 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.23812153935432434, "learning_rate": 8.659780964240583e-08, "loss": 0.1797, "step": 26955, "teacher_loss": 0.17318286001682281 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.34047946333885193, "learning_rate": 8.635427809878638e-08, "loss": 0.264, "step": 26956, "teacher_loss": 0.2555543780326843 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.4074346423149109, "learning_rate": 8.611108848034844e-08, "loss": 0.2065, "step": 26957, "teacher_loss": 0.18419039249420166 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.29244089126586914, "learning_rate": 8.586824079266919e-08, "loss": 0.1583, "step": 26958, "teacher_loss": 0.14344489574432373 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5036815404891968, "learning_rate": 8.562573504131588e-08, "loss": 0.2101, "step": 26959, "teacher_loss": 0.17745059728622437 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.8516224026679993, "learning_rate": 8.538357123184904e-08, "loss": 0.3237, "step": 26960, "teacher_loss": 0.26505112648010254 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.2982197403907776, "learning_rate": 8.514174936982088e-08, "loss": 0.1911, "step": 26961, "teacher_loss": 0.17919257283210754 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.31127387285232544, "learning_rate": 8.490026946077533e-08, "loss": 0.3228, "step": 26962, "teacher_loss": 0.32406747341156006 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.36412665247917175, "learning_rate": 8.465913151024962e-08, "loss": 0.1608, "step": 26963, "teacher_loss": 0.1382312774658203 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.3741481900215149, "learning_rate": 8.441833552377098e-08, "loss": 0.1796, "step": 26964, "teacher_loss": 0.1579703688621521 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.935228705406189, "learning_rate": 8.417788150686001e-08, "loss": 0.283, "step": 26965, "teacher_loss": 0.2105245292186737 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.5443079471588135, "learning_rate": 8.393776946503062e-08, "loss": 0.2345, "step": 26966, "teacher_loss": 0.20010888576507568 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.7597070932388306, "learning_rate": 8.369799940378676e-08, "loss": 0.2624, "step": 26967, "teacher_loss": 0.20718635618686676 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.2906755805015564, "learning_rate": 8.3458571328629e-08, "loss": 0.1848, "step": 26968, "teacher_loss": 0.17302021384239197 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.38782039284706116, "learning_rate": 8.321948524504131e-08, "loss": 0.2496, "step": 26969, "teacher_loss": 0.23420333862304688 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 1.0714486837387085, "learning_rate": 8.298074115850929e-08, "loss": 0.3424, "step": 26970, "teacher_loss": 0.2613499164581299 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.14442592859268188, "learning_rate": 8.274233907450523e-08, "loss": 0.1673, "step": 26971, "teacher_loss": 0.16988718509674072 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.10151538997888565, "learning_rate": 8.250427899849477e-08, "loss": 0.1439, "step": 26972, "teacher_loss": 0.14863327145576477 }, { "compression_loss": 0.0, "epoch": 4.87, "label_loss": 0.693641185760498, "learning_rate": 8.226656093593687e-08, "loss": 0.2743, "step": 26973, "teacher_loss": 0.2277432680130005 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.28727343678474426, "learning_rate": 8.202918489227885e-08, "loss": 0.2051, "step": 26974, "teacher_loss": 0.19595034420490265 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.2765795886516571, "learning_rate": 8.179215087296798e-08, "loss": 0.2029, "step": 26975, "teacher_loss": 0.1946794092655182 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6374483704566956, "learning_rate": 8.155545888343497e-08, "loss": 0.2895, "step": 26976, "teacher_loss": 0.2508276700973511 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3559282422065735, "learning_rate": 8.13191089291071e-08, "loss": 0.2602, "step": 26977, "teacher_loss": 0.24951744079589844 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.41212737560272217, "learning_rate": 8.10831010154034e-08, "loss": 0.1948, "step": 26978, "teacher_loss": 0.1706516444683075 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.49746596813201904, "learning_rate": 8.084743514773618e-08, "loss": 0.2473, "step": 26979, "teacher_loss": 0.2195485532283783 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.22243882715702057, "learning_rate": 8.061211133150614e-08, "loss": 0.155, "step": 26980, "teacher_loss": 0.14746588468551636 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3271652162075043, "learning_rate": 8.037712957211063e-08, "loss": 0.183, "step": 26981, "teacher_loss": 0.16697517037391663 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.48929011821746826, "learning_rate": 8.014248987493533e-08, "loss": 0.1914, "step": 26982, "teacher_loss": 0.15829148888587952 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.40144023299217224, "learning_rate": 7.990819224535928e-08, "loss": 0.2107, "step": 26983, "teacher_loss": 0.18951021134853363 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.342797189950943, "learning_rate": 7.967423668875817e-08, "loss": 0.2168, "step": 26984, "teacher_loss": 0.2028241604566574 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.4028443694114685, "learning_rate": 7.944062321049273e-08, "loss": 0.1889, "step": 26985, "teacher_loss": 0.16509510576725006 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.4127182960510254, "learning_rate": 7.920735181591865e-08, "loss": 0.1893, "step": 26986, "teacher_loss": 0.16445937752723694 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.755481481552124, "learning_rate": 7.897442251038666e-08, "loss": 0.28, "step": 26987, "teacher_loss": 0.22712722420692444 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3776565194129944, "learning_rate": 7.874183529923584e-08, "loss": 0.261, "step": 26988, "teacher_loss": 0.2480432093143463 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.2794235646724701, "learning_rate": 7.850959018779691e-08, "loss": 0.1912, "step": 26989, "teacher_loss": 0.18141989409923553 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.43169325590133667, "learning_rate": 7.827768718139728e-08, "loss": 0.1926, "step": 26990, "teacher_loss": 0.16603052616119385 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3657906949520111, "learning_rate": 7.804612628535268e-08, "loss": 0.2273, "step": 26991, "teacher_loss": 0.2118605226278305 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6287577748298645, "learning_rate": 7.781490750497056e-08, "loss": 0.2849, "step": 26992, "teacher_loss": 0.2466711550951004 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.5052841305732727, "learning_rate": 7.758403084555499e-08, "loss": 0.2622, "step": 26993, "teacher_loss": 0.23524263501167297 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6546868681907654, "learning_rate": 7.735349631239674e-08, "loss": 0.2295, "step": 26994, "teacher_loss": 0.182245135307312 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3491426110267639, "learning_rate": 7.712330391078326e-08, "loss": 0.2038, "step": 26995, "teacher_loss": 0.18763618171215057 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.5493866205215454, "learning_rate": 7.689345364599031e-08, "loss": 0.1901, "step": 26996, "teacher_loss": 0.15021564066410065 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3134749233722687, "learning_rate": 7.666394552329037e-08, "loss": 0.1879, "step": 26997, "teacher_loss": 0.17394420504570007 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6477165222167969, "learning_rate": 7.643477954794253e-08, "loss": 0.3081, "step": 26998, "teacher_loss": 0.2703627049922943 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3517632782459259, "learning_rate": 7.620595572520262e-08, "loss": 0.1777, "step": 26999, "teacher_loss": 0.1583964228630066 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6143736839294434, "learning_rate": 7.597747406031641e-08, "loss": 0.2618, "step": 27000, "teacher_loss": 0.22265347838401794 }, { "epoch": 4.88, "eval_exact_match": 80.61494796594134, "eval_f1": 87.85323560190852, "step": 27000 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.990003764629364, "learning_rate": 7.57493345585214e-08, "loss": 0.3204, "step": 27001, "teacher_loss": 0.24595433473587036 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.13803142309188843, "learning_rate": 7.55215372250484e-08, "loss": 0.1501, "step": 27002, "teacher_loss": 0.15142732858657837 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3989904820919037, "learning_rate": 7.529408206512157e-08, "loss": 0.279, "step": 27003, "teacher_loss": 0.2657226026058197 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.5006909370422363, "learning_rate": 7.506696908395505e-08, "loss": 0.1878, "step": 27004, "teacher_loss": 0.15299145877361298 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.2738187313079834, "learning_rate": 7.484019828675636e-08, "loss": 0.2925, "step": 27005, "teacher_loss": 0.29454100131988525 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.28321799635887146, "learning_rate": 7.461376967872135e-08, "loss": 0.1628, "step": 27006, "teacher_loss": 0.14941895008087158 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.25728052854537964, "learning_rate": 7.438768326504752e-08, "loss": 0.206, "step": 27007, "teacher_loss": 0.20030301809310913 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.8489676117897034, "learning_rate": 7.41619390509124e-08, "loss": 0.278, "step": 27008, "teacher_loss": 0.2146041989326477 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6287211179733276, "learning_rate": 7.393653704149351e-08, "loss": 0.3116, "step": 27009, "teacher_loss": 0.276381254196167 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.514577329158783, "learning_rate": 7.37114772419617e-08, "loss": 0.1723, "step": 27010, "teacher_loss": 0.13423390686511993 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.8977474570274353, "learning_rate": 7.348675965747286e-08, "loss": 0.2598, "step": 27011, "teacher_loss": 0.18886876106262207 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.4740731418132782, "learning_rate": 7.326238429317956e-08, "loss": 0.2073, "step": 27012, "teacher_loss": 0.17764054238796234 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.49320465326309204, "learning_rate": 7.30383511542293e-08, "loss": 0.2088, "step": 27013, "teacher_loss": 0.17717820405960083 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.2525772750377655, "learning_rate": 7.281466024575468e-08, "loss": 0.1683, "step": 27014, "teacher_loss": 0.15897008776664734 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.4661669135093689, "learning_rate": 7.259131157288656e-08, "loss": 0.2114, "step": 27015, "teacher_loss": 0.18312813341617584 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 1.1807246208190918, "learning_rate": 7.236830514074422e-08, "loss": 0.2959, "step": 27016, "teacher_loss": 0.19760479032993317 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6378553509712219, "learning_rate": 7.214564095444187e-08, "loss": 0.226, "step": 27017, "teacher_loss": 0.18024098873138428 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.6821727156639099, "learning_rate": 7.192331901908378e-08, "loss": 0.2232, "step": 27018, "teacher_loss": 0.1722208708524704 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.4021399915218353, "learning_rate": 7.170133933976752e-08, "loss": 0.1921, "step": 27019, "teacher_loss": 0.1688055843114853 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.17300496995449066, "learning_rate": 7.147970192158237e-08, "loss": 0.2065, "step": 27020, "teacher_loss": 0.21025945246219635 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.5621718168258667, "learning_rate": 7.125840676960926e-08, "loss": 0.2409, "step": 27021, "teacher_loss": 0.20523107051849365 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.2354016900062561, "learning_rate": 7.103745388892246e-08, "loss": 0.1972, "step": 27022, "teacher_loss": 0.19298574328422546 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 1.3270000219345093, "learning_rate": 7.081684328458793e-08, "loss": 0.317, "step": 27023, "teacher_loss": 0.20477090775966644 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.7821627259254456, "learning_rate": 7.059657496166161e-08, "loss": 0.2464, "step": 27024, "teacher_loss": 0.18689489364624023 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.3820337653160095, "learning_rate": 7.037664892519613e-08, "loss": 0.2123, "step": 27025, "teacher_loss": 0.19349345564842224 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.9937379360198975, "learning_rate": 7.015706518023246e-08, "loss": 0.3254, "step": 27026, "teacher_loss": 0.251126229763031 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.38988345861434937, "learning_rate": 6.993782373180658e-08, "loss": 0.2222, "step": 27027, "teacher_loss": 0.20359401404857635 }, { "compression_loss": 0.0, "epoch": 4.88, "label_loss": 0.5343022346496582, "learning_rate": 6.971892458494277e-08, "loss": 0.1743, "step": 27028, "teacher_loss": 0.13431471586227417 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.45604053139686584, "learning_rate": 6.95003677446604e-08, "loss": 0.2591, "step": 27029, "teacher_loss": 0.2372336983680725 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3492296636104584, "learning_rate": 6.928215321597042e-08, "loss": 0.1878, "step": 27030, "teacher_loss": 0.16985324025154114 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5472367405891418, "learning_rate": 6.906428100387718e-08, "loss": 0.1734, "step": 27031, "teacher_loss": 0.13187339901924133 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4447746276855469, "learning_rate": 6.884675111337335e-08, "loss": 0.2911, "step": 27032, "teacher_loss": 0.27404358983039856 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5377547144889832, "learning_rate": 6.862956354944993e-08, "loss": 0.2554, "step": 27033, "teacher_loss": 0.2239760458469391 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.40607982873916626, "learning_rate": 6.841271831708129e-08, "loss": 0.2025, "step": 27034, "teacher_loss": 0.1798466444015503 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.43143123388290405, "learning_rate": 6.819621542124343e-08, "loss": 0.2185, "step": 27035, "teacher_loss": 0.19488167762756348 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2815118432044983, "learning_rate": 6.79800548668974e-08, "loss": 0.1929, "step": 27036, "teacher_loss": 0.18305926024913788 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3911646604537964, "learning_rate": 6.776423665900089e-08, "loss": 0.1731, "step": 27037, "teacher_loss": 0.1488664150238037 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3050857186317444, "learning_rate": 6.754876080250161e-08, "loss": 0.1993, "step": 27038, "teacher_loss": 0.1875143200159073 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.7925775647163391, "learning_rate": 6.733362730233894e-08, "loss": 0.3813, "step": 27039, "teacher_loss": 0.33561527729034424 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.20236626267433167, "learning_rate": 6.71188361634456e-08, "loss": 0.1682, "step": 27040, "teacher_loss": 0.16436725854873657 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.8982300758361816, "learning_rate": 6.690438739074767e-08, "loss": 0.2628, "step": 27041, "teacher_loss": 0.19222791492938995 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3453550934791565, "learning_rate": 6.669028098915786e-08, "loss": 0.2066, "step": 27042, "teacher_loss": 0.1911918967962265 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.11725917458534241, "learning_rate": 6.647651696358891e-08, "loss": 0.2616, "step": 27043, "teacher_loss": 0.2776561975479126 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4788707196712494, "learning_rate": 6.626309531894025e-08, "loss": 0.2269, "step": 27044, "teacher_loss": 0.19889532029628754 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.32354623079299927, "learning_rate": 6.60500160601063e-08, "loss": 0.1736, "step": 27045, "teacher_loss": 0.15694619715213776 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.29166552424430847, "learning_rate": 6.58372791919698e-08, "loss": 0.1625, "step": 27046, "teacher_loss": 0.1481645107269287 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.49191218614578247, "learning_rate": 6.56248847194102e-08, "loss": 0.2183, "step": 27047, "teacher_loss": 0.1879333257675171 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4695451557636261, "learning_rate": 6.541283264729525e-08, "loss": 0.2039, "step": 27048, "teacher_loss": 0.1744372546672821 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5325410962104797, "learning_rate": 6.520112298048942e-08, "loss": 0.3994, "step": 27049, "teacher_loss": 0.38458847999572754 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4130170941352844, "learning_rate": 6.49897557238438e-08, "loss": 0.2703, "step": 27050, "teacher_loss": 0.2543904185295105 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4602157771587372, "learning_rate": 6.47787308822062e-08, "loss": 0.1922, "step": 27051, "teacher_loss": 0.16237199306488037 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5678970813751221, "learning_rate": 6.456804846041442e-08, "loss": 0.1961, "step": 27052, "teacher_loss": 0.15480750799179077 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.35894322395324707, "learning_rate": 6.435770846329792e-08, "loss": 0.2468, "step": 27053, "teacher_loss": 0.23438018560409546 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3186366558074951, "learning_rate": 6.41477108956795e-08, "loss": 0.2124, "step": 27054, "teacher_loss": 0.2006058394908905 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.15130293369293213, "learning_rate": 6.393805576237533e-08, "loss": 0.3023, "step": 27055, "teacher_loss": 0.3190426826477051 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.09584814310073853, "learning_rate": 6.372874306818988e-08, "loss": 0.1309, "step": 27056, "teacher_loss": 0.13473960757255554 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5275307893753052, "learning_rate": 6.351977281792264e-08, "loss": 0.1815, "step": 27057, "teacher_loss": 0.1430739462375641 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2654992938041687, "learning_rate": 6.331114501636648e-08, "loss": 0.1548, "step": 27058, "teacher_loss": 0.1424965262413025 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.40629467368125916, "learning_rate": 6.310285966830253e-08, "loss": 0.1618, "step": 27059, "teacher_loss": 0.13466358184814453 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2601078152656555, "learning_rate": 6.289491677850701e-08, "loss": 0.2128, "step": 27060, "teacher_loss": 0.20751792192459106 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5499464273452759, "learning_rate": 6.268731635174773e-08, "loss": 0.1907, "step": 27061, "teacher_loss": 0.15077224373817444 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3831576406955719, "learning_rate": 6.248005839278258e-08, "loss": 0.1827, "step": 27062, "teacher_loss": 0.1604071706533432 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.33850520849227905, "learning_rate": 6.22731429063661e-08, "loss": 0.162, "step": 27063, "teacher_loss": 0.14237526059150696 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5370922684669495, "learning_rate": 6.206656989723947e-08, "loss": 0.2261, "step": 27064, "teacher_loss": 0.1915908008813858 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3327372074127197, "learning_rate": 6.186033937014224e-08, "loss": 0.2028, "step": 27065, "teacher_loss": 0.1883716583251953 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4545358419418335, "learning_rate": 6.165445132979896e-08, "loss": 0.2345, "step": 27066, "teacher_loss": 0.2100464105606079 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2022644281387329, "learning_rate": 6.144890578093088e-08, "loss": 0.1732, "step": 27067, "teacher_loss": 0.16999413073062897 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.24027806520462036, "learning_rate": 6.124370272825253e-08, "loss": 0.1929, "step": 27068, "teacher_loss": 0.1875852644443512 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.30600887537002563, "learning_rate": 6.103884217646682e-08, "loss": 0.2184, "step": 27069, "teacher_loss": 0.20863887667655945 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2520320415496826, "learning_rate": 6.083432413027168e-08, "loss": 0.1825, "step": 27070, "teacher_loss": 0.17477816343307495 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.5499439835548401, "learning_rate": 6.0630148594355e-08, "loss": 0.2285, "step": 27071, "teacher_loss": 0.19272997975349426 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2057221233844757, "learning_rate": 6.042631557339973e-08, "loss": 0.1563, "step": 27072, "teacher_loss": 0.1507650762796402 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2951309382915497, "learning_rate": 6.02228250720771e-08, "loss": 0.1485, "step": 27073, "teacher_loss": 0.13221988081932068 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3287563920021057, "learning_rate": 6.00196770950534e-08, "loss": 0.1652, "step": 27074, "teacher_loss": 0.1470545530319214 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.2450924813747406, "learning_rate": 5.981687164698491e-08, "loss": 0.1441, "step": 27075, "teacher_loss": 0.13286477327346802 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.09722648561000824, "learning_rate": 5.961440873252289e-08, "loss": 0.1207, "step": 27076, "teacher_loss": 0.12328669428825378 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.415448933839798, "learning_rate": 5.9412288356310294e-08, "loss": 0.2345, "step": 27077, "teacher_loss": 0.21435707807540894 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.300531804561615, "learning_rate": 5.921051052297843e-08, "loss": 0.152, "step": 27078, "teacher_loss": 0.13554108142852783 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.36359208822250366, "learning_rate": 5.9009075237155266e-08, "loss": 0.2028, "step": 27079, "teacher_loss": 0.18488989770412445 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.6681549549102783, "learning_rate": 5.8807982503458755e-08, "loss": 0.3453, "step": 27080, "teacher_loss": 0.3094103932380676 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.4312153458595276, "learning_rate": 5.860723232649856e-08, "loss": 0.2003, "step": 27081, "teacher_loss": 0.17463012039661407 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3790202736854553, "learning_rate": 5.840682471087933e-08, "loss": 0.2273, "step": 27082, "teacher_loss": 0.21047043800354004 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.3613826036453247, "learning_rate": 5.8206759661194043e-08, "loss": 0.2659, "step": 27083, "teacher_loss": 0.25531256198883057 }, { "compression_loss": 0.0, "epoch": 4.89, "label_loss": 0.26578474044799805, "learning_rate": 5.800703718202904e-08, "loss": 0.1637, "step": 27084, "teacher_loss": 0.1523694097995758 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4076429009437561, "learning_rate": 5.7807657277963997e-08, "loss": 0.2595, "step": 27085, "teacher_loss": 0.2430737316608429 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.18823029100894928, "learning_rate": 5.7608619953570254e-08, "loss": 0.1812, "step": 27086, "teacher_loss": 0.18037733435630798 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4601285457611084, "learning_rate": 5.740992521341249e-08, "loss": 0.2035, "step": 27087, "teacher_loss": 0.17493480443954468 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5181673765182495, "learning_rate": 5.7211573062043723e-08, "loss": 0.2652, "step": 27088, "teacher_loss": 0.2371370494365692 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2841357886791229, "learning_rate": 5.7013563504013654e-08, "loss": 0.2089, "step": 27089, "teacher_loss": 0.20051856338977814 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4304525852203369, "learning_rate": 5.6815896543860325e-08, "loss": 0.1733, "step": 27090, "teacher_loss": 0.14474818110466003 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.1008550375699997, "learning_rate": 5.661857218611843e-08, "loss": 0.13, "step": 27091, "teacher_loss": 0.13324522972106934 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2582976222038269, "learning_rate": 5.6421590435307704e-08, "loss": 0.165, "step": 27092, "teacher_loss": 0.1546216458082199 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2739855945110321, "learning_rate": 5.622495129594785e-08, "loss": 0.1696, "step": 27093, "teacher_loss": 0.1579897254705429 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.33990907669067383, "learning_rate": 5.602865477254693e-08, "loss": 0.2346, "step": 27094, "teacher_loss": 0.22290325164794922 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.3388930559158325, "learning_rate": 5.583270086960301e-08, "loss": 0.136, "step": 27095, "teacher_loss": 0.11348383128643036 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2645907998085022, "learning_rate": 5.5637089591610846e-08, "loss": 0.35, "step": 27096, "teacher_loss": 0.3594374656677246 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6377970576286316, "learning_rate": 5.5441820943055166e-08, "loss": 0.296, "step": 27097, "teacher_loss": 0.25803548097610474 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.24293333292007446, "learning_rate": 5.5246894928412396e-08, "loss": 0.1999, "step": 27098, "teacher_loss": 0.19516006112098694 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.40824154019355774, "learning_rate": 5.505231155215063e-08, "loss": 0.2133, "step": 27099, "teacher_loss": 0.1916186511516571 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.3670286536216736, "learning_rate": 5.485807081873295e-08, "loss": 0.2313, "step": 27100, "teacher_loss": 0.21627283096313477 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5660703182220459, "learning_rate": 5.466417273261248e-08, "loss": 0.3151, "step": 27101, "teacher_loss": 0.28723466396331787 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.35866492986679077, "learning_rate": 5.4470617298232326e-08, "loss": 0.2766, "step": 27102, "teacher_loss": 0.2675008177757263 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.3503758907318115, "learning_rate": 5.427740452003393e-08, "loss": 0.1927, "step": 27103, "teacher_loss": 0.1751406192779541 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.33513182401657104, "learning_rate": 5.4084534402443764e-08, "loss": 0.1625, "step": 27104, "teacher_loss": 0.14326468110084534 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.7419121265411377, "learning_rate": 5.389200694988494e-08, "loss": 0.2558, "step": 27105, "teacher_loss": 0.20182159543037415 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5563478469848633, "learning_rate": 5.369982216677227e-08, "loss": 0.168, "step": 27106, "teacher_loss": 0.12481563538312912 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.24951507151126862, "learning_rate": 5.3507980057510565e-08, "loss": 0.2229, "step": 27107, "teacher_loss": 0.21995876729488373 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4139758348464966, "learning_rate": 5.331648062649963e-08, "loss": 0.3235, "step": 27108, "teacher_loss": 0.3134072422981262 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4854043126106262, "learning_rate": 5.3125323878129295e-08, "loss": 0.2215, "step": 27109, "teacher_loss": 0.1921657770872116 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2621830105781555, "learning_rate": 5.293450981678272e-08, "loss": 0.2109, "step": 27110, "teacher_loss": 0.20519176125526428 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.25472041964530945, "learning_rate": 5.2744038446833065e-08, "loss": 0.1219, "step": 27111, "teacher_loss": 0.10714872181415558 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.42488065361976624, "learning_rate": 5.2553909772648515e-08, "loss": 0.2196, "step": 27112, "teacher_loss": 0.1967361569404602 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2881413400173187, "learning_rate": 5.2364123798588904e-08, "loss": 0.1569, "step": 27113, "teacher_loss": 0.14228162169456482 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6166718006134033, "learning_rate": 5.217468052900409e-08, "loss": 0.232, "step": 27114, "teacher_loss": 0.18931034207344055 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.3131742775440216, "learning_rate": 5.1985579968237275e-08, "loss": 0.2143, "step": 27115, "teacher_loss": 0.20329663157463074 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.7678805589675903, "learning_rate": 5.1796822120624976e-08, "loss": 0.2132, "step": 27116, "teacher_loss": 0.15161028504371643 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.4865327477455139, "learning_rate": 5.1608406990495406e-08, "loss": 0.1846, "step": 27117, "teacher_loss": 0.15110671520233154 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.23234021663665771, "learning_rate": 5.142033458216677e-08, "loss": 0.1659, "step": 27118, "teacher_loss": 0.15851858258247375 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.17262354493141174, "learning_rate": 5.1232604899952296e-08, "loss": 0.1941, "step": 27119, "teacher_loss": 0.19650419056415558 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.37491530179977417, "learning_rate": 5.10452179481552e-08, "loss": 0.1794, "step": 27120, "teacher_loss": 0.15766984224319458 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.31125926971435547, "learning_rate": 5.085817373107204e-08, "loss": 0.2586, "step": 27121, "teacher_loss": 0.25278741121292114 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5615568161010742, "learning_rate": 5.067147225299107e-08, "loss": 0.3053, "step": 27122, "teacher_loss": 0.2768382728099823 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2816427946090698, "learning_rate": 5.048511351819218e-08, "loss": 0.224, "step": 27123, "teacher_loss": 0.21754002571105957 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.7559839487075806, "learning_rate": 5.0299097530948634e-08, "loss": 0.2642, "step": 27124, "teacher_loss": 0.20950773358345032 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.24950365722179413, "learning_rate": 5.0113424295525345e-08, "loss": 0.3117, "step": 27125, "teacher_loss": 0.31856244802474976 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5206633806228638, "learning_rate": 4.9928093816180575e-08, "loss": 0.2304, "step": 27126, "teacher_loss": 0.19819790124893188 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6451200246810913, "learning_rate": 4.974310609716093e-08, "loss": 0.2408, "step": 27127, "teacher_loss": 0.19586661458015442 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.2832808792591095, "learning_rate": 4.955846114270801e-08, "loss": 0.1484, "step": 27128, "teacher_loss": 0.13342690467834473 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.24933744966983795, "learning_rate": 4.937415895705677e-08, "loss": 0.1958, "step": 27129, "teacher_loss": 0.18988710641860962 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.558329701423645, "learning_rate": 4.919019954443216e-08, "loss": 0.2213, "step": 27130, "teacher_loss": 0.1838216483592987 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6621742248535156, "learning_rate": 4.90065829090508e-08, "loss": 0.3836, "step": 27131, "teacher_loss": 0.3525994122028351 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.46659573912620544, "learning_rate": 4.882330905512267e-08, "loss": 0.2425, "step": 27132, "teacher_loss": 0.21763202548027039 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5516567230224609, "learning_rate": 4.8640377986851056e-08, "loss": 0.1739, "step": 27133, "teacher_loss": 0.1319749355316162 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.26513779163360596, "learning_rate": 4.8457789708427624e-08, "loss": 0.2007, "step": 27134, "teacher_loss": 0.1935759335756302 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6361945867538452, "learning_rate": 4.827554422404068e-08, "loss": 0.247, "step": 27135, "teacher_loss": 0.20375439524650574 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.22280755639076233, "learning_rate": 4.809364153786855e-08, "loss": 0.1739, "step": 27136, "teacher_loss": 0.16850680112838745 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.1283850520849228, "learning_rate": 4.791208165408123e-08, "loss": 0.1839, "step": 27137, "teacher_loss": 0.1901058554649353 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.5657512545585632, "learning_rate": 4.773086457684039e-08, "loss": 0.2224, "step": 27138, "teacher_loss": 0.18422625958919525 }, { "compression_loss": 0.0, "epoch": 4.9, "label_loss": 0.6997939348220825, "learning_rate": 4.754999031030105e-08, "loss": 0.2788, "step": 27139, "teacher_loss": 0.23202776908874512 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.6043301820755005, "learning_rate": 4.736945885861155e-08, "loss": 0.1919, "step": 27140, "teacher_loss": 0.14602042734622955 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5844278931617737, "learning_rate": 4.718927022591024e-08, "loss": 0.2198, "step": 27141, "teacher_loss": 0.17931698262691498 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3621630072593689, "learning_rate": 4.700942441632716e-08, "loss": 0.196, "step": 27142, "teacher_loss": 0.17754696309566498 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3844059705734253, "learning_rate": 4.6829921433987344e-08, "loss": 0.2433, "step": 27143, "teacher_loss": 0.22758972644805908 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5002199411392212, "learning_rate": 4.665076128300583e-08, "loss": 0.207, "step": 27144, "teacher_loss": 0.17446695268154144 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.42030030488967896, "learning_rate": 4.647194396748933e-08, "loss": 0.279, "step": 27145, "teacher_loss": 0.263294517993927 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5187579393386841, "learning_rate": 4.629346949153956e-08, "loss": 0.2322, "step": 27146, "teacher_loss": 0.20037315785884857 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.22861260175704956, "learning_rate": 4.611533785924493e-08, "loss": 0.2697, "step": 27147, "teacher_loss": 0.27423131465911865 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.4743575155735016, "learning_rate": 4.593754907469216e-08, "loss": 0.2015, "step": 27148, "teacher_loss": 0.17122861742973328 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.08944901823997498, "learning_rate": 4.5760103141956333e-08, "loss": 0.1309, "step": 27149, "teacher_loss": 0.1355268955230713 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5087370872497559, "learning_rate": 4.558300006510752e-08, "loss": 0.2153, "step": 27150, "teacher_loss": 0.18273663520812988 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5632104873657227, "learning_rate": 4.540623984820413e-08, "loss": 0.2611, "step": 27151, "teacher_loss": 0.22755977511405945 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.34635016322135925, "learning_rate": 4.522982249529961e-08, "loss": 0.2097, "step": 27152, "teacher_loss": 0.19446712732315063 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.6652717590332031, "learning_rate": 4.505374801043738e-08, "loss": 0.2035, "step": 27153, "teacher_loss": 0.15215249359607697 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.48002541065216064, "learning_rate": 4.487801639765754e-08, "loss": 0.2178, "step": 27154, "teacher_loss": 0.18870341777801514 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3759750723838806, "learning_rate": 4.470262766098521e-08, "loss": 0.1961, "step": 27155, "teacher_loss": 0.17610427737236023 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.26902854442596436, "learning_rate": 4.4527581804445496e-08, "loss": 0.169, "step": 27156, "teacher_loss": 0.15784238278865814 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.23932792246341705, "learning_rate": 4.4352878832046864e-08, "loss": 0.1935, "step": 27157, "teacher_loss": 0.18837206065654755 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.29822540283203125, "learning_rate": 4.417851874779944e-08, "loss": 0.2311, "step": 27158, "teacher_loss": 0.22365616261959076 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.6749979257583618, "learning_rate": 4.4004501555696685e-08, "loss": 0.1858, "step": 27159, "teacher_loss": 0.13139258325099945 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.28848907351493835, "learning_rate": 4.383082725973209e-08, "loss": 0.1624, "step": 27160, "teacher_loss": 0.14841094613075256 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.41993457078933716, "learning_rate": 4.365749586388579e-08, "loss": 0.1975, "step": 27161, "teacher_loss": 0.17279371619224548 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.2872968316078186, "learning_rate": 4.348450737212961e-08, "loss": 0.1893, "step": 27162, "teacher_loss": 0.1784074306488037 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.4330679774284363, "learning_rate": 4.3311861788433715e-08, "loss": 0.1997, "step": 27163, "teacher_loss": 0.17376607656478882 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.2939988672733307, "learning_rate": 4.313955911675327e-08, "loss": 0.1456, "step": 27164, "teacher_loss": 0.12908178567886353 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.17502306401729584, "learning_rate": 4.296759936104011e-08, "loss": 0.1326, "step": 27165, "teacher_loss": 0.12787416577339172 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.7609520554542542, "learning_rate": 4.279598252523609e-08, "loss": 0.3183, "step": 27166, "teacher_loss": 0.269065797328949 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.9372122287750244, "learning_rate": 4.262470861327805e-08, "loss": 0.4318, "step": 27167, "teacher_loss": 0.3756694197654724 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.47166770696640015, "learning_rate": 4.2453777629087865e-08, "loss": 0.3089, "step": 27168, "teacher_loss": 0.2908185124397278 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.47114723920822144, "learning_rate": 4.228318957658905e-08, "loss": 0.2058, "step": 27169, "teacher_loss": 0.17637141048908234 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.7390127778053284, "learning_rate": 4.2112944459691825e-08, "loss": 0.2214, "step": 27170, "teacher_loss": 0.1639079451560974 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5228934288024902, "learning_rate": 4.194304228229806e-08, "loss": 0.2315, "step": 27171, "teacher_loss": 0.19910486042499542 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.38782405853271484, "learning_rate": 4.177348304830297e-08, "loss": 0.2169, "step": 27172, "teacher_loss": 0.19791549444198608 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.34291785955429077, "learning_rate": 4.160426676159679e-08, "loss": 0.2408, "step": 27173, "teacher_loss": 0.22942662239074707 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3724207580089569, "learning_rate": 4.1435393426054737e-08, "loss": 0.2592, "step": 27174, "teacher_loss": 0.24667003750801086 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.2793569564819336, "learning_rate": 4.126686304555205e-08, "loss": 0.1621, "step": 27175, "teacher_loss": 0.14902010560035706 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3560646176338196, "learning_rate": 4.109867562395231e-08, "loss": 0.2029, "step": 27176, "teacher_loss": 0.18584847450256348 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.37448179721832275, "learning_rate": 4.093083116511076e-08, "loss": 0.1881, "step": 27177, "teacher_loss": 0.16744334995746613 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.33217182755470276, "learning_rate": 4.0763329672874326e-08, "loss": 0.195, "step": 27178, "teacher_loss": 0.17974720895290375 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.2114766240119934, "learning_rate": 4.0596171151084936e-08, "loss": 0.2504, "step": 27179, "teacher_loss": 0.25469255447387695 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3199319839477539, "learning_rate": 4.042935560357619e-08, "loss": 0.2101, "step": 27180, "teacher_loss": 0.19786390662193298 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.5025237202644348, "learning_rate": 4.026288303417003e-08, "loss": 0.2092, "step": 27181, "teacher_loss": 0.17661133408546448 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.42017772793769836, "learning_rate": 4.0096753446685064e-08, "loss": 0.1867, "step": 27182, "teacher_loss": 0.16074426472187042 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.29449814558029175, "learning_rate": 3.9930966844928253e-08, "loss": 0.1691, "step": 27183, "teacher_loss": 0.15511909127235413 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.7279744148254395, "learning_rate": 3.9765523232703215e-08, "loss": 0.2409, "step": 27184, "teacher_loss": 0.1867751181125641 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.340764582157135, "learning_rate": 3.960042261380026e-08, "loss": 0.1878, "step": 27185, "teacher_loss": 0.17085853219032288 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.6382331848144531, "learning_rate": 3.943566499200635e-08, "loss": 0.2579, "step": 27186, "teacher_loss": 0.21563169360160828 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.25490856170654297, "learning_rate": 3.9271250371098463e-08, "loss": 0.1662, "step": 27187, "teacher_loss": 0.15635916590690613 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.16856330633163452, "learning_rate": 3.910717875484693e-08, "loss": 0.168, "step": 27188, "teacher_loss": 0.16789212822914124 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.488853394985199, "learning_rate": 3.894345014701206e-08, "loss": 0.2074, "step": 27189, "teacher_loss": 0.1761508733034134 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.2169104814529419, "learning_rate": 3.878006455134919e-08, "loss": 0.2258, "step": 27190, "teacher_loss": 0.22680553793907166 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.4675474762916565, "learning_rate": 3.8617021971602e-08, "loss": 0.2492, "step": 27191, "teacher_loss": 0.22493846714496613 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.3398829996585846, "learning_rate": 3.845432241151081e-08, "loss": 0.1751, "step": 27192, "teacher_loss": 0.15683090686798096 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.4625120162963867, "learning_rate": 3.829196587480433e-08, "loss": 0.2003, "step": 27193, "teacher_loss": 0.17118746042251587 }, { "compression_loss": 0.0, "epoch": 4.91, "label_loss": 0.4110548496246338, "learning_rate": 3.8129952365206225e-08, "loss": 0.2211, "step": 27194, "teacher_loss": 0.19997861981391907 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.6077495813369751, "learning_rate": 3.79682818864302e-08, "loss": 0.247, "step": 27195, "teacher_loss": 0.2069500982761383 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.9043715000152588, "learning_rate": 3.7806954442183294e-08, "loss": 0.5115, "step": 27196, "teacher_loss": 0.46781814098358154 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3793017864227295, "learning_rate": 3.764597003616421e-08, "loss": 0.1996, "step": 27197, "teacher_loss": 0.17963796854019165 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.5727404952049255, "learning_rate": 3.748532867206167e-08, "loss": 0.2572, "step": 27198, "teacher_loss": 0.2221372276544571 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.362764835357666, "learning_rate": 3.732503035356272e-08, "loss": 0.27, "step": 27199, "teacher_loss": 0.2596767842769623 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.30974021553993225, "learning_rate": 3.716507508433942e-08, "loss": 0.2873, "step": 27200, "teacher_loss": 0.28475284576416016 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2411240041255951, "learning_rate": 3.70054628680605e-08, "loss": 0.1832, "step": 27201, "teacher_loss": 0.1767248511314392 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3144083321094513, "learning_rate": 3.684619370838305e-08, "loss": 0.2124, "step": 27202, "teacher_loss": 0.20107178390026093 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.6034168004989624, "learning_rate": 3.668726760896246e-08, "loss": 0.2841, "step": 27203, "teacher_loss": 0.2486649751663208 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.262861043214798, "learning_rate": 3.652868457344083e-08, "loss": 0.2505, "step": 27204, "teacher_loss": 0.24909096956253052 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.5080952048301697, "learning_rate": 3.6370444605451915e-08, "loss": 0.1992, "step": 27205, "teacher_loss": 0.1648540049791336 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.465829461812973, "learning_rate": 3.621254770862614e-08, "loss": 0.2015, "step": 27206, "teacher_loss": 0.1720913052558899 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.44718748331069946, "learning_rate": 3.6054993886583955e-08, "loss": 0.2542, "step": 27207, "teacher_loss": 0.2327965497970581 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3789772391319275, "learning_rate": 3.589778314293579e-08, "loss": 0.2049, "step": 27208, "teacher_loss": 0.185560405254364 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3501730263233185, "learning_rate": 3.5740915481287103e-08, "loss": 0.1589, "step": 27209, "teacher_loss": 0.13759855926036835 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.8791165351867676, "learning_rate": 3.558439090523336e-08, "loss": 0.247, "step": 27210, "teacher_loss": 0.17675358057022095 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.824072003364563, "learning_rate": 3.542820941836333e-08, "loss": 0.2286, "step": 27211, "teacher_loss": 0.16247406601905823 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.5101550817489624, "learning_rate": 3.527237102425918e-08, "loss": 0.2203, "step": 27212, "teacher_loss": 0.18809807300567627 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.6795488595962524, "learning_rate": 3.5116875726493025e-08, "loss": 0.2661, "step": 27213, "teacher_loss": 0.22017788887023926 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.20727121829986572, "learning_rate": 3.4961723528630364e-08, "loss": 0.1669, "step": 27214, "teacher_loss": 0.16241078078746796 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.6690523624420166, "learning_rate": 3.480691443422668e-08, "loss": 0.2374, "step": 27215, "teacher_loss": 0.1894506812095642 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.6132190823554993, "learning_rate": 3.465244844683246e-08, "loss": 0.2261, "step": 27216, "teacher_loss": 0.18311814963817596 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.42862042784690857, "learning_rate": 3.449832556998989e-08, "loss": 0.2218, "step": 27217, "teacher_loss": 0.19878533482551575 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.4561886489391327, "learning_rate": 3.4344545807232786e-08, "loss": 0.175, "step": 27218, "teacher_loss": 0.1437852680683136 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.19526931643486023, "learning_rate": 3.419110916208501e-08, "loss": 0.1621, "step": 27219, "teacher_loss": 0.15836980938911438 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3678662180900574, "learning_rate": 3.4038015638063743e-08, "loss": 0.1982, "step": 27220, "teacher_loss": 0.17934614419937134 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3500927686691284, "learning_rate": 3.388526523868285e-08, "loss": 0.1992, "step": 27221, "teacher_loss": 0.18246625363826752 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.24646639823913574, "learning_rate": 3.373285796744119e-08, "loss": 0.1902, "step": 27222, "teacher_loss": 0.18399181962013245 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 1.7218382358551025, "learning_rate": 3.3580793827832634e-08, "loss": 0.3495, "step": 27223, "teacher_loss": 0.19702279567718506 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.32849419116973877, "learning_rate": 3.3429072823346066e-08, "loss": 0.1928, "step": 27224, "teacher_loss": 0.17769652605056763 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.35484859347343445, "learning_rate": 3.327769495745869e-08, "loss": 0.2428, "step": 27225, "teacher_loss": 0.23036307096481323 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.42881619930267334, "learning_rate": 3.312666023364108e-08, "loss": 0.1738, "step": 27226, "teacher_loss": 0.14551720023155212 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3842759132385254, "learning_rate": 3.297596865535713e-08, "loss": 0.26, "step": 27227, "teacher_loss": 0.2462140917778015 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.30774304270744324, "learning_rate": 3.282562022605906e-08, "loss": 0.2255, "step": 27228, "teacher_loss": 0.2163986712694168 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.24803279340267181, "learning_rate": 3.26756149491958e-08, "loss": 0.1581, "step": 27229, "teacher_loss": 0.1480870544910431 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3253433406352997, "learning_rate": 3.252595282820792e-08, "loss": 0.1896, "step": 27230, "teacher_loss": 0.17456519603729248 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.37638387084007263, "learning_rate": 3.237663386652434e-08, "loss": 0.1659, "step": 27231, "teacher_loss": 0.14246827363967896 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.9958248734474182, "learning_rate": 3.2227658067567334e-08, "loss": 0.3689, "step": 27232, "teacher_loss": 0.29919421672821045 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.8602573275566101, "learning_rate": 3.207902543475749e-08, "loss": 0.2294, "step": 27233, "teacher_loss": 0.1592506468296051 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.35433417558670044, "learning_rate": 3.1930735971498756e-08, "loss": 0.266, "step": 27234, "teacher_loss": 0.2561963200569153 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.7800993323326111, "learning_rate": 3.178278968119008e-08, "loss": 0.4393, "step": 27235, "teacher_loss": 0.40144577622413635 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2342272698879242, "learning_rate": 3.163518656722542e-08, "loss": 0.1476, "step": 27236, "teacher_loss": 0.1379505693912506 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.50400710105896, "learning_rate": 3.1487926632988714e-08, "loss": 0.2241, "step": 27237, "teacher_loss": 0.19301843643188477 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.26478996872901917, "learning_rate": 3.1341009881857286e-08, "loss": 0.1641, "step": 27238, "teacher_loss": 0.15291103720664978 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.20238935947418213, "learning_rate": 3.119443631719676e-08, "loss": 0.195, "step": 27239, "teacher_loss": 0.19413542747497559 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2665554881095886, "learning_rate": 3.1048205942369454e-08, "loss": 0.1649, "step": 27240, "teacher_loss": 0.1536218374967575 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.417416512966156, "learning_rate": 3.0902318760726024e-08, "loss": 0.2321, "step": 27241, "teacher_loss": 0.2115330696105957 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.12876467406749725, "learning_rate": 3.0756774775613786e-08, "loss": 0.1736, "step": 27242, "teacher_loss": 0.178538978099823 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.45692765712738037, "learning_rate": 3.061157399037007e-08, "loss": 0.1773, "step": 27243, "teacher_loss": 0.14623737335205078 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2966800332069397, "learning_rate": 3.046671640832055e-08, "loss": 0.1385, "step": 27244, "teacher_loss": 0.12093670666217804 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.4930235743522644, "learning_rate": 3.032220203278924e-08, "loss": 0.2378, "step": 27245, "teacher_loss": 0.20944710075855255 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.3952440619468689, "learning_rate": 3.017803086708848e-08, "loss": 0.2334, "step": 27246, "teacher_loss": 0.21546858549118042 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2294948399066925, "learning_rate": 3.0034202914522303e-08, "loss": 0.2007, "step": 27247, "teacher_loss": 0.19748157262802124 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.2819310128688812, "learning_rate": 2.9890718178389733e-08, "loss": 0.2609, "step": 27248, "teacher_loss": 0.2585877776145935 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.18786928057670593, "learning_rate": 2.9747576661981467e-08, "loss": 0.2069, "step": 27249, "teacher_loss": 0.2089783400297165 }, { "compression_loss": 0.0, "epoch": 4.92, "label_loss": 0.38050684332847595, "learning_rate": 2.9604778368578223e-08, "loss": 0.1872, "step": 27250, "teacher_loss": 0.1657276153564453 }, { "epoch": 4.92, "eval_exact_match": 80.51087984862819, "eval_f1": 87.73776363746359, "step": 27250 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.8388234972953796, "learning_rate": 2.9462323301452375e-08, "loss": 0.2213, "step": 27251, "teacher_loss": 0.15266813337802887 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4925694465637207, "learning_rate": 2.9320211463872983e-08, "loss": 0.1839, "step": 27252, "teacher_loss": 0.14957721531391144 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.49059203267097473, "learning_rate": 2.9178442859095765e-08, "loss": 0.2888, "step": 27253, "teacher_loss": 0.26632705330848694 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4242071509361267, "learning_rate": 2.903701749037313e-08, "loss": 0.2202, "step": 27254, "teacher_loss": 0.19754642248153687 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4898921549320221, "learning_rate": 2.8895935360944147e-08, "loss": 0.1929, "step": 27255, "teacher_loss": 0.15985625982284546 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.37135374546051025, "learning_rate": 2.8755196474047896e-08, "loss": 0.2335, "step": 27256, "teacher_loss": 0.21816231310367584 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.360440731048584, "learning_rate": 2.8614800832908462e-08, "loss": 0.2309, "step": 27257, "teacher_loss": 0.21648791432380676 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4608609974384308, "learning_rate": 2.847474844074327e-08, "loss": 0.2514, "step": 27258, "teacher_loss": 0.228127583861351 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5668257474899292, "learning_rate": 2.8335039300768083e-08, "loss": 0.1774, "step": 27259, "teacher_loss": 0.13416814804077148 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.3504267632961273, "learning_rate": 2.819567341618201e-08, "loss": 0.2198, "step": 27260, "teacher_loss": 0.20528699457645416 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.14782309532165527, "learning_rate": 2.8056650790180826e-08, "loss": 0.1473, "step": 27261, "teacher_loss": 0.14729085564613342 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.27218472957611084, "learning_rate": 2.7917971425951984e-08, "loss": 0.1888, "step": 27262, "teacher_loss": 0.17949530482292175 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.1996544450521469, "learning_rate": 2.7779635326676266e-08, "loss": 0.1977, "step": 27263, "teacher_loss": 0.19748085737228394 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.538507342338562, "learning_rate": 2.7641642495522813e-08, "loss": 0.2077, "step": 27264, "teacher_loss": 0.17096316814422607 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.36515355110168457, "learning_rate": 2.7503992935659083e-08, "loss": 0.2115, "step": 27265, "teacher_loss": 0.1944802701473236 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.42003577947616577, "learning_rate": 2.736668665023756e-08, "loss": 0.2365, "step": 27266, "teacher_loss": 0.21614588797092438 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.1723507195711136, "learning_rate": 2.7229723642409055e-08, "loss": 0.279, "step": 27267, "teacher_loss": 0.2908739149570465 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.44696834683418274, "learning_rate": 2.709310391531106e-08, "loss": 0.2481, "step": 27268, "teacher_loss": 0.2260361909866333 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5240640044212341, "learning_rate": 2.695682747207773e-08, "loss": 0.2191, "step": 27269, "teacher_loss": 0.18516308069229126 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.9417322874069214, "learning_rate": 2.6820894315833233e-08, "loss": 0.2403, "step": 27270, "teacher_loss": 0.16238191723823547 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.33713316917419434, "learning_rate": 2.6685304449693416e-08, "loss": 0.1532, "step": 27271, "teacher_loss": 0.1328173279762268 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.21509377658367157, "learning_rate": 2.6550057876765787e-08, "loss": 0.1929, "step": 27272, "teacher_loss": 0.19046145677566528 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.3062918782234192, "learning_rate": 2.6415154600154535e-08, "loss": 0.1417, "step": 27273, "teacher_loss": 0.12335780262947083 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.6491742134094238, "learning_rate": 2.628059462294885e-08, "loss": 0.242, "step": 27274, "teacher_loss": 0.1967647671699524 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.14101050794124603, "learning_rate": 2.6146377948236268e-08, "loss": 0.1748, "step": 27275, "teacher_loss": 0.17856170237064362 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5065450668334961, "learning_rate": 2.601250457909432e-08, "loss": 0.3498, "step": 27276, "teacher_loss": 0.33236175775527954 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.29500824213027954, "learning_rate": 2.587897451859056e-08, "loss": 0.2013, "step": 27277, "teacher_loss": 0.19084566831588745 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.23485131561756134, "learning_rate": 2.5745787769787532e-08, "loss": 0.1705, "step": 27278, "teacher_loss": 0.1633768081665039 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4976060390472412, "learning_rate": 2.56129443357378e-08, "loss": 0.1838, "step": 27279, "teacher_loss": 0.14889778196811676 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.2179468274116516, "learning_rate": 2.5480444219488918e-08, "loss": 0.1504, "step": 27280, "teacher_loss": 0.14291837811470032 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.15918374061584473, "learning_rate": 2.5348287424075133e-08, "loss": 0.1418, "step": 27281, "teacher_loss": 0.1398783028125763 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.3268272876739502, "learning_rate": 2.5216473952530683e-08, "loss": 0.1533, "step": 27282, "teacher_loss": 0.13404807448387146 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.8736242055892944, "learning_rate": 2.5085003807876484e-08, "loss": 0.5081, "step": 27283, "teacher_loss": 0.46744057536125183 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.26289522647857666, "learning_rate": 2.4953876993125123e-08, "loss": 0.1835, "step": 27284, "teacher_loss": 0.17472237348556519 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.667728066444397, "learning_rate": 2.4823093511282537e-08, "loss": 0.2971, "step": 27285, "teacher_loss": 0.2559564709663391 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.49340909719467163, "learning_rate": 2.469265336534965e-08, "loss": 0.2108, "step": 27286, "teacher_loss": 0.1794375479221344 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4741397798061371, "learning_rate": 2.4562556558315742e-08, "loss": 0.2195, "step": 27287, "teacher_loss": 0.1911698579788208 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5706460475921631, "learning_rate": 2.4432803093163425e-08, "loss": 0.2347, "step": 27288, "teacher_loss": 0.1974194496870041 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.41952651739120483, "learning_rate": 2.4303392972868653e-08, "loss": 0.1985, "step": 27289, "teacher_loss": 0.1739703267812729 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.2455453872680664, "learning_rate": 2.417432620039739e-08, "loss": 0.1589, "step": 27290, "teacher_loss": 0.14924058318138123 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.45929330587387085, "learning_rate": 2.4045602778708932e-08, "loss": 0.2442, "step": 27291, "teacher_loss": 0.22024746239185333 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.20468580722808838, "learning_rate": 2.391722271075425e-08, "loss": 0.1776, "step": 27292, "teacher_loss": 0.17459973692893982 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.33150699734687805, "learning_rate": 2.3789185999477657e-08, "loss": 0.1879, "step": 27293, "teacher_loss": 0.1719357967376709 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5419533252716064, "learning_rate": 2.3661492647813475e-08, "loss": 0.2013, "step": 27294, "teacher_loss": 0.16350258886814117 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.28830790519714355, "learning_rate": 2.353414265868936e-08, "loss": 0.225, "step": 27295, "teacher_loss": 0.21791645884513855 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.19991937279701233, "learning_rate": 2.340713603502631e-08, "loss": 0.1389, "step": 27296, "teacher_loss": 0.13212768733501434 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.57981276512146, "learning_rate": 2.3280472779736996e-08, "loss": 0.2674, "step": 27297, "teacher_loss": 0.2326354831457138 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.44037240743637085, "learning_rate": 2.315415289572076e-08, "loss": 0.2649, "step": 27298, "teacher_loss": 0.24537897109985352 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5216214060783386, "learning_rate": 2.302817638587862e-08, "loss": 0.2915, "step": 27299, "teacher_loss": 0.2659744322299957 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.5061221122741699, "learning_rate": 2.2902543253098262e-08, "loss": 0.2168, "step": 27300, "teacher_loss": 0.18465137481689453 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.4794410765171051, "learning_rate": 2.2777253500257388e-08, "loss": 0.2355, "step": 27301, "teacher_loss": 0.2083888053894043 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.9210695028305054, "learning_rate": 2.26523071302287e-08, "loss": 0.4524, "step": 27302, "teacher_loss": 0.4003167152404785 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.3906829357147217, "learning_rate": 2.2527704145879902e-08, "loss": 0.179, "step": 27303, "teacher_loss": 0.15546326339244843 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.2119235098361969, "learning_rate": 2.2403444550067042e-08, "loss": 0.1963, "step": 27304, "teacher_loss": 0.19453731179237366 }, { "compression_loss": 0.0, "epoch": 4.93, "label_loss": 0.37741318345069885, "learning_rate": 2.2279528345636178e-08, "loss": 0.1973, "step": 27305, "teacher_loss": 0.17729097604751587 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.6084326505661011, "learning_rate": 2.2155955535431703e-08, "loss": 0.2576, "step": 27306, "teacher_loss": 0.2185666263103485 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.46420708298683167, "learning_rate": 2.2032726122284686e-08, "loss": 0.165, "step": 27307, "teacher_loss": 0.1317114531993866 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.533197283744812, "learning_rate": 2.190984010901953e-08, "loss": 0.2394, "step": 27308, "teacher_loss": 0.20678508281707764 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.2281150221824646, "learning_rate": 2.1787297498457314e-08, "loss": 0.1733, "step": 27309, "teacher_loss": 0.16721247136592865 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.17798678576946259, "learning_rate": 2.1665098293404127e-08, "loss": 0.1612, "step": 27310, "teacher_loss": 0.15930208563804626 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.7696337103843689, "learning_rate": 2.154324249666273e-08, "loss": 0.2566, "step": 27311, "teacher_loss": 0.19963182508945465 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.20741623640060425, "learning_rate": 2.1421730111027548e-08, "loss": 0.1995, "step": 27312, "teacher_loss": 0.19857752323150635 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.5982731580734253, "learning_rate": 2.1300561139283026e-08, "loss": 0.5331, "step": 27313, "teacher_loss": 0.5258342027664185 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.22416728734970093, "learning_rate": 2.117973558421027e-08, "loss": 0.1494, "step": 27314, "teacher_loss": 0.14107252657413483 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.28178322315216064, "learning_rate": 2.10592534485754e-08, "loss": 0.1895, "step": 27315, "teacher_loss": 0.17920875549316406 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.17816588282585144, "learning_rate": 2.0939114735142872e-08, "loss": 0.1386, "step": 27316, "teacher_loss": 0.1341525912284851 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.3682921230792999, "learning_rate": 2.081931944666715e-08, "loss": 0.2081, "step": 27317, "teacher_loss": 0.19030600786209106 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4814732074737549, "learning_rate": 2.069986758589437e-08, "loss": 0.2116, "step": 27318, "teacher_loss": 0.1815616339445114 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.37017959356307983, "learning_rate": 2.058075915556401e-08, "loss": 0.2564, "step": 27319, "teacher_loss": 0.243787944316864 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.6033472418785095, "learning_rate": 2.0461994158407214e-08, "loss": 0.213, "step": 27320, "teacher_loss": 0.16968348622322083 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.5793178677558899, "learning_rate": 2.0343572597143477e-08, "loss": 0.2384, "step": 27321, "teacher_loss": 0.20055986940860748 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.189001202583313, "learning_rate": 2.022549447449229e-08, "loss": 0.1604, "step": 27322, "teacher_loss": 0.15716800093650818 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.3469178080558777, "learning_rate": 2.0107759793158154e-08, "loss": 0.2323, "step": 27323, "teacher_loss": 0.21959000825881958 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.3674091696739197, "learning_rate": 1.9990368555840577e-08, "loss": 0.1397, "step": 27324, "teacher_loss": 0.11439620703458786 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.232811838388443, "learning_rate": 1.9873320765230742e-08, "loss": 0.1533, "step": 27325, "teacher_loss": 0.1444646269083023 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.528118371963501, "learning_rate": 1.975661642401483e-08, "loss": 0.2265, "step": 27326, "teacher_loss": 0.19294780492782593 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.5963113307952881, "learning_rate": 1.9640255534865702e-08, "loss": 0.2207, "step": 27327, "teacher_loss": 0.17899803817272186 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.40467405319213867, "learning_rate": 1.9524238100451232e-08, "loss": 0.1528, "step": 27328, "teacher_loss": 0.12479636818170547 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.296396404504776, "learning_rate": 1.9408564123432614e-08, "loss": 0.1448, "step": 27329, "teacher_loss": 0.128006249666214 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.18449819087982178, "learning_rate": 1.9293233606462733e-08, "loss": 0.16, "step": 27330, "teacher_loss": 0.15726742148399353 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.28699901700019836, "learning_rate": 1.917824655218281e-08, "loss": 0.2016, "step": 27331, "teacher_loss": 0.19209644198417664 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4118339419364929, "learning_rate": 1.9063602963232395e-08, "loss": 0.1764, "step": 27332, "teacher_loss": 0.15021434426307678 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.31176403164863586, "learning_rate": 1.8949302842237725e-08, "loss": 0.2277, "step": 27333, "teacher_loss": 0.21831128001213074 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.1907312273979187, "learning_rate": 1.8835346191820035e-08, "loss": 0.1671, "step": 27334, "teacher_loss": 0.16442441940307617 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.2841472923755646, "learning_rate": 1.872173301459057e-08, "loss": 0.147, "step": 27335, "teacher_loss": 0.13173261284828186 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.31766629219055176, "learning_rate": 1.860846331315724e-08, "loss": 0.2093, "step": 27336, "teacher_loss": 0.1972164809703827 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4394126832485199, "learning_rate": 1.8495537090114644e-08, "loss": 0.1867, "step": 27337, "teacher_loss": 0.15859206020832062 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.7279329299926758, "learning_rate": 1.838295434805404e-08, "loss": 0.2006, "step": 27338, "teacher_loss": 0.14203011989593506 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.6464483141899109, "learning_rate": 1.827071508955336e-08, "loss": 0.219, "step": 27339, "teacher_loss": 0.171522319316864 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4175868630409241, "learning_rate": 1.815881931718888e-08, "loss": 0.2476, "step": 27340, "teacher_loss": 0.2287532389163971 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.30160772800445557, "learning_rate": 1.804726703352355e-08, "loss": 0.2044, "step": 27341, "teacher_loss": 0.19355884194374084 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.48113200068473816, "learning_rate": 1.7936058241115328e-08, "loss": 0.2018, "step": 27342, "teacher_loss": 0.17076489329338074 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.32314589619636536, "learning_rate": 1.7825192942517166e-08, "loss": 0.2076, "step": 27343, "teacher_loss": 0.1947435438632965 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.2733955383300781, "learning_rate": 1.7714671140267036e-08, "loss": 0.1969, "step": 27344, "teacher_loss": 0.18837718665599823 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.34554824233055115, "learning_rate": 1.7604492836901243e-08, "loss": 0.1941, "step": 27345, "teacher_loss": 0.1773087978363037 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.27896997332572937, "learning_rate": 1.7494658034942768e-08, "loss": 0.2094, "step": 27346, "teacher_loss": 0.2017216980457306 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.6373122930526733, "learning_rate": 1.7385166736914594e-08, "loss": 0.2429, "step": 27347, "teacher_loss": 0.19912102818489075 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4754981994628906, "learning_rate": 1.7276018945323045e-08, "loss": 0.1722, "step": 27348, "teacher_loss": 0.13851326704025269 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.2820902466773987, "learning_rate": 1.7167214662671127e-08, "loss": 0.1475, "step": 27349, "teacher_loss": 0.1325712949037552 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.30252161622047424, "learning_rate": 1.705875389145517e-08, "loss": 0.1764, "step": 27350, "teacher_loss": 0.16242536902427673 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4487345218658447, "learning_rate": 1.6950636634159857e-08, "loss": 0.1719, "step": 27351, "teacher_loss": 0.1411798894405365 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.8015564680099487, "learning_rate": 1.684286289326653e-08, "loss": 0.3717, "step": 27352, "teacher_loss": 0.32398757338523865 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.5629347562789917, "learning_rate": 1.6735432671243223e-08, "loss": 0.2357, "step": 27353, "teacher_loss": 0.1993437111377716 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.5005576610565186, "learning_rate": 1.6628345970554626e-08, "loss": 0.1913, "step": 27354, "teacher_loss": 0.15688841044902802 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.324212908744812, "learning_rate": 1.6521602793655445e-08, "loss": 0.2874, "step": 27355, "teacher_loss": 0.2833639085292816 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 1.059966802597046, "learning_rate": 1.6415203142993717e-08, "loss": 0.3559, "step": 27356, "teacher_loss": 0.27763837575912476 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.29694128036499023, "learning_rate": 1.6309147021007497e-08, "loss": 0.2624, "step": 27357, "teacher_loss": 0.25850945711135864 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.4783290922641754, "learning_rate": 1.6203434430129838e-08, "loss": 0.236, "step": 27358, "teacher_loss": 0.20907726883888245 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.3703649044036865, "learning_rate": 1.6098065372782132e-08, "loss": 0.1938, "step": 27359, "teacher_loss": 0.17415931820869446 }, { "compression_loss": 0.0, "epoch": 4.94, "label_loss": 0.32406532764434814, "learning_rate": 1.5993039851384118e-08, "loss": 0.1995, "step": 27360, "teacher_loss": 0.18562602996826172 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5709090828895569, "learning_rate": 1.5888357868338864e-08, "loss": 0.2427, "step": 27361, "teacher_loss": 0.20626124739646912 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.1959119439125061, "learning_rate": 1.578401942605112e-08, "loss": 0.1845, "step": 27362, "teacher_loss": 0.18326109647750854 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 1.2141883373260498, "learning_rate": 1.5680024526908974e-08, "loss": 0.3051, "step": 27363, "teacher_loss": 0.20411217212677002 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.42922112345695496, "learning_rate": 1.557637317329885e-08, "loss": 0.1626, "step": 27364, "teacher_loss": 0.13293735682964325 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3733193874359131, "learning_rate": 1.5473065367597185e-08, "loss": 0.2167, "step": 27365, "teacher_loss": 0.1993289440870285 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3852805495262146, "learning_rate": 1.5370101112172074e-08, "loss": 0.1731, "step": 27366, "teacher_loss": 0.14955738186836243 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.24276641011238098, "learning_rate": 1.526748040938497e-08, "loss": 0.1581, "step": 27367, "teacher_loss": 0.1487015336751938 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2290360927581787, "learning_rate": 1.5165203261587325e-08, "loss": 0.1352, "step": 27368, "teacher_loss": 0.12481711804866791 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.10584958642721176, "learning_rate": 1.506326967112559e-08, "loss": 0.1363, "step": 27369, "teacher_loss": 0.13973398506641388 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.6088733077049255, "learning_rate": 1.496167964033457e-08, "loss": 0.2209, "step": 27370, "teacher_loss": 0.17783257365226746 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5373728275299072, "learning_rate": 1.486043317154573e-08, "loss": 0.2738, "step": 27371, "teacher_loss": 0.24447016417980194 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4337577223777771, "learning_rate": 1.475953026707888e-08, "loss": 0.1811, "step": 27372, "teacher_loss": 0.15305882692337036 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.21538357436656952, "learning_rate": 1.4658970929248838e-08, "loss": 0.1546, "step": 27373, "teacher_loss": 0.14779934287071228 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3028721213340759, "learning_rate": 1.4558755160358762e-08, "loss": 0.1623, "step": 27374, "teacher_loss": 0.1466514617204666 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.42742568254470825, "learning_rate": 1.4458882962708476e-08, "loss": 0.2361, "step": 27375, "teacher_loss": 0.21479959785938263 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.6747928857803345, "learning_rate": 1.4359354338587815e-08, "loss": 0.3177, "step": 27376, "teacher_loss": 0.2780238389968872 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2036166489124298, "learning_rate": 1.4260169290276625e-08, "loss": 0.1578, "step": 27377, "teacher_loss": 0.15270090103149414 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3436180353164673, "learning_rate": 1.4161327820049752e-08, "loss": 0.1959, "step": 27378, "teacher_loss": 0.17950814962387085 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.44679689407348633, "learning_rate": 1.406282993017538e-08, "loss": 0.205, "step": 27379, "teacher_loss": 0.17810294032096863 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5852981805801392, "learning_rate": 1.396467562291004e-08, "loss": 0.1945, "step": 27380, "teacher_loss": 0.1511288434267044 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3098621368408203, "learning_rate": 1.386686490050193e-08, "loss": 0.1761, "step": 27381, "teacher_loss": 0.16123417019844055 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5501828193664551, "learning_rate": 1.3769397765197589e-08, "loss": 0.2417, "step": 27382, "teacher_loss": 0.207429438829422 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.9767274856567383, "learning_rate": 1.367227421923023e-08, "loss": 0.2661, "step": 27383, "teacher_loss": 0.18715143203735352 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4967823028564453, "learning_rate": 1.3575494264826404e-08, "loss": 0.1847, "step": 27384, "teacher_loss": 0.15003859996795654 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3808002173900604, "learning_rate": 1.3479057904204339e-08, "loss": 0.1778, "step": 27385, "teacher_loss": 0.15529832243919373 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4258047342300415, "learning_rate": 1.3382965139575598e-08, "loss": 0.2355, "step": 27386, "teacher_loss": 0.21433480083942413 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4240236282348633, "learning_rate": 1.3287215973143419e-08, "loss": 0.2099, "step": 27387, "teacher_loss": 0.1860915571451187 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5301865339279175, "learning_rate": 1.3191810407102711e-08, "loss": 0.317, "step": 27388, "teacher_loss": 0.2933564782142639 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.41852420568466187, "learning_rate": 1.3096748443640061e-08, "loss": 0.1934, "step": 27389, "teacher_loss": 0.16838085651397705 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3680052161216736, "learning_rate": 1.3002030084937055e-08, "loss": 0.2103, "step": 27390, "teacher_loss": 0.1927962750196457 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5279438495635986, "learning_rate": 1.2907655333163626e-08, "loss": 0.2608, "step": 27391, "teacher_loss": 0.23112955689430237 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5434454679489136, "learning_rate": 1.2813624190484707e-08, "loss": 0.2314, "step": 27392, "teacher_loss": 0.19670307636260986 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4102453589439392, "learning_rate": 1.2719936659055242e-08, "loss": 0.2087, "step": 27393, "teacher_loss": 0.18629616498947144 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.316523015499115, "learning_rate": 1.2626592741023513e-08, "loss": 0.1939, "step": 27394, "teacher_loss": 0.18028274178504944 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2855168879032135, "learning_rate": 1.2533592438529473e-08, "loss": 0.2101, "step": 27395, "teacher_loss": 0.20167964696884155 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2954165041446686, "learning_rate": 1.2440935753704752e-08, "loss": 0.1361, "step": 27396, "teacher_loss": 0.11841673403978348 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.685572624206543, "learning_rate": 1.2348622688674317e-08, "loss": 0.2259, "step": 27397, "teacher_loss": 0.17483890056610107 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3097909092903137, "learning_rate": 1.2256653245554805e-08, "loss": 0.2123, "step": 27398, "teacher_loss": 0.20147904753684998 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.22962921857833862, "learning_rate": 1.21650274264562e-08, "loss": 0.1828, "step": 27399, "teacher_loss": 0.17762508988380432 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.09828333556652069, "learning_rate": 1.2073745233475153e-08, "loss": 0.1795, "step": 27400, "teacher_loss": 0.18850922584533691 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.5816867351531982, "learning_rate": 1.1982806668708323e-08, "loss": 0.2512, "step": 27401, "teacher_loss": 0.21445298194885254 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3319658637046814, "learning_rate": 1.1892211734239045e-08, "loss": 0.2465, "step": 27402, "teacher_loss": 0.23700670897960663 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.30225861072540283, "learning_rate": 1.1801960432145652e-08, "loss": 0.1875, "step": 27403, "teacher_loss": 0.17469573020935059 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.3967258036136627, "learning_rate": 1.171205276449483e-08, "loss": 0.1461, "step": 27404, "teacher_loss": 0.11822549998760223 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.20937903225421906, "learning_rate": 1.1622488733351588e-08, "loss": 0.1573, "step": 27405, "teacher_loss": 0.15150919556617737 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.41705453395843506, "learning_rate": 1.1533268340765956e-08, "loss": 0.252, "step": 27406, "teacher_loss": 0.23366060853004456 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.18043196201324463, "learning_rate": 1.1444391588784631e-08, "loss": 0.1422, "step": 27407, "teacher_loss": 0.13795411586761475 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2314760833978653, "learning_rate": 1.1355858479444314e-08, "loss": 0.1478, "step": 27408, "teacher_loss": 0.13854004442691803 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.7101960182189941, "learning_rate": 1.1267669014776716e-08, "loss": 0.2683, "step": 27409, "teacher_loss": 0.21918924152851105 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.7630780935287476, "learning_rate": 1.1179823196803552e-08, "loss": 0.4353, "step": 27410, "teacher_loss": 0.3989092707633972 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.7172695994377136, "learning_rate": 1.109232102753821e-08, "loss": 0.3216, "step": 27411, "teacher_loss": 0.2776871919631958 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.1739330291748047, "learning_rate": 1.1005162508985756e-08, "loss": 0.1428, "step": 27412, "teacher_loss": 0.13934530317783356 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.4445819854736328, "learning_rate": 1.0918347643146254e-08, "loss": 0.1701, "step": 27413, "teacher_loss": 0.13960358500480652 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 1.0175824165344238, "learning_rate": 1.083187643200978e-08, "loss": 0.3309, "step": 27414, "teacher_loss": 0.25457894802093506 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.2083263397216797, "learning_rate": 1.0745748877558082e-08, "loss": 0.2481, "step": 27415, "teacher_loss": 0.25254327058792114 }, { "compression_loss": 0.0, "epoch": 4.95, "label_loss": 0.35163596272468567, "learning_rate": 1.0659964981766246e-08, "loss": 0.1903, "step": 27416, "teacher_loss": 0.17237040400505066 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.31885814666748047, "learning_rate": 1.0574524746601032e-08, "loss": 0.2, "step": 27417, "teacher_loss": 0.18677537143230438 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6641566157341003, "learning_rate": 1.0489428174020877e-08, "loss": 0.2146, "step": 27418, "teacher_loss": 0.164662167429924 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.39604002237319946, "learning_rate": 1.0404675265979213e-08, "loss": 0.1799, "step": 27419, "teacher_loss": 0.15588515996932983 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.223249614238739, "learning_rate": 1.0320266024416158e-08, "loss": 0.1491, "step": 27420, "teacher_loss": 0.14090079069137573 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.2374151051044464, "learning_rate": 1.023620045126683e-08, "loss": 0.1982, "step": 27421, "teacher_loss": 0.1938934475183487 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5648295879364014, "learning_rate": 1.0152478548461353e-08, "loss": 0.22, "step": 27422, "teacher_loss": 0.18172124028205872 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.1778237670660019, "learning_rate": 1.0069100317916525e-08, "loss": 0.1771, "step": 27423, "teacher_loss": 0.17707067728042603 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.7423708438873291, "learning_rate": 9.986065761545815e-09, "loss": 0.2338, "step": 27424, "teacher_loss": 0.17733559012413025 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5083366632461548, "learning_rate": 9.903374881251038e-09, "loss": 0.2957, "step": 27425, "teacher_loss": 0.27208876609802246 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6743139028549194, "learning_rate": 9.821027678930672e-09, "loss": 0.3166, "step": 27426, "teacher_loss": 0.27680733799934387 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5603856444358826, "learning_rate": 9.739024156469878e-09, "loss": 0.2574, "step": 27427, "teacher_loss": 0.22368650138378143 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.7192492485046387, "learning_rate": 9.657364315752148e-09, "loss": 0.2613, "step": 27428, "teacher_loss": 0.210372194647789 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 1.0452371835708618, "learning_rate": 9.576048158645988e-09, "loss": 0.3589, "step": 27429, "teacher_loss": 0.2826218605041504 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.4234424829483032, "learning_rate": 9.49507568701824e-09, "loss": 0.2543, "step": 27430, "teacher_loss": 0.23555180430412292 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6050143241882324, "learning_rate": 9.414446902724084e-09, "loss": 0.2664, "step": 27431, "teacher_loss": 0.2287229299545288 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5789109468460083, "learning_rate": 9.33416180761204e-09, "loss": 0.2625, "step": 27432, "teacher_loss": 0.22731518745422363 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.28201615810394287, "learning_rate": 9.254220403523972e-09, "loss": 0.218, "step": 27433, "teacher_loss": 0.2108529508113861 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.3046315312385559, "learning_rate": 9.174622692291745e-09, "loss": 0.1749, "step": 27434, "teacher_loss": 0.1604430377483368 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.2433900237083435, "learning_rate": 9.095368675740568e-09, "loss": 0.186, "step": 27435, "teacher_loss": 0.17965860664844513 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5615106225013733, "learning_rate": 9.016458355688983e-09, "loss": 0.1962, "step": 27436, "teacher_loss": 0.15562137961387634 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6059384346008301, "learning_rate": 8.937891733943881e-09, "loss": 0.2197, "step": 27437, "teacher_loss": 0.17677879333496094 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5561261177062988, "learning_rate": 8.859668812305488e-09, "loss": 0.3018, "step": 27438, "teacher_loss": 0.2735700011253357 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.20074233412742615, "learning_rate": 8.781789592572364e-09, "loss": 0.1727, "step": 27439, "teacher_loss": 0.16953718662261963 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.4483322501182556, "learning_rate": 8.704254076524753e-09, "loss": 0.2148, "step": 27440, "teacher_loss": 0.18881048262119293 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5684724450111389, "learning_rate": 8.627062265942898e-09, "loss": 0.2193, "step": 27441, "teacher_loss": 0.18053314089775085 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.3103572428226471, "learning_rate": 8.550214162595382e-09, "loss": 0.186, "step": 27442, "teacher_loss": 0.17219781875610352 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5197340846061707, "learning_rate": 8.473709768245796e-09, "loss": 0.2199, "step": 27443, "teacher_loss": 0.1865616738796234 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.39908987283706665, "learning_rate": 8.397549084646072e-09, "loss": 0.1782, "step": 27444, "teacher_loss": 0.1536574512720108 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.15417301654815674, "learning_rate": 8.321732113543146e-09, "loss": 0.1317, "step": 27445, "teacher_loss": 0.1291760504245758 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.31224191188812256, "learning_rate": 8.246258856677292e-09, "loss": 0.1449, "step": 27446, "teacher_loss": 0.1262732744216919 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.42918744683265686, "learning_rate": 8.17112931577546e-09, "loss": 0.1824, "step": 27447, "teacher_loss": 0.15496668219566345 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.9333703517913818, "learning_rate": 8.09634349256294e-09, "loss": 0.2914, "step": 27448, "teacher_loss": 0.22002869844436646 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 1.0443010330200195, "learning_rate": 8.021901388751696e-09, "loss": 0.4776, "step": 27449, "teacher_loss": 0.41463571786880493 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.3577002286911011, "learning_rate": 7.947803006052023e-09, "loss": 0.1653, "step": 27450, "teacher_loss": 0.14390233159065247 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.21934303641319275, "learning_rate": 7.874048346159235e-09, "loss": 0.1863, "step": 27451, "teacher_loss": 0.18262454867362976 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.4999401271343231, "learning_rate": 7.800637410766975e-09, "loss": 0.1875, "step": 27452, "teacher_loss": 0.15276643633842468 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.4586106836795807, "learning_rate": 7.727570201555568e-09, "loss": 0.1883, "step": 27453, "teacher_loss": 0.15822699666023254 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.4455593526363373, "learning_rate": 7.654846720202003e-09, "loss": 0.2184, "step": 27454, "teacher_loss": 0.19317969679832458 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.45450031757354736, "learning_rate": 7.582466968374946e-09, "loss": 0.2563, "step": 27455, "teacher_loss": 0.23432859778404236 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.5565842986106873, "learning_rate": 7.510430947731406e-09, "loss": 0.202, "step": 27456, "teacher_loss": 0.16260556876659393 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.2711261808872223, "learning_rate": 7.4387386599233945e-09, "loss": 0.211, "step": 27457, "teacher_loss": 0.2042895257472992 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.14549775421619415, "learning_rate": 7.367390106596261e-09, "loss": 0.1654, "step": 27458, "teacher_loss": 0.16763275861740112 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6332069635391235, "learning_rate": 7.2963852893837e-09, "loss": 0.1874, "step": 27459, "teacher_loss": 0.13790559768676758 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.673799991607666, "learning_rate": 7.225724209914408e-09, "loss": 0.2209, "step": 27460, "teacher_loss": 0.1705591380596161 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.46197324991226196, "learning_rate": 7.155406869808756e-09, "loss": 0.2056, "step": 27461, "teacher_loss": 0.17709805071353912 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.2200189232826233, "learning_rate": 7.085433270678788e-09, "loss": 0.1922, "step": 27462, "teacher_loss": 0.18906234204769135 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6935212016105652, "learning_rate": 7.015803414129884e-09, "loss": 0.3301, "step": 27463, "teacher_loss": 0.2897651195526123 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.3277333378791809, "learning_rate": 6.946517301755773e-09, "loss": 0.1918, "step": 27464, "teacher_loss": 0.1766470968723297 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.252109169960022, "learning_rate": 6.877574935146846e-09, "loss": 0.1897, "step": 27465, "teacher_loss": 0.1828039288520813 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.18426474928855896, "learning_rate": 6.808976315885174e-09, "loss": 0.1549, "step": 27466, "teacher_loss": 0.15160970389842987 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.24602842330932617, "learning_rate": 6.740721445539499e-09, "loss": 0.1844, "step": 27467, "teacher_loss": 0.1775653064250946 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.3321061134338379, "learning_rate": 6.6728103256785685e-09, "loss": 0.1755, "step": 27468, "teacher_loss": 0.15806333720684052 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.46465831995010376, "learning_rate": 6.605242957856139e-09, "loss": 0.1995, "step": 27469, "teacher_loss": 0.17007450759410858 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.17472025752067566, "learning_rate": 6.538019343624302e-09, "loss": 0.1682, "step": 27470, "teacher_loss": 0.16752079129219055 }, { "compression_loss": 0.0, "epoch": 4.96, "label_loss": 0.6558524966239929, "learning_rate": 6.4711394845218265e-09, "loss": 0.2164, "step": 27471, "teacher_loss": 0.1675567626953125 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.9810300469398499, "learning_rate": 6.404603382084151e-09, "loss": 0.2825, "step": 27472, "teacher_loss": 0.20487993955612183 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.6487252116203308, "learning_rate": 6.3384110378350565e-09, "loss": 0.2733, "step": 27473, "teacher_loss": 0.2316192090511322 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.20401354134082794, "learning_rate": 6.272562453293329e-09, "loss": 0.1912, "step": 27474, "teacher_loss": 0.1898222267627716 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.38726797699928284, "learning_rate": 6.207057629967761e-09, "loss": 0.2535, "step": 27475, "teacher_loss": 0.2386307567358017 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4196363687515259, "learning_rate": 6.141896569362149e-09, "loss": 0.2433, "step": 27476, "teacher_loss": 0.2237209677696228 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.8509251475334167, "learning_rate": 6.0770792729669675e-09, "loss": 0.2278, "step": 27477, "teacher_loss": 0.15857374668121338 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5932056903839111, "learning_rate": 6.012605742271027e-09, "loss": 0.1892, "step": 27478, "teacher_loss": 0.14429336786270142 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5873541831970215, "learning_rate": 5.948475978751477e-09, "loss": 0.2273, "step": 27479, "teacher_loss": 0.1872641146183014 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.6393393278121948, "learning_rate": 5.8846899838788106e-09, "loss": 0.2247, "step": 27480, "teacher_loss": 0.17863944172859192 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4376890957355499, "learning_rate": 5.821247759116855e-09, "loss": 0.183, "step": 27481, "teacher_loss": 0.1547470986843109 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2300207018852234, "learning_rate": 5.7581493059177814e-09, "loss": 0.2465, "step": 27482, "teacher_loss": 0.2482975423336029 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5016427040100098, "learning_rate": 5.695394625728767e-09, "loss": 0.1859, "step": 27483, "teacher_loss": 0.15085095167160034 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.492129921913147, "learning_rate": 5.6329837199903254e-09, "loss": 0.2325, "step": 27484, "teacher_loss": 0.20369312167167664 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5660818815231323, "learning_rate": 5.570916590131314e-09, "loss": 0.228, "step": 27485, "teacher_loss": 0.190460205078125 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.25193965435028076, "learning_rate": 5.509193237575594e-09, "loss": 0.1793, "step": 27486, "teacher_loss": 0.17127811908721924 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5627959370613098, "learning_rate": 5.4478136637386986e-09, "loss": 0.3247, "step": 27487, "teacher_loss": 0.29829204082489014 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2996610403060913, "learning_rate": 5.386777870026172e-09, "loss": 0.2291, "step": 27488, "teacher_loss": 0.22125495970249176 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2689604163169861, "learning_rate": 5.326085857840224e-09, "loss": 0.1759, "step": 27489, "teacher_loss": 0.16561414301395416 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5038433074951172, "learning_rate": 5.265737628569744e-09, "loss": 0.1887, "step": 27490, "teacher_loss": 0.15363475680351257 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.3490161597728729, "learning_rate": 5.205733183600292e-09, "loss": 0.2235, "step": 27491, "teacher_loss": 0.20952293276786804 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5548093914985657, "learning_rate": 5.146072524305767e-09, "loss": 0.2135, "step": 27492, "teacher_loss": 0.17556053400039673 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4501809775829315, "learning_rate": 5.086755652055075e-09, "loss": 0.1936, "step": 27493, "teacher_loss": 0.16507355868816376 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5887829065322876, "learning_rate": 5.0277825682071285e-09, "loss": 0.2144, "step": 27494, "teacher_loss": 0.17281441390514374 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.3334571123123169, "learning_rate": 4.969153274115845e-09, "loss": 0.2092, "step": 27495, "teacher_loss": 0.1953645497560501 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4587499499320984, "learning_rate": 4.910867771125149e-09, "loss": 0.2166, "step": 27496, "teacher_loss": 0.18971970677375793 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5540093779563904, "learning_rate": 4.8529260605706396e-09, "loss": 0.2793, "step": 27497, "teacher_loss": 0.24879075586795807 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.7645010948181152, "learning_rate": 4.795328143779587e-09, "loss": 0.3185, "step": 27498, "teacher_loss": 0.2689834535121918 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4637625813484192, "learning_rate": 4.738074022074268e-09, "loss": 0.2606, "step": 27499, "teacher_loss": 0.23798829317092896 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2227734625339508, "learning_rate": 4.681163696766966e-09, "loss": 0.1985, "step": 27500, "teacher_loss": 0.19574931263923645 }, { "epoch": 4.97, "eval_exact_match": 80.43519394512772, "eval_f1": 87.75946461130846, "step": 27500 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.7744127511978149, "learning_rate": 4.624597169161637e-09, "loss": 0.4296, "step": 27501, "teacher_loss": 0.3912474513053894 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.11572528630495071, "learning_rate": 4.568374440555579e-09, "loss": 0.1379, "step": 27502, "teacher_loss": 0.14036807417869568 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5398659706115723, "learning_rate": 4.512495512239423e-09, "loss": 0.18, "step": 27503, "teacher_loss": 0.14005735516548157 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.6325831413269043, "learning_rate": 4.456960385492148e-09, "loss": 0.2234, "step": 27504, "teacher_loss": 0.17788681387901306 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.14070340991020203, "learning_rate": 4.4017690615877345e-09, "loss": 0.1746, "step": 27505, "teacher_loss": 0.1783391386270523 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.21218222379684448, "learning_rate": 4.346921541791837e-09, "loss": 0.1471, "step": 27506, "teacher_loss": 0.13982637226581573 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.779812216758728, "learning_rate": 4.292417827361783e-09, "loss": 0.2316, "step": 27507, "teacher_loss": 0.17073094844818115 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2014297991991043, "learning_rate": 4.238257919546573e-09, "loss": 0.1872, "step": 27508, "teacher_loss": 0.18566396832466125 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.36356109380722046, "learning_rate": 4.184441819588547e-09, "loss": 0.3131, "step": 27509, "teacher_loss": 0.3075053095817566 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4877352714538574, "learning_rate": 4.130969528721718e-09, "loss": 0.2111, "step": 27510, "teacher_loss": 0.18041452765464783 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.40076351165771484, "learning_rate": 4.077841048171771e-09, "loss": 0.2282, "step": 27511, "teacher_loss": 0.20897814631462097 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2958611249923706, "learning_rate": 4.025056379157732e-09, "loss": 0.193, "step": 27512, "teacher_loss": 0.18159453570842743 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.19614139199256897, "learning_rate": 3.972615522888634e-09, "loss": 0.1743, "step": 27513, "teacher_loss": 0.17184802889823914 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.45678213238716125, "learning_rate": 3.920518480565183e-09, "loss": 0.2006, "step": 27514, "teacher_loss": 0.17218300700187683 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.5941406488418579, "learning_rate": 3.868765253386419e-09, "loss": 0.244, "step": 27515, "teacher_loss": 0.2051214575767517 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.3487972021102905, "learning_rate": 3.817355842534731e-09, "loss": 0.2165, "step": 27516, "teacher_loss": 0.20176644623279572 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2592461407184601, "learning_rate": 3.766290249189175e-09, "loss": 0.2102, "step": 27517, "teacher_loss": 0.20476830005645752 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.182743102312088, "learning_rate": 3.715568474522146e-09, "loss": 0.146, "step": 27518, "teacher_loss": 0.1419590413570404 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4459419846534729, "learning_rate": 3.6651905196977144e-09, "loss": 0.2168, "step": 27519, "teacher_loss": 0.19130343198776245 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.2272041141986847, "learning_rate": 3.615156385866625e-09, "loss": 0.1788, "step": 27520, "teacher_loss": 0.1734289824962616 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4762156307697296, "learning_rate": 3.5654660741796243e-09, "loss": 0.3242, "step": 27521, "teacher_loss": 0.3072817921638489 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.8389103412628174, "learning_rate": 3.516119585775801e-09, "loss": 0.2315, "step": 27522, "teacher_loss": 0.16399261355400085 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.34218621253967285, "learning_rate": 3.4671169217842524e-09, "loss": 0.2469, "step": 27523, "teacher_loss": 0.236338809132576 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.35364311933517456, "learning_rate": 3.418458083329079e-09, "loss": 0.1867, "step": 27524, "teacher_loss": 0.16813933849334717 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.4257160425186157, "learning_rate": 3.3701430715277202e-09, "loss": 0.23, "step": 27525, "teacher_loss": 0.20826703310012817 }, { "compression_loss": 0.0, "epoch": 4.97, "label_loss": 0.36059433221817017, "learning_rate": 3.322171887487624e-09, "loss": 0.2238, "step": 27526, "teacher_loss": 0.20856182277202606 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.21072140336036682, "learning_rate": 3.274544532306245e-09, "loss": 0.1263, "step": 27527, "teacher_loss": 0.1168941855430603 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.32397204637527466, "learning_rate": 3.2272610070777087e-09, "loss": 0.215, "step": 27528, "teacher_loss": 0.2029438018798828 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.427308052778244, "learning_rate": 3.1803213128861476e-09, "loss": 0.2302, "step": 27529, "teacher_loss": 0.20825974643230438 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4462602734565735, "learning_rate": 3.1337254508057023e-09, "loss": 0.2291, "step": 27530, "teacher_loss": 0.20498919486999512 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.26617613434791565, "learning_rate": 3.087473421907183e-09, "loss": 0.2169, "step": 27531, "teacher_loss": 0.2114514708518982 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.5748363733291626, "learning_rate": 3.0415652272480776e-09, "loss": 0.2889, "step": 27532, "teacher_loss": 0.2570781111717224 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.6317458152770996, "learning_rate": 2.9960008678842076e-09, "loss": 0.3225, "step": 27533, "teacher_loss": 0.28814035654067993 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.47134700417518616, "learning_rate": 2.950780344858073e-09, "loss": 0.2002, "step": 27534, "teacher_loss": 0.17009586095809937 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.2957739233970642, "learning_rate": 2.9059036592071766e-09, "loss": 0.206, "step": 27535, "teacher_loss": 0.19598287343978882 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.13057076930999756, "learning_rate": 2.8613708119606953e-09, "loss": 0.1536, "step": 27536, "teacher_loss": 0.1562126874923706 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4538521468639374, "learning_rate": 2.817181804137814e-09, "loss": 0.2216, "step": 27537, "teacher_loss": 0.1957799792289734 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.2572081983089447, "learning_rate": 2.7733366367543867e-09, "loss": 0.1491, "step": 27538, "teacher_loss": 0.13706140220165253 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.8566602468490601, "learning_rate": 2.72983531081461e-09, "loss": 0.3163, "step": 27539, "teacher_loss": 0.2562290132045746 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.5353735089302063, "learning_rate": 2.6866778273143544e-09, "loss": 0.2462, "step": 27540, "teacher_loss": 0.21406307816505432 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.22799766063690186, "learning_rate": 2.6438641872444937e-09, "loss": 0.2316, "step": 27541, "teacher_loss": 0.23204541206359863 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.21811546385288239, "learning_rate": 2.601394391587575e-09, "loss": 0.1618, "step": 27542, "teacher_loss": 0.15553322434425354 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.35627442598342896, "learning_rate": 2.5592684413144885e-09, "loss": 0.1594, "step": 27543, "teacher_loss": 0.1375793218612671 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.7956832051277161, "learning_rate": 2.517486337394459e-09, "loss": 0.2115, "step": 27544, "teacher_loss": 0.14664483070373535 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4127131998538971, "learning_rate": 2.4760480807833885e-09, "loss": 0.1922, "step": 27545, "teacher_loss": 0.1677536964416504 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.3261672854423523, "learning_rate": 2.4349536724305178e-09, "loss": 0.2379, "step": 27546, "teacher_loss": 0.22809630632400513 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.6406009197235107, "learning_rate": 2.394203113280091e-09, "loss": 0.2682, "step": 27547, "teacher_loss": 0.2267988622188568 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.196292445063591, "learning_rate": 2.3537964042646963e-09, "loss": 0.1454, "step": 27548, "teacher_loss": 0.13974981009960175 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 1.0665326118469238, "learning_rate": 2.3137335463119246e-09, "loss": 0.4244, "step": 27549, "teacher_loss": 0.3530510663986206 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.5212533473968506, "learning_rate": 2.2740145403393753e-09, "loss": 0.3378, "step": 27550, "teacher_loss": 0.3174550533294678 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.16549430787563324, "learning_rate": 2.234639387257986e-09, "loss": 0.1296, "step": 27551, "teacher_loss": 0.12561437487602234 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.46392738819122314, "learning_rate": 2.195608087972034e-09, "loss": 0.2364, "step": 27552, "teacher_loss": 0.21113426983356476 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.28397253155708313, "learning_rate": 2.1569206433741384e-09, "loss": 0.145, "step": 27553, "teacher_loss": 0.1296006143093109 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.3771470785140991, "learning_rate": 2.1185770543519223e-09, "loss": 0.1916, "step": 27554, "teacher_loss": 0.17095869779586792 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.38208138942718506, "learning_rate": 2.0805773217863477e-09, "loss": 0.2004, "step": 27555, "teacher_loss": 0.18021932244300842 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.26384854316711426, "learning_rate": 2.042921446545054e-09, "loss": 0.1497, "step": 27556, "teacher_loss": 0.13701125979423523 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4151048958301544, "learning_rate": 2.005609429494015e-09, "loss": 0.2027, "step": 27557, "teacher_loss": 0.1791197806596756 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.21027398109436035, "learning_rate": 1.968641271487548e-09, "loss": 0.2087, "step": 27558, "teacher_loss": 0.20852135121822357 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.30989012122154236, "learning_rate": 1.9320169733749726e-09, "loss": 0.2273, "step": 27559, "teacher_loss": 0.21814700961112976 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4591987729072571, "learning_rate": 1.8957365359939525e-09, "loss": 0.2317, "step": 27560, "teacher_loss": 0.20642301440238953 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.08170595020055771, "learning_rate": 1.8597999601771554e-09, "loss": 0.1505, "step": 27561, "teacher_loss": 0.15809917449951172 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4144291877746582, "learning_rate": 1.8242072467489213e-09, "loss": 0.1895, "step": 27562, "teacher_loss": 0.16453976929187775 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.20232263207435608, "learning_rate": 1.7889583965235988e-09, "loss": 0.1896, "step": 27563, "teacher_loss": 0.18823260068893433 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.35350364446640015, "learning_rate": 1.754053410310541e-09, "loss": 0.1832, "step": 27564, "teacher_loss": 0.16428744792938232 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4116061329841614, "learning_rate": 1.7194922889107734e-09, "loss": 0.2857, "step": 27565, "teacher_loss": 0.27167975902557373 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.3763733208179474, "learning_rate": 1.6852750331153299e-09, "loss": 0.2071, "step": 27566, "teacher_loss": 0.1882752776145935 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4310913681983948, "learning_rate": 1.6514016437102485e-09, "loss": 0.2082, "step": 27567, "teacher_loss": 0.18340416252613068 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.2760845720767975, "learning_rate": 1.61787212146991e-09, "loss": 0.1809, "step": 27568, "teacher_loss": 0.1703006625175476 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.18415653705596924, "learning_rate": 1.5846864671653637e-09, "loss": 0.1539, "step": 27569, "teacher_loss": 0.15051865577697754 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.36839258670806885, "learning_rate": 1.5518446815560029e-09, "loss": 0.2173, "step": 27570, "teacher_loss": 0.20047537982463837 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.5297494530677795, "learning_rate": 1.519346765394558e-09, "loss": 0.2653, "step": 27571, "teacher_loss": 0.23596641421318054 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.38183921575546265, "learning_rate": 1.4871927194270995e-09, "loss": 0.2135, "step": 27572, "teacher_loss": 0.19480198621749878 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.23843765258789062, "learning_rate": 1.4553825443913705e-09, "loss": 0.2083, "step": 27573, "teacher_loss": 0.2049260139465332 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.9499783515930176, "learning_rate": 1.4239162410134565e-09, "loss": 0.5286, "step": 27574, "teacher_loss": 0.48180079460144043 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.48866426944732666, "learning_rate": 1.3927938100194436e-09, "loss": 0.2664, "step": 27575, "teacher_loss": 0.24172326922416687 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.17492449283599854, "learning_rate": 1.3620152521187646e-09, "loss": 0.1587, "step": 27576, "teacher_loss": 0.15684616565704346 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.4849300980567932, "learning_rate": 1.3315805680208514e-09, "loss": 0.2366, "step": 27577, "teacher_loss": 0.20905955135822296 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.3565036356449127, "learning_rate": 1.3014897584201491e-09, "loss": 0.1827, "step": 27578, "teacher_loss": 0.16338159143924713 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.566371738910675, "learning_rate": 1.271742824007771e-09, "loss": 0.2402, "step": 27579, "teacher_loss": 0.20392413437366486 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.2686460018157959, "learning_rate": 1.2423397654648395e-09, "loss": 0.199, "step": 27580, "teacher_loss": 0.19121350347995758 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.8185237050056458, "learning_rate": 1.2132805834674798e-09, "loss": 0.2669, "step": 27581, "teacher_loss": 0.205555260181427 }, { "compression_loss": 0.0, "epoch": 4.98, "label_loss": 0.6501341462135315, "learning_rate": 1.1845652786818261e-09, "loss": 0.2493, "step": 27582, "teacher_loss": 0.20477700233459473 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6021891832351685, "learning_rate": 1.1561938517640203e-09, "loss": 0.2558, "step": 27583, "teacher_loss": 0.2172819972038269 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.36796504259109497, "learning_rate": 1.1281663033668731e-09, "loss": 0.2545, "step": 27584, "teacher_loss": 0.24187731742858887 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.47607868909835815, "learning_rate": 1.100482634131539e-09, "loss": 0.2693, "step": 27585, "teacher_loss": 0.24634069204330444 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.277593731880188, "learning_rate": 1.0731428446925095e-09, "loss": 0.1885, "step": 27586, "teacher_loss": 0.17863085865974426 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3477177023887634, "learning_rate": 1.0461469356776165e-09, "loss": 0.222, "step": 27587, "teacher_loss": 0.20803695917129517 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.7328449487686157, "learning_rate": 1.0194949077046988e-09, "loss": 0.205, "step": 27588, "teacher_loss": 0.1463773250579834 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3501174747943878, "learning_rate": 9.931867613865998e-10, "loss": 0.1965, "step": 27589, "teacher_loss": 0.17941896617412567 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3202378749847412, "learning_rate": 9.672224973245046e-10, "loss": 0.1812, "step": 27590, "teacher_loss": 0.1657719761133194 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4764711856842041, "learning_rate": 9.41602116116269e-10, "loss": 0.248, "step": 27591, "teacher_loss": 0.22257961332798004 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.23351150751113892, "learning_rate": 9.163256183447599e-10, "loss": 0.2203, "step": 27592, "teacher_loss": 0.21879927814006805 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4641985297203064, "learning_rate": 8.913930045945096e-10, "loss": 0.2747, "step": 27593, "teacher_loss": 0.25368964672088623 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.24879541993141174, "learning_rate": 8.668042754350624e-10, "loss": 0.1956, "step": 27594, "teacher_loss": 0.1896853744983673 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.372722864151001, "learning_rate": 8.425594314293017e-10, "loss": 0.184, "step": 27595, "teacher_loss": 0.16307485103607178 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.26477089524269104, "learning_rate": 8.18658473133449e-10, "loss": 0.1814, "step": 27596, "teacher_loss": 0.17209315299987793 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.634881854057312, "learning_rate": 7.951014010953994e-10, "loss": 0.2371, "step": 27597, "teacher_loss": 0.1929101049900055 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.37849438190460205, "learning_rate": 7.718882158563867e-10, "loss": 0.1997, "step": 27598, "teacher_loss": 0.17986878752708435 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.7605476379394531, "learning_rate": 7.490189179493178e-10, "loss": 0.2403, "step": 27599, "teacher_loss": 0.1825028955936432 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5034005045890808, "learning_rate": 7.264935078954427e-10, "loss": 0.21, "step": 27600, "teacher_loss": 0.17743176221847534 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3318106532096863, "learning_rate": 7.043119862143455e-10, "loss": 0.1845, "step": 27601, "teacher_loss": 0.16809909045696259 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6110537648200989, "learning_rate": 6.824743534122879e-10, "loss": 0.2521, "step": 27602, "teacher_loss": 0.21222075819969177 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.443381667137146, "learning_rate": 6.609806099905358e-10, "loss": 0.1937, "step": 27603, "teacher_loss": 0.1659296303987503 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5111098289489746, "learning_rate": 6.39830756442028e-10, "loss": 0.2153, "step": 27604, "teacher_loss": 0.18239913880825043 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 1.0771595239639282, "learning_rate": 6.190247932513771e-10, "loss": 0.3066, "step": 27605, "teacher_loss": 0.2209721952676773 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5104868412017822, "learning_rate": 5.985627208981992e-10, "loss": 0.1957, "step": 27606, "teacher_loss": 0.1607687771320343 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4440404176712036, "learning_rate": 5.784445398471227e-10, "loss": 0.2663, "step": 27607, "teacher_loss": 0.246603325009346 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4980778098106384, "learning_rate": 5.586702505627761e-10, "loss": 0.2638, "step": 27608, "teacher_loss": 0.23773658275604248 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.7881306409835815, "learning_rate": 5.392398534981302e-10, "loss": 0.3089, "step": 27609, "teacher_loss": 0.25565391778945923 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5794239044189453, "learning_rate": 5.201533490961641e-10, "loss": 0.2281, "step": 27610, "teacher_loss": 0.18905283510684967 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.42243507504463196, "learning_rate": 5.014107377981913e-10, "loss": 0.1598, "step": 27611, "teacher_loss": 0.1306273639202118 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.9417681694030762, "learning_rate": 4.830120200305377e-10, "loss": 0.2444, "step": 27612, "teacher_loss": 0.16686177253723145 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.2890573740005493, "learning_rate": 4.6495719621786337e-10, "loss": 0.1893, "step": 27613, "teacher_loss": 0.1781834065914154 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.2749296724796295, "learning_rate": 4.47246266771506e-10, "loss": 0.1992, "step": 27614, "teacher_loss": 0.19079604744911194 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6402239203453064, "learning_rate": 4.2987923209947265e-10, "loss": 0.2227, "step": 27615, "teacher_loss": 0.17627288401126862 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5840927958488464, "learning_rate": 4.1285609259977817e-10, "loss": 0.2714, "step": 27616, "teacher_loss": 0.2367095947265625 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3084568977355957, "learning_rate": 3.9617684866044557e-10, "loss": 0.1604, "step": 27617, "teacher_loss": 0.14393800497055054 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6089762449264526, "learning_rate": 3.798415006678324e-10, "loss": 0.2102, "step": 27618, "teacher_loss": 0.16585098206996918 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.9442403316497803, "learning_rate": 3.6385004899330833e-10, "loss": 0.2963, "step": 27619, "teacher_loss": 0.22426702082157135 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.386463463306427, "learning_rate": 3.482024940032469e-10, "loss": 0.1842, "step": 27620, "teacher_loss": 0.16178038716316223 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6466434597969055, "learning_rate": 3.3289883605902573e-10, "loss": 0.2472, "step": 27621, "teacher_loss": 0.2027946263551712 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4422330856323242, "learning_rate": 3.1793907551036507e-10, "loss": 0.2195, "step": 27622, "teacher_loss": 0.19469739496707916 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4953764081001282, "learning_rate": 3.0332321269865847e-10, "loss": 0.1938, "step": 27623, "teacher_loss": 0.16026544570922852 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.2941608428955078, "learning_rate": 2.890512479619689e-10, "loss": 0.1609, "step": 27624, "teacher_loss": 0.14606410264968872 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6070818305015564, "learning_rate": 2.7512318162503656e-10, "loss": 0.2224, "step": 27625, "teacher_loss": 0.17960235476493835 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.2976424992084503, "learning_rate": 2.6153901400760574e-10, "loss": 0.1732, "step": 27626, "teacher_loss": 0.15941032767295837 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.7105516791343689, "learning_rate": 2.482987454227592e-10, "loss": 0.3345, "step": 27627, "teacher_loss": 0.2927194833755493 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.567573606967926, "learning_rate": 2.354023761719226e-10, "loss": 0.2724, "step": 27628, "teacher_loss": 0.23956577479839325 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.2901977300643921, "learning_rate": 2.2284990655152549e-10, "loss": 0.1663, "step": 27629, "teacher_loss": 0.15252584218978882 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.41549891233444214, "learning_rate": 2.1064133684967068e-10, "loss": 0.1654, "step": 27630, "teacher_loss": 0.13761994242668152 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4282142221927643, "learning_rate": 1.9877666734613443e-10, "loss": 0.219, "step": 27631, "teacher_loss": 0.1957518458366394 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.6815993785858154, "learning_rate": 1.8725589831236623e-10, "loss": 0.2622, "step": 27632, "teacher_loss": 0.21558091044425964 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.5314069986343384, "learning_rate": 1.760790300148196e-10, "loss": 0.2742, "step": 27633, "teacher_loss": 0.24561455845832825 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.7228803634643555, "learning_rate": 1.6524606270662546e-10, "loss": 0.4785, "step": 27634, "teacher_loss": 0.45131272077560425 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.3449806571006775, "learning_rate": 1.547569966375839e-10, "loss": 0.2096, "step": 27635, "teacher_loss": 0.19455364346504211 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.32586073875427246, "learning_rate": 1.4461183204750318e-10, "loss": 0.1723, "step": 27636, "teacher_loss": 0.15521715581417084 }, { "compression_loss": 0.0, "epoch": 4.99, "label_loss": 0.4190394878387451, "learning_rate": 1.3481056917119538e-10, "loss": 0.2181, "step": 27637, "teacher_loss": 0.1958131492137909 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.4304284453392029, "learning_rate": 1.2535320823015006e-10, "loss": 0.2156, "step": 27638, "teacher_loss": 0.19176256656646729 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.15546372532844543, "learning_rate": 1.1623974944419136e-10, "loss": 0.1674, "step": 27639, "teacher_loss": 0.16876819729804993 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.4677286148071289, "learning_rate": 1.0747019301982075e-10, "loss": 0.2037, "step": 27640, "teacher_loss": 0.1744040995836258 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.2546035349369049, "learning_rate": 9.904453915854372e-11, "loss": 0.1623, "step": 27641, "teacher_loss": 0.15201327204704285 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.5096229314804077, "learning_rate": 9.096278805520441e-11, "loss": 0.281, "step": 27642, "teacher_loss": 0.25558340549468994 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.7262166738510132, "learning_rate": 8.322493989465496e-11, "loss": 0.2783, "step": 27643, "teacher_loss": 0.2285018116235733 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.28646451234817505, "learning_rate": 7.583099485342083e-11, "loss": 0.1582, "step": 27644, "teacher_loss": 0.14391304552555084 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.5626187324523926, "learning_rate": 6.878095309970079e-11, "loss": 0.2256, "step": 27645, "teacher_loss": 0.1881694495677948 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.361987829208374, "learning_rate": 6.207481479836297e-11, "loss": 0.1998, "step": 27646, "teacher_loss": 0.18183302879333496 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.49351149797439575, "learning_rate": 5.5712580100952816e-11, "loss": 0.2562, "step": 27647, "teacher_loss": 0.22986529767513275 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.4970219135284424, "learning_rate": 4.9694249154019766e-11, "loss": 0.2747, "step": 27648, "teacher_loss": 0.2499682605266571 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.4484044909477234, "learning_rate": 4.4019822095786585e-11, "loss": 0.2547, "step": 27649, "teacher_loss": 0.23317383229732513 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.7607696056365967, "learning_rate": 3.868929905448404e-11, "loss": 0.2415, "step": 27650, "teacher_loss": 0.18380752205848694 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.46764880418777466, "learning_rate": 3.370268015501221e-11, "loss": 0.3332, "step": 27651, "teacher_loss": 0.31824544072151184 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.25581175088882446, "learning_rate": 2.9059965510613852e-11, "loss": 0.1913, "step": 27652, "teacher_loss": 0.18416239321231842 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.1968652904033661, "learning_rate": 2.4761155226205035e-11, "loss": 0.1628, "step": 27653, "teacher_loss": 0.15905970335006714 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.317949116230011, "learning_rate": 2.0806249403371168e-11, "loss": 0.1914, "step": 27654, "teacher_loss": 0.17738312482833862 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.48965245485305786, "learning_rate": 1.7195248130374986e-11, "loss": 0.3629, "step": 27655, "teacher_loss": 0.3487917482852936 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 1.0910085439682007, "learning_rate": 1.3928151492148544e-11, "loss": 0.2718, "step": 27656, "teacher_loss": 0.1808234453201294 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.914210319519043, "learning_rate": 1.1004959560301232e-11, "loss": 0.2901, "step": 27657, "teacher_loss": 0.22077783942222595 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.07420274615287781, "learning_rate": 8.425672404777095e-12, "loss": 0.0952, "step": 27658, "teacher_loss": 0.09752441942691803 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.5301076173782349, "learning_rate": 6.190290083862849e-12, "loss": 0.2611, "step": 27659, "teacher_loss": 0.23117710649967194 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.35024869441986084, "learning_rate": 4.298812649183859e-12, "loss": 0.1993, "step": 27660, "teacher_loss": 0.1825425624847412 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.20631736516952515, "learning_rate": 2.75124014237349e-12, "loss": 0.1415, "step": 27661, "teacher_loss": 0.1342630535364151 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.36388373374938965, "learning_rate": 1.5475726000691026e-12, "loss": 0.2164, "step": 27662, "teacher_loss": 0.20005418360233307 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.8099757432937622, "learning_rate": 6.878100505813834e-13, "loss": 0.2862, "step": 27663, "teacher_loss": 0.22799943387508392 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.2459041178226471, "learning_rate": 1.7195251389434674e-13, "loss": 0.1873, "step": 27664, "teacher_loss": 0.18073615431785583 }, { "compression_loss": 0.0, "epoch": 5.0, "label_loss": 0.7424832582473755, "learning_rate": 0.0, "loss": 0.2669, "step": 27665, "teacher_loss": 0.21404887735843658 }, { "epoch": 5.0, "step": 27665, "total_flos": 2.066153355362304e+16, "train_loss": 0.262258041142534, "train_runtime": 29173.0059, "train_samples_per_second": 15.172, "train_steps_per_second": 0.948 } ], "max_steps": 27665, "num_train_epochs": 5, "total_flos": 2.066153355362304e+16, "trial_name": null, "trial_params": null }